From cb06b1bc3d7e6b9f0c0f92324f395e883158fe97 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 28 Apr 2020 11:02:19 -0700 Subject: [PATCH] Implement stroked polylines This version seems to work but the allocation of segments has low utilization. Probably best to allocate in chunks rather than try to make them contiguous. --- piet-gpu-types/src/lib.rs | 1 + piet-gpu-types/src/main.rs | 1 + piet-gpu-types/src/ptcl.rs | 6 +- piet-gpu-types/src/segment.rs | 27 +++++++ piet-gpu-types/src/tilegroup.rs | 19 ++++- piet-gpu/shader/build.ninja | 2 + piet-gpu/shader/kernel1.comp | 40 ++++++++-- piet-gpu/shader/kernel1.spv | Bin 13456 -> 16320 bytes piet-gpu/shader/kernel2s.comp | 127 ++++++++++++++++++++++++++++++++ piet-gpu/shader/kernel2s.spv | Bin 0 -> 16608 bytes piet-gpu/shader/kernel3.comp | 32 +++++++- piet-gpu/shader/kernel3.spv | Bin 13176 -> 19300 bytes piet-gpu/shader/kernel4.comp | 24 +++++- piet-gpu/shader/kernel4.spv | Bin 6680 -> 11640 bytes piet-gpu/shader/ptcl.h | 20 +++-- piet-gpu/shader/segment.h | 99 +++++++++++++++++++++++++ piet-gpu/shader/setup.h | 9 +++ piet-gpu/shader/tilegroup.h | 37 +++++++++- piet-gpu/src/main.rs | 100 ++++++++++++++++++++----- piet-gpu/src/render_ctx.rs | 2 +- 20 files changed, 502 insertions(+), 44 deletions(-) create mode 100644 piet-gpu-types/src/segment.rs create mode 100644 piet-gpu/shader/kernel2s.comp create mode 100644 piet-gpu/shader/kernel2s.spv create mode 100644 piet-gpu/shader/segment.h diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 2072d8a..db9516f 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -1,5 +1,6 @@ pub mod encoder; pub mod ptcl; pub mod scene; +pub mod segment; pub mod test; pub mod tilegroup; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index d19e825..834f1b6 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -6,6 +6,7 @@ fn main() { match mod_name.as_str() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), + "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), _ => println!("Oops, unknown module name"), diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index ed72e42..3faffb9 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -13,8 +13,10 @@ piet_gpu! { end: [f32; 2], } struct CmdStroke { - // In existing code, this is f16. Should we have support? - halfWidth: f32, + n_segs: u32, + // Should be Ref if we had cross-module references. + seg_ref: u32, + half_width: f32, rgba_color: u32, } struct CmdFill { diff --git a/piet-gpu-types/src/segment.rs b/piet-gpu-types/src/segment.rs new file mode 100644 index 0000000..ba5f3e2 --- /dev/null +++ b/piet-gpu-types/src/segment.rs @@ -0,0 +1,27 @@ +use piet_gpu_derive::piet_gpu; + +// Structures representing segments for stroke/fill items. + +piet_gpu! { + #[gpu_write] + mod segment { + struct TileHeader { + n: u32, + items: Ref, + } + + // Note: this is only suitable for strokes, fills require backdrop. + struct ItemHeader { + n: u32, + segments: Ref, + } + + // TODO: strongly consider using f16. If so, these would be + // relative to the tile. We're doing f32 for now to minimize + // divergence from piet-metal originals. + struct Segment { + start: [f32; 2], + end: [f32; 2], + } + } +} diff --git a/piet-gpu-types/src/tilegroup.rs b/piet-gpu-types/src/tilegroup.rs index 5912154..ea295d9 100644 --- a/piet-gpu-types/src/tilegroup.rs +++ b/piet-gpu-types/src/tilegroup.rs @@ -1,5 +1,18 @@ use piet_gpu_derive::piet_gpu; +// Structures representing tilegroup instances (output of kernel 1). +// There are three outputs: the main instances, the stroke instances, +// and the fill instances. All three are conceptually a list of +// instances, but the encoding is slightly different. The first is +// encoded with Instance, Jump, and End. The other two are encoded +// as a linked list of Chunk. + +// The motivation for the difference is that the first requires fewer +// registers to track state, but the second contains information that +// is useful up front for doing dynamic allocation in kernel 2, as +// well as increasing read parallelism; the "jump" approach really is +// geared to sequential reading. + piet_gpu! { #[gpu_write] mod tilegroup { @@ -11,7 +24,11 @@ piet_gpu! { offset: [f32; 2], } struct Jump { - new_ref: u32, + new_ref: Ref, + } + struct Chunk { + chunk_n: u32, + next: Ref, } enum TileGroup { Instance(Instance), diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 7509062..3da40c9 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -11,6 +11,8 @@ build image.spv: glsl image.comp | scene.h build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h +build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h + build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h setup.h build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp index 82ccb8f..ce99005 100644 --- a/piet-gpu/shader/kernel1.comp +++ b/piet-gpu/shader/kernel1.comp @@ -7,8 +7,7 @@ // subgroups (or possibly both) to parallelize the reading of the input and // the computation of tilegroup intersection. // -// In addition, there are some features currently missing. One is the use of -// a bump allocator to extend the current fixed allocation. Another is support +// In addition, there are some features currently missing, such as support // for clipping. #version 450 @@ -46,8 +45,17 @@ void main() { StackElement stack[MAX_STACK]; uint stack_ix = 0; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; - TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); + TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE); uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; + + // State for stroke references. + TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START); + ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4); + InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size); + uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_ALLOC - Instance_size; + uint stroke_chunk_n = 0; + uint stroke_n = 0; + vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); PietItemRef root = PietItemRef(0); SimpleGroup group = PietItem_Group_read(root); @@ -60,9 +68,11 @@ void main() { bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX)) && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX)); bool is_group = false; + uint tag; if (hit) { PietItemRef item_ref = PietItem_index(group.items, tos.index); - is_group = PietItem_tag(item_ref) == PietItem_Group; + tag = PietItem_tag(item_ref); + is_group = tag == PietItem_Group; } if (hit && !is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); @@ -70,13 +80,27 @@ void main() { if (tg_ref.offset > tg_limit) { // Allocation exceeded; do atomic bump alloc. uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC); - Jump jump = Jump(new_tg); + Jump jump = Jump(TileGroupRef(new_tg)); TileGroup_Jump_write(tg_ref, jump); tg_ref = TileGroupRef(new_tg); tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; } TileGroup_Instance_write(tg_ref, ins); tg_ref.offset += TileGroup_size; + if (tag == PietItem_Poly) { + if (stroke_ref.offset > stroke_limit) { + uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC); + Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke))); + stroke_chunk_start = ChunkRef(new_stroke); + stroke_ref = InstanceRef(new_stroke + Chunk_size); + stroke_n += stroke_chunk_n; + stroke_chunk_n = 0; + stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size; + } + Instance_write(stroke_ref, ins); + stroke_chunk_n++; + stroke_ref.offset += Instance_size; + } } if (is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); @@ -99,4 +123,10 @@ void main() { } } TileGroup_End_write(tg_ref); + + stroke_n += stroke_chunk_n; + if (stroke_n > 0) { + Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0))); + } + tilegroup[stroke_start.offset >> 2] = stroke_n; } diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv index 9ac359394ba6be6c962643b79efacce5218e75ce..8430d74cfd8bc38871df6cce137a081d4d2602c8 100644 GIT binary patch literal 16320 zcmai)37nQ?xyE0ZZ$<=h0|hr61QnGa>~+kTtn;sf4}#hdFJb!UvE6;zOL)OpZmF=7t2U-OUpZnR1{3+h#R+oH5%GQ;RLp zRyDS^>o^+6_ULLm(2T1uYz=)K<{`vVHaMHVsA2x%#q$@I5RX+SLz>G*OcmCae+vF^l{6?uhnuZ|Tfx>~w3Y95je zg&Wh*+TPrN5@`5Zf2 z8k_&GDdKm2yJ9Qj)Mk5tmvpqYcRLv_cUaAd;1Q}4o>^#%h7N6e@bEhgL?bdW>dfm+q=3O+nZWkNY_!#t8b3A*`e@=(^tp; z)Vy3XM(sTZ>aJy9{~fa#@aW&Lwv#R~%TQyM#Vr;xOEzK6ii5n#to?BEIF#7?j%@*# zvYFUpdRFC!!8ypi`f8}lX2DNtO;1f_Pu0EpVoTX3&3;F>1jWYE^l(&lY3ze*GsAL#DIHHMgUfNgC7p3_Ol}F^}WHm3i!xoeb`s zdP8h(|3Q2G%rzZ2q8+<93i+BXO~yK@cdQZF8U4ok=No?}S_U3;vHNx|JXJcc%T&q@|)ShP_*qx1Q^LJ@2%K7l6O)c#$b9#EuftpvBI@WWv zAy+CnE(McgvXZ{5v6cO($!-D1Shs@v#wulZRCsT{d*IRU{-oc7RsA0B*RPa4Ug17& z^6{U>R`Qgx7r?6>(@O+<&xm^J7Tj#p$BKT6*ZC+c;s+RV=pU*3Gujm_^a(vH(<~zCX*;kWoB&fTi zE3fBzaNoC0DZ2sQH-0I*v5Mc+kJn`Xp7(d|HQDXpbJzmc;ZbmFS3{JSYfq>p?>^V# zNwjplr~2`l>=`((c>0&JXZ!hTv*+Q}`CjN3Q_5cI$7`~e?bBYF`)gp|Pu(jkd-p%^ z*==ndO}+O_ZKmecsE&1>uNRs%y#+qEc<;%#z|;A>SH-ttGgAN7{djG*8{C?BXK;2M zuaxc8FQ$}@t>Tlb_`&^nZFVHQdYxwViz#JutN2oQe!u2(E@h`x`Bzu*?kc{iieH-W z`OJyWS)Kru-#824@oXN6wt!g&9|QNA&$_GC$dqjOf_Z-e{t*XGYe z-Dy6blabG7V46>EKDIB<@9!^vuk221}`UPRz;r>uFx2@c4VHqWSAB%eh)Tv0E2h|7hwPTyW#`*?&81^xwYls~PWaO0l}} zj=wXuo;H9s0zLw(R-&2Dy;2*E;ymPzr_bLuG1e%sb*k-*Z%=GJ&74}fbGF}TY(34K zd*xQW;@bxsIrjydQ*9LLxWeZg)Xig#{jmCsmruZ&!+FYmY3r9ezTfgO##FGm)J75O zcY4KVjiv0c!msWcPQyA6`{^^^Ol;&k5^O#-eY3Fo#Ivz-^UlEzFzvo+icfm&&{Jw(MVu^AtISntwIeSZmVnxZV00B6v^EH_jf%V`?NrtQiRj8@jc=jUVy$6otl!w?4es}KaPMQkn}Zv_G~xRF zW)8p4hGhx&ndbL#`0ekvamxKBPPyO0!M$Jn4i4_x_$?gV`^9hJ;QIX*4(@($O!!s! zuTQwomm3mpeSW(}fBk;D2KOxZ?Hb&(;P-3F{e}&0J$}EY+;7+5j_0>)%Kdf?ZvSVi z`14iVZ`X)7pWm**eFppO8eG5MufgZQ{T2=Ge)=sMT>pCs*YCGz_|5OPXmIoUEgF0w z-0#rfzBl|14et5yJ2bfY{0H#(_oZ6IF9xf{I46MBe9kVRmRaOJ5v*>1?_;?&xNawb)ym+=c`{r* zYB&|F=Ck^eqOQ}x>ON!4>+?V@@}2>%&U+@BdgMI|tY+S*^=z=ZIen(cBWD9xEv>Z- zSC5>H`f0ASIek7ZhpU^@=aM{fHiOmDoGozm$hiWXYei1K=~kkto6~2TJaV>ztLr=$ zO+9j+2Ubhhv<%BPWM9KH*|2V>Rs`#e^QzMH`Ioz30g z^ZXL9F@DFCX~xxKFGbV0gt&{am(lF&-1V8;XSw>@yu&tQeV1w9AjcKhD`{T(uPXf7 zcko|>y_{y8{lu|v*MiOMx0dyI-@YHN9x)#PTX#8$`5;{VUe@Aj?1yOfQ-70sufuvd zzP=CBR`5J=e6hKG$9x2AZog?A-{;Lo;p)!Yca%J8`53sm-jAcH8-F8KK8=}}>l5Ii zSo7#J<|eGV`{?)VC&9a6)%9PGRdYK*W#v7e@?*~h*94A?qj zUiE0V!qpe^6td4}X=*mF!sK=nd5rlS&D_RXo9|#X$0=d&psB_4)bF=D3q4{!5BH3x zF?Ye$w_%m+^93;f@^fu}{n7t!uw(eW?);6v2d*A5_kyd(_#&G6CTg|Mm%wV>v>5lx zVE$!$7degjI`N*Vuh6W^7|+yKv0k1jefQDSJX7MRJWyV?02MHF*%M9x>knSC8>+H1$ozntTVW)wgBTW?%h31*_djyle0?np(VbTq8BtIp(j8 zHTyZ(H4FX=uzhYPzxjSiQ@gFm|0}S4)LrLaW4&Bweb3Rb^Z;Sy7PJg zE01;lPjK}*{}-CN@h@TJvCh8*yUym(XUy|h^~m`G-VC4`T?4vL09ty6mds{U1sCzrGntw}(e%phipX14mi@EOrPM-lg!qxni_ACttdwG`h z4Wp@fmc%j6PT;1hb=?_FJ;oXVK8Kh%wiVbw}SxV72rXGAcu$ps< ze$(O6&%AQuoM+r0T6w(FwASgnaRxr$m7e>VU@z}6^&@D@X`XwrWBR=@8(jS!oTHy+ z4YRO0C*JRkqu|x=!Fg!v#?QsdT~og$jt2W4G><-G{2PpV%zHk#I_CnkTxVR*$wIhV zthqk5$Z-s~I>)hS>JhUD>|D#_i@iM#O+9{d9S^pjy6d*YG?TTD}P9mSF7B=8(^ z$NruSSBrY}sTqF}_!O}9THD3gQ{hW#_Hm8m#<*-+E(b&_l^7svX2H4-w z>iV6tTJ${=Y+uLr8{;gnwWQw|XQQdd@9bq@^Hl%NZa~+!iCp#MDueB-9>2XC!H(&D zuiu&+Pu=->R$WieXY5rIxOzXE(Q>^w6D@GHSYv%^X-;#?uVXyNSOJecTM3?n9^b0x zz}2kNy+0T1<=*RSrKy=mY~Cx1yy}s+4Q$?c&#!{3MV)-VukU|z zb%W)Og~`_Nf0kj@V?EY_)Az+Xc>2Cr4_7nJoch$F|M_6Gc*kD=SBrhR5bWhX>Dxe4 za~|Tza}n4&;yt_(u6A(|<9qlLaJ+{%f#ue8Dc1OSS6l}6J#36V*DA);#yQ>$KAkbk z#6}&Lqp3esoZ~CNYH|Phyt@+aoW1`x!momF!Io*pTa%h`?`8w|@1q0q|32g2XZ+iY z*I?SA#Q8U&AqDms?tkBNZMMZmosD2)%Cx9+Iaob-6WG4-H{uqsS~D%4dn>^9)91H@ z^{k|+zsH#3sMY6~?*^|OXx8fa-m$>eI*ewmJ7J^NbHT=xX;IsGVD;c_VEaa`?O-*( zwWHP!u>JH|tM#m=savZ!YV~hg{yocU7h0_Au3+oi9UK46s|)PBgRiUNmnQrw@YMy6 z`E|q1Tc*W)dcf*&X4Zn$JTsmJ$5e~>3&CnJ*Neew-ZOEgHi0+L$~67a_Y$x^^IE$+ ze3yaKyqCk({6>zvSAZk0{^)xpSYLiWVddew2JBdm6!*mY!D`piHn2B4V?RJsf0!8g z2Mf)Z$nznvd7{sU!D`XxI}1`jr5Z=&qkT%n#qlKt|PHG)AV~*#c@_ggU8am_M&-K zM`6d%Jga+S<80mnPS5JCRs4>G-wnQ};Bi(z1vhV*7JKk%uzH-;&w$m|(45CDSha}% zELbhh>gT{}=~=xE9%o8_^t~Od&%D+y58s{OTWFE@^I)~jv^cAGfg`W}==%k*KJ&T; z^6=dYb}aYVzk_~}rWXD$f$bCiFN4*5@6=v$jbt2EcPOmj@RaprVCzedyV zUW;R|$AJ%^dF@AYulL37Pjj!wV`H!H2d8`eU=@G3ia(z4r@&7q-1GE8!Q(7^1MawG zTCBwbVD&f)-vq10S+FLxi2oK?E%xl&V72rtd zXW=n;W{uZ0P8caYb_7o55dkM*7iqW z*S}0NUT#dpKMS_Dc#r)UtY-i1u|J{N{~4P7<;Fz+pMtF+-q}9`tJ!}L_UAPF$G*vp ziTZvCuHO4!p{d7S{u-?I9L;g|z&=k?wK-fuL-pH)|d$PJ~06)kG~6WF*iEynpXSUuMJFJQG- zXs-7@*uT=$T|0Bi<9EVqV4o%G)+&!$2ZCqNyr$8tbt?Ao0w0E*PP6VKa$NcU2YMZx z*8K+DK4n_e{U%sF*7hy1nrmyF=1`0Hcfo2g&-cJ;7tvyU{tmXDGA;V?iJYm&ehq+c zDPqjwSZc;apMl_XtQxplJU2?|Yc9>bKN5RX zf#+c7(cJs_*m&P;N1k-=w};!OOpE#LfTkXMzav;J_TC(75kCy97JEM&tXA3ky8Lfc zJHf5Tact3dXLNlr{}JGrzj>U$nsL?hUrZjaMKtHX5PMvKkHsEObN)-PG5?X|N$0-{ z+&*Pm)U+#_ddz<}uv*OD9BL82J6J8|zXwfR3?b?diowRFArhgau5 z08KsiX*^ghp2brbV**^=oYpV5wnOn92zH+OoQpjE_AnVd1D}2LRj=O}_`FV~#rmCA zVDHt_X|CUySl7#Y`Ec^Y`WZ95;IV!O!Oc;o#rhqLrXK5e2v{v$zbWuoKmE=}t#XDc ze^Z_cug-lantH6?VPLgbznRoA4X$oZ>z7;GYf{?#)SS z)$nOMW!bQ71pk{dJjIRF8vYLK5xLM23gJM zY-#V(a3F?V;N`|K^s6gm6?08<2V`|NGHYDX+_-XOC^dJqca@sk z+Ivc^eeD}c`i)5Z+Phji`rA^68a&ec>MSZ-vZi~}$)&Y31oi^$ig?z5)gYUuBt==mCZN-b^w zYKhog-yX)a)uZQrNOxP^XVYe5j?AWmb9-}|JR;`{ z&2vw~ubjI!+ZVjLy>op>sj;WKe|=to`Q?2E^VeiE&=cRhPC1{h=5}_YcYx!#Wo4$k z9{oIr_Nvr7$a%5{bBxJmq9-|qtR&&7b8tNeFsHnp(b<9E*xly7mUXcO!3|8r&wZFP zFn3ILa5^_{$B^{@6vFuj4a~2}4h1jo>g{XkYAxAF`zYr#xW<}nK6?0RtL1-kKK6{P z1Lr{AUJlM5pDjX<`OO=9m=dcDHda}@VnNfg*&Eg#-90lxkzHnE9+I+RzSHQRoOOhG(MsoIeW& z-`A?_WSsiCd-Hlq;K84%)!AC~!Ti-(dxhRPM6b%u$>*2Xc`o?vEZaV81Go1!M|nAS znOySCwSSkxrTMm3=vNHUYqIPAMT{GU_*7>%4biKzTb$EXUaxmEzngvQthpw;6TP9M zqq}wBJeE0o+aizj{N0V8k2T*5KCAd1tKW~FuHk_S{oxAzks*3b_9%K~UmhFcQ=L6g zp}&BhpW&jWS1arj+4Qug-4eZ#n%6V0mf_uU8UH;8>Ek_X8LJH4`-@K_J&^Mb;e6l! zeHcEW*O%2?e;Pa(zb1e8HK+0X9z;Cf^)#Nk@mN+K&wID`)F}+>bKiW@cybZX{YvAh z*W*tc@_2g!jJG>u0<3&Jvz;%V#2T%?j^Q&Ei}&m@=dC@i zeDBRMf0x28r~f#lSYChg?~1Hr3}Z}2pNy1q&y43@$<@QS4t4Ws^G+MNrh=_gZdYu3 zBkLH(RI0mX=hY+Y7{=Tum-1!XzQ~BVAJ~|3Q(1Tj0{@?Seu-& zmLQGg9BukHBEx?fSbsVFmm~F;H@DnL31Np?wR|^?Jsl?hC-~ zoA!?*wNEbeD+|58(61}>I%LG(0XC=e??yV`vk>v`0}o5|2f@CdG5;a3zpsY=IM{c? zzC4YLzB~g~H_zXY=9IH9>V9jYj#t3e;XC2`F`EL6V_xUaLFZ4-J73*zlbrUs1y1|8 zKqG}2ZwXkN`K;SK$7#>9V?%Gu^(_svI74+9mZ9q-KZ`lOV<&*!zp40m7mzbWdUp$d&#QO0(6yhK==R@xR@k>-FD1I~ zw|A|uo8P-u=-N9I-M#Uy6?XT=`&O!Z=L+3=F00VJYlYo>+Y?>?D=KvFTH&w#hD3M1 z_pGq{&GViWx_0kcp*Ns=ZwlT1dv6L|`vZxt-Fs8mjqklFbnAJvLVqmL&F`Hl{5@0N znL>B|s};I;rm&meJCpiRoG#Cj?^^>WY%X{n!}s2^cqHe|Gks9OUx<(T!RT^+mtx)_ z=*}}%%sUiaeoHa$FtBsw&F#6B3;)Bxa*^idV1$e(Q|wyC*01z4Lo#9=~%Zf#tOM&Z#dT&J1iT!3QJdwfm0B#oX0k=QglIzT>BW z^?48+{ip+<3a70JKfg_HVmQ}*(`IbnZ~2$_?wp47`=flGI!{NQ!SK=kmcp)l8N1)$ zH#78eo;ddBZD3>H$K2STW_0=R`46ylN6ntM7IgWq(2FyXYZ%Uxe}Q`2kUr+u*2);k z88^S!*dws70~_0WhWUL5-i|Ksy8UjcM=fW8E9*TQPF{b%Yw8Q=rLj7|W01ztrcWnQ z-hK3r(FNWUDX-mcr<^&xYxIESV*QihdeP;(sks~3$B=W5d)*JV&RAC++y->{mAo0w z*~pM{_!J_STZyC3CWf)~wKl)Ya^~^=@D8wCyyO0iJr7+zeBLRp@Oc-y{Ajx5oOgry zlb>tnYmfO`z~EoGtAJWG&rR}{8InR_hYI#4{dgFU(?hl~LhtCJa6+RzAmyg;$4CYVX6X$D> z`5ytB<6=hi9CJi5I8+mY(NyTP1?`D|mPrM(-{vNu#@1^Hp z0oXe4Kx&H`?gPtt&smJ$_rY@d-iuV%*E`S;z^+N#{YZ6f&if%)e{ByS)!i3<9A-ZP zuR=OUTjYKa9J!7EV{|!Vm`6SO`4ezuKOcgVk2}^+!E&xO?pQwq%cplNIp2lor83s| zb8xKjVX(S3_xu-N_u2X$L8`|Z{~NplALnR`x_QU2^VBd@M z+xACvdHtV2s>l2IC$R6MakT04C{jLR{x7&P=2LL;(Vstq<;-ggZE_LgFW|};e}$6| zpQpjDHGLnSL6?vF-QU2@ledq5NBY=DZO<~~eE-C8j$Z`NWJJ9$>BES6waMwx<2l8Q`T12(7MAnn%V+T~r}@4@!d^BH@! z6Ih@0oQ;5!j~?#~mWv*1lS^Y7TfMEw;oUs;Y!ujiiJzIH(dDeuy&ntqaqqQ_VaQpl z*tn+`apfcKE@0!v_h%fsT-2#eF7|#r*u9rChUa4fSU!5CuD|c>B(S}SbL@H86;57% zf9F<@^Dr6gd619uAeZ*V{-~dUbbU(o(+b^nTASbK-I+Td-B|uUuWl|xwg>+?6)FE( zHjKZw9hU#SlE0()`-zW#k3XNe{$Aw27l?O8j%Jv9EHci)o5A|TTK&0pIzvACeFoUM z@w@w(U^)NpE6&PW!Oqj>&pGRP8$cAhqCwVrhhd21C%t^Qu;?{q%?dqwnh53u!3LB`+J z`ETg1JM``f{oF*~0^VBavA(m>jhn8o16}?O_TRnn|Ij7px5V>dPPtfrA6PEdx&bWb zJLk7%BXSd?mth|5F?Taqn{lmOJ#6m)r*YqjF6Z4R;=T(Uakac?R>Dg z?kd)IAz1DL#(C_G|8{T@L;g;D)ZbHZ`b3=ff{hb%-UpV8IqwI{rE@-jZatCvgJ8Kd z_lMBsBhH7xa%r54(Tx+iKLVDEIUfbfIj4yeG8y?XhU>5{W9s8M@b3ve!SHOjKlV&r zf7j}H{3JuWXH^_$wH`d3;j<6JvpN+yjp14KcjGvl+ra5ry|hB#p6FMCuPXF7tCye~ z*BWCFJ_VMKv-)YUT%1*N%7y=BV7WM}mxJZfv-%nII8)kV?q|W;?2omphwXFVH16lo z<f1jyCeu?26dHZ-B z^2-c+Yah+2uAed8&+8f5-D`2|^$hS#hR^;C_j*6%0Sxzg7BcqwMsT{65PAL{yq|M$Vx7C*=C2g^C%pQ%4!IR8F|`PB7^`451t zA%1565G?2X5y&4goFDt9u20nWV{ql({{&7x_VTA-xrZ3$*$eqIhP*X-p47eLc;G1cS#`bV(e5_xM?k6MR=7cqPmFs$`3Z25 zxXAwx^vG{q^ULWM`MK5)$v+HTZU-Xr4@Zyu))o1y;I&2mYOq}7uK`DXf-KN@*Vfg6y=GTi&)kn!`TmN@C&?}YB0bWcX;!-&1#87vojZw$HU$4IbT z?ENUPT)Ow8(XA)VGX_pR);<;-Yd3~#m(#Cu?JJ1ovz+1Dmm!ZYu;=~+hHF0&>DqlS zCKJcCtLw8{p~v}r1G+KN8h3${k6w%e%h`*ldpvs7t=+oi()CY3uZ%qrPCoiJ2`m@i zhC|7*E4sWftzX@~&&Req*mY`iE$Z?2aeIIlVRMeQ%Kojw=5rDw`nR&c4ag>j{W}?H zzdT#hh!g$OXL_MW|Mo;TM%uqAaPrZ=y})v5|E8ix|FpXvIp3Rzy*GMg>^eC4=wCfp zF8X&A^Y=lQH>UNg+rP!w-UxP`+FXlz^lx9V@0WA5Rrc>xY(A?Q(Z5r`_Uui_sCz%= f%BMY<0Vf|l*&i&I_T&Kc=!te~l-oh9sOA3vchlCX diff --git a/piet-gpu/shader/kernel2s.comp b/piet-gpu/shader/kernel2s.comp new file mode 100644 index 0000000..3eb2d00 --- /dev/null +++ b/piet-gpu/shader/kernel2s.comp @@ -0,0 +1,127 @@ +// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke +// (polyline) items in the scene and generates a list of segments for each, for +// each tile. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +layout(local_size_x = 32) in; + +layout(set = 0, binding = 0) readonly buffer SceneBuf { + uint[] scene; +}; + +layout(set = 0, binding = 1) buffer TilegroupBuf { + uint[] tilegroup; +}; + +layout(set = 0, binding = 2) buffer SegmentBuf { + uint[] segment; +}; + +layout(set = 0, binding = 3) buffer AllocBuf { + uint alloc; +}; + +#include "scene.h" +#include "tilegroup.h" +#include "segment.h" + +#include "setup.h" + +void main() { + uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; + uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES); + vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); + TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START); + uint stroke_n = tilegroup[stroke_start.offset >> 2]; + + TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size); + if (stroke_n > 0) { + ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4); + Chunk chunk = Chunk_read(chunk_ref); + InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); + ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size)); + TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header)); + SegmentRef seg_ref = SegmentRef(0); + uint seg_limit = 0; + // Iterate through items; stroke_n holds count remaining. + while (true) { + if (chunk.chunk_n == 0) { + chunk_ref = chunk.next; + chunk = Chunk_read(chunk_ref); + stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); + } + Instance ins = Instance_read(stroke_ref); + PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref)); + + // Process the stroke polyline item. + uint max_n_segs = poly.n_points - 1; + uint reserve = max_n_segs * Segment_size; + if (seg_ref.offset + reserve > seg_limit) { + // This is a heuristic to balance atomic bandwidth and utilization. + // The output always gets a contiguous allocation. We might use + // all, some, or none of the capacity. + uint capacity_bytes = stroke_n > 1 ? reserve * 2 + 128 : reserve; + seg_ref.offset = atomicAdd(alloc, capacity_bytes); + seg_limit = seg_ref.offset + capacity_bytes; + } + uint n_segs = 0; + vec2 start = Point_read(poly.points).xy; + for (uint j = 0; j < max_n_segs; j++) { + poly.points.offset += Point_size; + vec2 end = Point_read(poly.points).xy; + + // Process one segment. + + // This logic just tests for collision. What we probably want to do + // is a clipping algorithm like Liang-Barsky, and then store coords + // relative to the tile in f16. See also: + // https://tavianator.com/fast-branchless-raybounding-box-intersections/ + + // Also note that when we go to the fancy version, we want to compute + // the (horizontal projection of) the bounding box of the intersection + // once per tilegroup, so we can assign work to individual tiles. + + float a = end.y - start.y; + float b = start.x - end.x; + float c = -(a * start.x + b * start.y); + float half_width = 0.5 * poly.width; + // Tile boundaries padded by half-width. + float xmin = xy0.x - half_width; + float ymin = xy0.y - half_width; + float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width; + float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width; + float s00 = sign(b * ymin + a * xmin + c); + float s01 = sign(b * ymin + a * xmax + c); + float s10 = sign(b * ymax + a * xmin + c); + float s11 = sign(b * ymax + a * xmax + c); + // If bounding boxes intersect and not all four corners are on the same side, hit. + // Also note: this is designed to be false on NAN input. + if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax) + && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax) + && s00 * s01 + s00 * s10 + s00 * s11 < 3.0) + { + Segment seg = Segment(start, end); + Segment_write(Segment_index(seg_ref, n_segs), seg); + n_segs++; + } + + start = end; + } + ItemHeader_write(item_header, ItemHeader(n_segs, seg_ref)); + if (--stroke_n == 0) { + break; + } + seg_ref.offset += n_segs * Segment_size; + + stroke_ref.offset += Instance_size; + chunk.chunk_n--; + item_header.offset += ItemHeader_size; + } + } else { + // As an optimization, we could just write 0 for the size. + TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0))); + } +} diff --git a/piet-gpu/shader/kernel2s.spv b/piet-gpu/shader/kernel2s.spv new file mode 100644 index 0000000000000000000000000000000000000000..7c7f48f69cd129409b108c4092967ac2799a6c5a GIT binary patch literal 16608 zcmZvh2Y_8wwT2JOOlYBmUPD3&U=kp7h(JP1f)N5i0a0cqlbdA7WG2o`Afbk;f<{46 zQKaazq1byt#PXh9v0;5rjrtx|>{!t6yZ8LLJJ)w~v;Y5JYp=EU+WVZ@GwIs$z^$rk zi)w5B_l7O2<}<0<5~ZrPt0px2c?%aWJbY|?>EXv5d!!CKR9#I!eRjmBhrXM(s()}; z!(kY9LRXtan?lpq*2Vu!^9ZE7+P0dvuy@|M=gwPrX79l2)xBfm>xLS=LxaPO-lc=1 zje+sO%NzP_UHA;ka-X&9v~SB=YkTi#qkrk4W8nuyiX?$e&fR@Kh%e;?zVCNypOk1k)*-#aieG&0&+k7KOq6@yF1 zSJ21T`VRN5cJ*UG^qyu6cP(lUJF^@7-*axh(Eo24tyNo}u4-3ewAS5I?NP^DFy2_@ zYR#+W(_ZWL)jn|Z>ep*|{w-x4p-yC;y&01{`+*k>kB#>a4>YT49yOnKo}Oxdc>J_k zA2pvra~qJy@#@yHJ$_6r=6p_ng(zB z(K`>h&wyilhc{`?ZMde?)x+^~r%`x;n%>HQl3kHW8^Lcw3qpf0?SKX(b zcS3b6+@0_ARrhK8_Eg8g+i|U3cT>~PkB~X^v2O0QFG5eXS2dfyJ+AkPQH)t$TW6ll z71pnN+kiT=7B`!z-c+OYPMk>Jsz+CK61bJSr#c1P9@Uz&non!a6PiBGd91Zh4rShZ zR;Saq$8J9Fw%#%CwxO8!=9}oGeZS@sKXX0}+?sQDbtbqT(X6T5hqrIk!)V>roO5G% zDY160&IZ?GHlOI`sb|-Ptks#9FFVTCEMs#{^NEu=EdaOX)Kx7653Y6I-PK}!`mbpI zT=KjBEcoJq#&F}bHJjc$HJ>qcde?hlQ)$Jx6if_{ebc5-a;*j@m+Nik>aO?^QH<-Z zu5Iz7=$do8sl|_O@viDNKr3!{bw7A@|7iazdgt>-`s8^lxSeN0^)|RiaQW!Sn$?@u z)zUFl&>yB*toDQ~@^QjI2V)FamTZ#TG-Z`_Z@SekJg;)ispx|`J+YO=c2d1<(}l$p7ckn`4V{h zEc8?jc%8R7$T*jw4f->y6~8%;m1uK^hDHW9or#`G&8J_T-t(}k);tfFgWI{ftF;~c zN;vzpY5rH&d}wghpy%Hhw{AA3tGXRNI6T(0-LaXiySh`(k9g=| z!Jlf{`|ESbot2B)Iu=iO*2&q2j~(eb?W%Tb;#o~RMLesePjBLuJ}7Q1$EUkm1#Yjk zyBg`>qaA##gJ0Xh*LU!HI{1ABpGW=+Si=NbH*G%u%{O5R4TH^V= zD&xtGM;V&&{5EX|-j_C?9m#uG##2i?-^ntbd@5sOe?5VXw-;>}vY2NoR_)^5sFCxZ zLi3r7#oupQn!hnsO|G^(wr%LA(Dq>5w)Ar2IL0d%^17TYt!Et{tl2Bo(KKZ^mTpus_W}LKE9K|`l?OF*E_nl%ykM_O+Uwazs}f$ z!H!jP?6le@H}4_zYWg{LI(^2@06SJ~3cfRIu6-(ddw6ZrPn+Y9rqB3ez>Zhbc5H2P z9^OgyQ#1DQ^!lnB`viLT#TeT3IhkG`#~9-jdShgMr-HSs8)FW=F&wAOF=x_SH=8lf zqIZnCW9HF2#x-g4d_9wX3T+G8^qQXo_6a`^?700{XTAZh*>_uRu4mUiYO`y8VXYa* zwOmZ|(f<;9W46}VviHHAhxVoP&fRk=Us-e4mU!#I#t6R!?7O4?t@Muf++`gbz=?l* zZTDQqeka)RiFXgUo94P7pm(mW`)%}c^SqtjIjFgAx!)l*?H{9e9-d+Ow`)F)K6y5R zec#NJZK~m;3!vGv3o`T-JXk+IsAn z*9q_`aN|E6tZ(XV-R7V%V(tvimisanu4Z%o-nSfMzB9nqK`rs;f&B=DFOXC4@C)F6 zqjn|hMPOt4eND_Eu=7!KjdH)SYBS)|>92x&?ha$DF>at4-}(O*%yVk^QU_LZ<|O|H z_(T+A`Ci@)H`WYhV7>i*TcbWS+i}#v5S|$y_u?-oJ@n4c?}+zZ>k#gJHrzArJvQ8$ zc|Q$zy!X;@zaQR7!~M>A7Y+BU^%q>b_t4n=rg{$z_gmr}G~D>!CBywztu46U5$}?* zJ3sG|;pX!m8ScB_Ju=+<-X+6*XT3{?`}4xPWVrFYOP1VwWXZipmi)mE?tL+O9q z+_Uy@!OiD=GIssFQ-oC^_%dK9}ZXZ`;~D=fF0-insG;k{$>8p0;~Bm#JPG;dp20z@#oXaGyX!bG2hO-E~3AfrsnziGya!=ef~-; zZO@^-hO_GTU7R&P58S!tOVQL*$6m0SXU4f5MBfKjcW!>e<<3pp09Z}i5_PMr*&|he6(L* z+wDCkH_%^8)6a3@?8}W{_o}SnO>p%G>N9vVSS@Gp1z;azYrBP}W^A#s{h9khu(3CQ zbB107S9g7$2YGC_fpflY1NgNWeh*l!4{QwYFlzC?53H7V=YFu- z#@hd0uv&7y9_$${*YpOsdSd-xi$Bj_W|%KT3PcC!u8Ad?L%Pw`tbLi z)t){3FgSZ=e7XLyf26QyJ|9I>5C0h0e5u99!TP8h)4f!4ec2~_Z4c2qC-Zr4{UqFT z>+ihA_YU?5TwVW%>E*r$+hO|@cn5m*^U%Cgd>X9oM4yU2YJ{g-I^sC%yD|4B2pxy0TlyoY=loVAww_Z4() zspnV0Y8TKl_G{pBtZP%#H?{mm;gec^6HQ(JuhYv@%Wr|LrFv@lZLqqJV;%*onZw^J zz5`b41DnI&U%v}ikI(nO?m_AEeYpCK#Bt0Iz>ZTl&tvpziSgfHW4L$8^FMI)`1}a$ zTuPrG!_|}LCt$~^o9Bo0YKi$%u(=cSXW-7fKSxvd4(r_i7wkCo#Q8t4u@mQUuyM*7 z`~t3?IKKotPW}2i_OHOk*LGc9-(Q2(o&QF9dGb5~_FQZz>hwFfdh+}p{9Bs3F@8g@ zmKc8o&!&}i{1aR~>-#gWQ%xSgp*n zHN4ET4O~BU^Y}ZmTIRJK*fSG85$>9^Z`;H5QIF3KV9#s#j&N(AF+0KaQO{f_fsLgt z<8}sbWL$i9f$OI}k?)n~d{;0o&AWqk*WmB<>WQ;ExLm^?@Nx}%!u3(ln7zQ|8uo^} z2U){DaDCJ>zkR{R(v~>;fy*^ahU=%EHS7=08nn9x{}!R1I8(vp8V-QFSDDX&V13jx z<{)snhJ)edK1_q_qnv zjstgodyhv`kIxC<&TsySXzCNWJ$Pe^>dA3F*!h^l@!B(=#bD2Wd@lfJ?vB%*x;_i+eD(1j zm>SyukuzoOpAELg-skkUzV5+A^gix^J{Qu|+yimub1`_^f&J zQ>SHM>tOBlw+7Z}1%2wI&vKfYbrL&Q@A8*{JsaUG;hqEk#&Rb1A-F#3enST7)%2f^ zeHeTt{@PZ7<=XuHcQyE-+BO1~`}Y^eJs&)a-Z9!7KT5Cecyo_~&8KaQUY@_-tO1{m zk7KkY=jC8?=5BulT+REMakQyf&(y=-@z!H4*!AmcUF7=udyBQRPp);?{9Prvu7s;4 zmo_!W=XW80Ps#YJusI*OHMtsYEv@}2@N3}usO#h3z^(&dhL7{R7AzO9r`JFAxgLBv z{`zQ3&KtlB8K1vV+z408oVBUtZ&5da<=$02SGRzDeE+oFOjENqV%K^dG34sW`$Dj5 z&F}axf~##Lmfs+MU%M6V`p*Gp+-+#;squ@!YL3simw+?QTFCW_{iWbWN8Fd8sVDC3 zV6_djjJpGzamJPF7yHY>{T*>%fu^4N-U(JqeYL42#w)?@XSwEAp{ZwXuLi4KfM3Sl z1#r$&&S7n_kiW% z`{?!0*}EU?-&Xa}R^~MKKlsUT0?mIn^=}M2(0u$i*W-z|HU0K9*RUgf{>^kDIR9qk zd0E`SFD>{|aHHm_*|~7zmbG34SI@sooDWv>?|QDyoNB4lMPRk;&2zwNz90E_jOT(c zq?t#1#y$_M&A8589$PQCjN1=a>!T&^5^&;b&)5O5Hse|Yd2GwT=DNCS{q0~mSS|Jy zV8_Hh2v+kPG=DyT9iwg?SJDsBtgY)ar~ERSG2PEqH0@8Zf8y-5e^=d&=CcdUy`DtB zE6u&$oj!ZL8eH!6cn4qC!LRM$Hx>L=@NG5ES$IC&yyaOKg{$W*jDgj17Mzn>YPSZg zma}jau%)zC$9F4y#}n!xYkG>+jZbF z?)7lB^|Zvj0i3wnGxkQXHse}rd2Ba>%eXIqtKC9N+!ulqS9`|32&~Px?wLHc7lW-$ z_Ut8K_c7;Au21}53UpJeJA?+XzI?>nDYEv$m_wrx9ZMSp1JN1K7i&k zh2~r*(@&*2*8}PEPQMZCT*Kem!Qa!tKUnaOfj?gJto2QB^OS3SGh99I@LRxY*+b`` zmOXq2SS_`ACs^%bTF&mfzz@>QqdjBa4c2DOT#G!m_kzo~?}MxL(GvIl;KbFQu^#|y zGp;p}$Mzv`8TZ3*we_^b{RlX5wP)-{!P<;#ZRN2&1a=Ln?I*z2+coPi*C+lDgPmL6 zwNHZ8Vt)i|j@Um1R?E+oPlFw!ZqAkTpP`v^1I?Ur{fy~3`z%enXHc9oI1PLl&F2uB zXYgS9LusDDr_txkeGXin!7p|2M+^RK@MASkT|N&tu64;;zW`RxnfoGGEoaW0YVrRv zSS@GnD`2(q%zYJ}J=C7DUju8iKF(bp+tD-vlSF_Kf`&SetRJfjqYF zfX$T}d>3q8tbzV=ed7N;urd6$97O+pnwsO?mmkm^zk%j>xjv3}FMmkW?mmmN&(p!P zXg-J2+~*neN7Q&G{gE{H{3!a&_eWrToNwm%W3YPoPr#1N?)UvkUfz`5(--FdIrX|lGz|N=4^GCRP&d#5}Pt-ofaPQRgORhhI%UpkftM$>0 z;~qUpGuH-MneVT)E&2WiPd?+APffq_cL-~JI?ZPe&03#KKexuG(w|1N)@RVC)_(_= zwf+a(G3A_|f~%+2{{*Y0*2YlF`uVk{QcJD3fUA|Y-V*M7$~;|Y>REd?IBPeCYgf}R zxq855t_g6pK4PcVTfxoMS?jIQwI$y+;N&xo`PB65to7N%^O;A>J2=0_XVEX9S?g!e zr`FpNr>ymMaL1H2n24sHT5k_lORbHemi6xdR!gmS1go`bJ)wE$+X?P`%%kquZRjVV zYxC#R1o+NiwbWx5aOz=P>!GG!^6v^R^X~>%>m#0VZl~WJp8V>`zX!UuYbZyB$8LXE4`-79;xaL>WFZri{%luQ} kYPS^m4}d3sA9~h*AiB2XKM1Ur{0D=R-?-*i(=X@oUniI3{r~^~ literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp index ef3faef..fc4f9ea 100644 --- a/piet-gpu/shader/kernel3.comp +++ b/piet-gpu/shader/kernel3.comp @@ -16,16 +16,22 @@ layout(set = 0, binding = 1) buffer TilegroupBuf { uint[] tilegroup; }; -layout(set = 0, binding = 2) buffer PtclBuf { +// Used readonly +layout(set = 0, binding = 2) buffer SegmentBuf { + uint[] segment; +}; + +layout(set = 0, binding = 3) buffer PtclBuf { uint[] ptcl; }; -layout(set = 0, binding = 3) buffer AllocBuf { +layout(set = 0, binding = 4) buffer AllocBuf { uint alloc; }; #include "scene.h" #include "tilegroup.h" +#include "segment.h" #include "ptcl.h" #include "setup.h" @@ -45,17 +51,19 @@ void main() { uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES); vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); - TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); + TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE); CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; + TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size)); + while (true) { uint tg_tag = TileGroup_tag(tg_ref); if (tg_tag == TileGroup_End) { break; } if (tg_tag == TileGroup_Jump) { - tg_ref = TileGroupRef(TileGroup_Jump_read(tg_ref).new_ref); + tg_ref = TileGroup_Jump_read(tg_ref).new_ref; continue; } // Assume tg_tag is `Instance`, though there will be more cases. @@ -76,6 +84,22 @@ void main() { cmd_ref.offset += Cmd_size; } break; + case PietItem_Poly: + ItemHeader stroke_item = ItemHeader_read(stroke_th.items); + stroke_th.items.offset += ItemHeader_size; + if (stroke_item.n > 0) { + PietStrokePolyLine poly = PietItem_Poly_read(item_ref); + CmdStroke cmd = CmdStroke( + stroke_item.n, + stroke_item.segments.offset, + 0.5 * poly.width, + poly.rgba_color + ); + alloc_cmd(cmd_ref, cmd_limit); + Cmd_Stroke_write(cmd_ref, cmd); + cmd_ref.offset += Cmd_size; + } + break; } tg_ref.offset += TileGroup_size; } diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv index cd56c48b74ae6db457a49a98e63a7bb323b3dcd5..f5b83bc7cec35213b4f2278dd2f7955c3cb1b4b1 100644 GIT binary patch literal 19300 zcma)?2bf(|xrR5HGeaN=B%y{95_(G@KqyiZN(uo&0!nq5%uJGjDRD}Qv`|A;L{UT( zV*x}&5l}>=8I2+cDkv(TLQq5n6#)gyec!$Jf6khd>vQkc#rnSQFY8}rmzhk%knPsZ zvURed{O_$HS$>VmhM;8Gu&gQXr_Gu>Yhqu2+r)h)@2SHESwrrp&xZIk(pP9*Egjt& z#*$|wy4on(IGVn;2L9(Tk3cHf`q{Ku&C`xMZrZFPn_CwxYVPY_*4f_N+0otJ+}6?C z-rC=>xLv=YiC;%|Yv({)>d=TsI({P+F{x~JPe*tEoc8&^yh=5%p850p+WRxSD$KF; zqqkzY*V1L$*JrM!y}7r&rETZF{@#Js{t1zJ!oZ|S2jbU|jli#$p&H3D(wL6+{=++Z zTRYo}0$e3^uiE&H*=Tsp*BY8~O~2j+^IDo)dpdi1i~99-4O`p0``Z)E_3UkF>lo;> z<`LN@@LJBng{Nf|)f>~0ZETLb{-N1sW&GU!-k!7CXZLh2o5haQ=AqrIHjj0)F>uEm z98=6kn^*mO)Sb_gj<)`V^v+$)`E)lga`%`~F?V%OU8s$mtgYZ{kGa--2baV&(s^%5 zj$+=8+1N7QjQ;j6XX~D+dDZ4RJlhuTxcW8sw=Afhic(94$k%o}{>2y@WILv@#Q|BX z;Bx#ZGtaR@HOGi-*EEOQ^M7P;e|M?IH)gwoXLR@Vw{*AWyXrjEylV4o%qGIaPn-Lr z=GEccofMbx>h5JNet0$s9`S=p3^7?KYxLn#tWzc18$5HMYY{Gaez{jIe^WLYenLlQ z`?TJkf!q;pE$Uu1-%7SG+1B@TJmuPZ zUG-v})V*qJ*_6$Mdq(Sh)xB!IjoHy~3n+ZeS;;m1x@qo(KAs)VlsCN3uw&_Kaciwf zb?9_ojv=Plm&WXP@Z9zVT|6rD$vBRhS6bg3cx^64eQI7seNCC%`ud7|`7_BHPeiN5 z4yv&x*S4Pu8nP3JDQaxW-UB|ot1W#^fK5Aa+`#{ZF<9*;gtCqhb zlJ`j?PyF|yS93Nm>1F-nQC5|s{PM+D)h8^Tzt@EMlVbVvxt->?oE+zQI=I-&rtFMj zoZ>l5LTFyUx>q{Sd2j|Pe8J9B&8xea9Uy6F<({zcJ|miPKj*oyrE`9B`BDkpcSrjE zasCU?s$;D+|7yAU9BcD0bVUDOT>Km3VhU3$G zWOv(IV-r{mf{)`2&==#3&dx65%GX`}L5t8@$%W&fBWBKJ)tn94`@u!+8)eJO{Kd}v z*V1bF3&;8`%EM54bi~CA+WSd(-7{57Wmv zdlbB=rMIQ4v^_;1d7c5Md7dkHIesO3q2PPbk++h)T=42Rufk)Tp*$RF<5aSdgE-&2 z@ECV-ej6{Ixp%P96Ve9dF`g;wHV2FI~1LIV#!y`YgyF9XUT3o<2*al)UyVY0Qp7i9i_cw_beyj(A*k7kG!N(Ug=VQ2cYWW-UXQ<{hpU>6Ew-q?eCm)YLU7oKU zkmieLR+>*O^7;Hq^U24Pi)G03eH@WauG)BP!|CP5Ya2-)IY)ubr#6;3k1lQco6q=-=+*T#-ro&Hyl2aJ zwXvwqO`Eay+RU*9y*B;L;qP1`$2MU7)bt%kuT5RPJ-y=x-x+K!xw-tkM&z0RHkaC1 zVkefiSc^U2YWf+wcWEDo?>=B-)wE46ZQ^~wa_iZTUar4!2htlC_JhFIqpts9^o|+a z=eqH7b5Ey_`e%U6rKW9WX>%?|msribv*^`~v5wjF#%eeAczW|1JEz2I#?GZzGe%#3 z9*r7K0-Hz8GjTG#>lb_KSf{}C(Ka5RQ|Uc>#%eR?8T65}1+2fC=VTteHd~sv6|SFl z^S05O*LZE_?VyjmXM*)t({@&3>nv?*=JI>gT*heAzn4C8^?}W$<{I_W>#uJ7(u6xl zwR02A`qVB=G{`>>MVUUJVw_}>q1DD4l?>pz+n zb*}=OFZ@@7D>U;zL9f5(Ana?v)};MU^xD1WBj1}~$8#Oxz2rLh{*fEECA~S+TnD-D zpvb!$*u37G`cEmj_ojK{{bXL>LvsBV(3?ZeymH?~k+&CYUhhr)uPFJPl3!DD??uOn z_m<=Mev%va33_v?IgZ?Slv=FQ8}Omac?G@S@a}Q&of7W-;kS9%ea`!R9^B`N-{-+sz^5eK^W(R9*k@q( zyF9r5ewPO~zu)C4_q#l}=iKk|;QIS*9^Ch+-{!$r!2K={uHA3);19w5CJ(NCAmPs6 zZ}PBvf1j6dpPhb}hu!t?+dTLRxZmc%wfk)z-1vJE?)~*Z!hNUtT^{~EKm0BaZoPh& z2XBS@T^`)={5B810{(o$wfk)zcH{jv53b#BbNLibjrYVjn)iw4>tN21XMTLCUyP4@ zJGh$nOT=vtHqPgD#O(lAUtY%T2sT#z5bg)hrds6K8C*a1E@wuu{H;Q9WUl}AXv@k#cXo=jdl=N-FWXydBh(AcKss$P_UZMNaOuB zI1HTY#CsphU9%O~rh?UcZ%59<;p(wIM}XDLd11MZM}pOT_F9+E1GUI84V>0B9j@l{ z-@2wV-VC_9@jfr)2awD9ISd{O&HHYA%x@AJ+6O>!Mee;mzA`|Q$g|2B4? zwa3u(Gfr%suI&k6*VHk5CdsYA^L=8j7tbo~KEu>vj_(Gm-G;x<^!L!z))4y^{wIUI ztU=pJG&O4wTWi#$9(hj%r+H6i z`d)hHEswDmgJW#pBXY;~`*8_)M|yR~?x$CC-o9s+ft~j%u=#vOydSPUg;S@`*#?aP_GFbMb{uI(q``lv^qPl4;}xCu@Df^ttj4OUCnb|pO4*1Y<}+I|LX?fUr4iFLGp zgLC8hf0pJtdq3*$8oLjlr}uIn^!Xf3&3zC%H}9ESz|KGBdMntuIxpXcUjTd0#C_*` z;5NAWZ0=8gfAB@Hd5qQOy>v6Zx_N!)eo0)~Zl}*VHuHTMKBctXK`%c53tjdV{&OY0 zG1?;EonSS`GOt{l`MwI)SDSg|5qGz^wB1FYbK;!G*WlJ|+&%R2sN?J4v+*%TTddDF zz~)QW=RUZ4d@p2prpJmP)}w{GKJqL)VXPPd1ga0h1SN|7pkNs)>&SMzb z1~jkXwEc+XS1kG8y*l28;P`#V)!=m#{*gL{#by#kb?FV70J+0c=dzZv(5%W<7k@evxL3y6fn- z?U!h-t@CqCxqjw!KkuMv|2O+5j=dfY-h}405zW0GMepy;-0My0W3TT7r+a-*9ltN( z4}l*}xaa55lE>bE74A6c-rogRkG;PetQLE34QjDgUjwVf-hUmemhSz%@Yo~m5&I3W zHrLC!$iwzcaGLj9a5aCw9ee)(IPz+b*ayMd%&-B+dBPC%HbxkEZ_-O}pn$9OrKgcq^LM7BtV_=JZ?AJbzo$$N75-oSwgD z>iBbY{DnIHa>D-vezoMW)=$G7H$9I(g{#MT{25p+=4nl8u}(h+tHs{@0<3leEzZp^ z!EtW1N9?m;ZLYs_lZWkjaGLj5aJ7}R$op$>wN6BzMDyB%W~~$G z_oP|tUi9%y^luxjHMoD@nDX&;e8+_Q??L{JVmjAN;Et2dbyGC;I6Iqx)jT`S%^K9= zylf3ti?taGR!g6O+ra%dSjW*GvE#tn%S!LGM+)?cnq`0oL>ws=8HwHb&hsZ)co+z>c}998<2JIX!2SOS@-K9A|J}YzNT1_RCvC z{&z0>gFS=qqK`A@-?pY_?x2KE0Z%P?tiu6t^Q33&U1;iY#tsCl#Tj!9wOEUT!D?~F z4$($S&)A{x*t6WuoPCZShOW&$uvU5GJshm&^JX%>M}XC={XqDUVCNHiDA&h$*X*d$ z?s|%2JstNDn%6-z?`iWLOmjUCrH}QQk>?w{J~PqOV||VWtGPaL_GiIkzS?6>$AGn2 zlQ?Q}|EJQt4x>d)Q^3}AIK6x9v*!fj(&xl%xPIv#9*?FTdol;Cwi-Xzb9ef=aCO(- zoN}Lu)^Z})=bpN?%A?kNZLxVBL5o_C1Y7GgdTZT>{xss!T2F%Om)3eRntIfF3Ro>_ z-5dW?;p*0EPPw&O%jr2T#ntGg-RV+`Q>NpNFO%`+PoFE%w5^JVbJt36^b2WvC0YakEXRba=8HMkn=y2L)q^$GtEfX(4PAMaX$ z)r`M}{zEk5W1r>v81G(QTiV@caqRODZ11Icol0|`Poh7q#HY}oPIJ$j>0{5YBS-oi z`Y7C(bPqpU(p$4`RQVh=wBR*OA+g8VnZt;ccHja`ra)9Bj#e(-&? z608<`^cnDtG~>+co~Y>;<9`;Mj{iBh+Dc-~|vxV*FdcYBBz;;27V$ zj<2R)jQ<62I{s~NwKe(p#qT1%2zPw7CiCgLC(>!+z z>EqnpO`i1J-2*qqeT|yFhNd3p?(1N+ICthy3;%n;YH{wq0ah!{UGZ;x_ra|v9p{^9 z>M{5G!7+DpICnMuVyth0)3F|atF0t=JPRI#J67zm=lLNt^%(16aExUR$5PWT#(D&t zj`eN0+M04K&+{s{W5u3&FMS71J~12pv*>oIVQWe&$u)35$KEh3Ls7tQl@CVh8_JL!99o~QTG$9a03Jn4D*A>5dB zpPxWekMr~-SS`+zIn-hfKLV@8dHOL}Ej>>^fm=^H&QoaWG54pzF?Vw~cQyU$=f0R+ zUi~!Z-bX(`bG}RH<81wuSoL(=pP{M8{GI`;Ilrjo=kTaSyS1n}r}|pXM)O)qi&~bI z_!Rp0)2!z}3xZ{c_L1$=KF_ou@YEB9DK+dkO42*BEW}>$e=6 z*SWMvsXY>(xU4DtTi4^m(o1F~{G+&5^F(@6gm^{ayyErR(>5c&wjx z=cDF2MDADM^|}9mrXK6}N3dF~-xzBKJ^RWFH>^!wO7kRATU%(5n8KbR! z{Vu`gbs;U*@1hd>9JrX~`dv!z`uUuAhdi-<`uwxxF~`5c&5^F(>uBn+et!e2rR(=5 zJl0RU^HF<)7P;R7*XRB_ntH6?Kfr3SejU{EHeB7D)-QMc&cgODu=CXBT;#ES{|5W4 zG$yxYwSOP=y?iCj>oQub-{mDdSDwhB$nTE&iaBlh*d>(LzR6{`0AIvlRP z5_|(~7}&LQUfQCD4ZvzX16=0~!D{+CXSu$;*hYb!lQ!oskNw#QtiLwbLLU3GG1xO~ zjJ6ni6L5@e{!QU(=5QQ&_-VRUPxR6WZ$IL)O!wk*BQd)rMy;imu zmX>ANLR~4lX=z!xR<@Xxwr^IpXm)Rz_xx^7)dZ{McQ_Lk0$uJ)F; zj-K|`zK-?n`i)5ZI=Wgr``c27Mm*B|ja0;>vSr;JU47@auL9;(s(E#F4cXqt$m?&+_ASRR?rUFb_13QDRjYkeb|Bn5`nB{`R}ZXC zsWCQESuAxU09X+SQt`vURF?)#_}_Cd0!| zn`^A*)nV-x#pQT)*SI!*R5k@3;|CQPVlq+J@^Ji%c~-Ki;4}NzuEQmdFZZg&uVmBU z`lwrj+^eg7LtdKaa0XnTLHWzTu|VdYj$e^`Savk{ypGQH=AQ2Uya2fI)xBypjLK%i z2Yl1b91v@hG|w^k7kT!`<_sDei!!Lv|0hau?v5XbGbWoiD9+1f`)^~MI1po4b~1S3 z+P3u8%jc}_Rhx5Tb}D?p7wp=od8PN5nr9;JG0%@$e%~ecXd`#gY3KtnTQ>A?zT&nR zkYgpwhsl6GWBsaQW~`bO+etMI*#h(C_hm!25X>!(-k6}5u612 z<@1w!)y~$~>}+`H^RkN5BUYT+}``yR?k&O#f|R zwdl14VVrRdJHQYRoz|iYr+sxTm@ye?NOZd=W3_5VtSrXZ4Hq&tJ~qUUhow%pW>Au4EhNi7|^V zx4n_RSdWJ68gS&g7F^3!$*wE-Y`T2hjr7I1N_Go)UA3pWHn&x>I|uQG>~46(y$4*2 zJ1n~o?oGei=hwhI3LR6{7dEhSc|eiN^>~QBHkV5FaKU|S<-U*6$J`zVM-5MaYc(`v zPYw80vR&YGE)6^sYP=yE4v#paz{~ntJ1yJiiri}*n!WXoDK+twWb8a}Ep8<{sg5tI zTf#a-*WTdRE?-Cc`)KKhN+SNi9B>!4h3$gV)&xM^ly!&UmM z&ToO7Ujt9?iEB$doWt9AsPIyJyr)A2I#CWjwVB%z0dC)8BZG--}*dU&rrDALBg>j#tyy zxoK0EA58C9xno_Q7<(w#v1$|Wb)B_E-G_nI^mFV~`WSlz*s*Hbrj<5x9toD~qs{Nv z7&jB_I5mBJd1+IZA4hM_;B&#ok{iqK!iaSW*jQ?=o!@xY?z)9f6IdT@lkk~O?;bg} ziDt|*=)=DmtiPJJMTu>3X;U-Sne@hTj5huKW{OzLz{XN@Zs*YJukQHs6W&y67bTi8 zCo-;u-Z;i}+-39;N1Nw%75zlo5ZaL?Uk&yOz6R{LgQ+u~E!ONaLvF6_(noDx$=8*d zajd~-pO^lA+ZaXxZ;#p@NpJ{Ua zM$?-|%{+3SYif~a0el2}Z`7q=WB6RNZeyGe)?dvSa-VH!j1_Q912I>@y%&8Jb%2c( zYq|!mySnk@J{#4{*I8o6M2+2On-l(e_(Ztz-w4(>>T&J2qA_Crbk3)HdnH`Wb_A6; z&-c(G{!8G!@eAHau(9yzM7B1UH`FD=GI|CAj1LR!Ol;F;C`nBZ-PIaaOdmyO4!Z+Y{Iqsy%P2&xZf+mwfnsypU*CM zhSt(NSMJxz?1^WjtJJsQBVPws^L)j)*MS}9b0x<0z|}XG<9fl4RX>H@cdykVMnAYd z_j)w-$h`rqb~AZm+(vkP?oDXwSC_fp09NxnMm?8w+>(Ll`WI z1ngy8Z6Bto8CM*0`xrRp=JQ7G+fPkejKds6=&lUaCPTzEV*^N-sUrJ zc+a2UM zpqIx!ehz#fKI+^Oa5UG%#jP1_b?$sYhaR^41a+ttjq z1N&FN##sY4m+#=O!qxSEkX|1CUjrK>p0WBr1XqvoUk5wxN^)rb23Q~Ui1SVG6|^+Y zx8Uldc-A@YVKD#a-y_D*9^=0auCL<}H1(+CJ7DYBQr7WZus-S$=X>D#Ivzz+k8zKI z)wa^2w(o=aKmVJ*arL>2JWJ_+K(lszJU2h0_wt_9_CuPQXGR>g{21(<;=Sh@`~QVnM!Tg`EtueI6_{YKZb^HoVJ?i*1I9=P{!1Ym& zI8T7<>-a62`j&D}eg{_DN{iZl59a@TZH=o>tnDAb)~=7wnpj8szj1C{|0ij#^IbIk zU1Rs*kMv&dgFa8u)Z7QLbMv106WIC3T>l5`T%DKit^Wmk&%}M__sE~&>dUx4Plf*l zY#hgG^Im$IUfsC98~+NvrnEgnFV|+gzk#nUZO_un=VI~O_3z+TddFysc+Y{=%w=48 z#CsmBuQub#W8912sN)5&T$}Uw2iUqDw~JmLb^H^&0w2d{i}m>z*m&vsSh#w;*M~R} z+Dpl}q3{@IKDmDGbL^A7{PXngn||E{JYK^^7>nDG|hSK zK_98(xT@m*U;E1a|#$FEAW?a`m z9=11u&2>5F*yreGnp)VmfQvDOeJfmT8SCLQ;7ZzNn!4+_js7Z{YwP^XDc8@K?&sAs z?Ju!^;@E57`QvF`d(qtMJ?Z`3>R#_dAA5Zb*u4vWZ5_WZ;r^DpG2xz{TS^{#|7N&( z(!GBRTs`*wtzfm-duvdOwR#&^E%yHHV6}Ab-vN(3(jH^q3D)L%ITv}@-UUwMUJqB> zPK&*NH#p*IkFoCoYcsBEA`jbp!RCrJcpunxihYyo6ZZFm>(}T5XzDTN4}#Thruoh3 z9({=RL7FwV?sD&Y&%%dk-rwrx@xJ^B%{u&MbBtWSsQaT}=Mrc3V_-GM@1XxU&GE5M za(x^>j{Xxg?VdkzoWEh<18H9S(>#Cs(H}tb{2fFeYq7J`)ARC4H1#+yp8~5{N9@yW z;MgbaF{e+1wONxmYO>x#XkG`?qNWL8Ynn*!J>Y$EC(ZSa8gB>dm)3X(Ts`*dvtYH@ zud(3I(bQdgW6I;3{4Vf#n!2^hqt?mbDKxLcXx4ft{oyogeFc5ouXls3HTXSs{GmGj zaKax2KUVUX>lfkXN$2_{xO(izm%(arF04T<_TWLVTCB}iz-m3T*uSrWAE22>dyM@W zSet9+T;yT? z4y<2VjSOztHqcYxM`7dek}ut`@aUg${+QTdOhU)@m&ku=j(ywaTN`q3Ea4yynxa z^%VLAC2pc$NVD$K>Epf`PK^zNzYkn(6Jv~H{(a$* zUp?~ghpsL1?+;ds{0D#|zj4j4reEYg5S->e2(DH<%Zu-tgW=}CDakznO+9iS0#=LM z6Ty+&nC4c~FLF-;r@0S>t8FJw+((n)=63&L++k?yk?U}9!J6uTj%fH1#-lM}XDh+!;eH{HKA{;@lkx zRx8e3@pp{raO+9)%s^9*xgQ0Nxf{c|tLaxi_vOU$I)~=mm(ia~bH3-%$NY|Fta_Sv zCYpN8Zx&e1`9&?qz@rxJ)}rQ|>T6kn=5;sBl7!D27w!0Th5fTkXMI2WuI&&ZRhvsmV zG{e=6Y5j87ZxOaL!Ol~gbCJjTodxz@cZ|0B^=rlEbr~(zuUg_3`js@-uZ=#Q1Ivk% zuHQuo_g=a*;jV8>$zu(cz^mx#`ksxZ9?ykWg3Y~?_vljaGMYZ>&oR$)>An0LskU=y zYStg*Z=9cYxSI3Y$~rjb W)nN5iG-J57YX0po*3CZ7$NvE}W$5Gp diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index cdde198..931f28b 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -14,11 +14,17 @@ layout(set = 0, binding = 0) buffer PtclBuf { uint[] ptcl; }; -layout(set = 0, binding = 1) buffer ImageBuf { +// Used readonly +layout(set = 0, binding = 1) buffer SegmentBuf { + uint[] segment; +}; + +layout(set = 0, binding = 2) buffer ImageBuf { uint[] image; }; #include "ptcl.h" +#include "segment.h" #include "setup.h" @@ -41,10 +47,24 @@ void main() { CmdCircle circle = Cmd_Circle_read(cmd_ref); float r = length(xy + vec2(0.5, 0.5) - circle.center.xy); float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0); - vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color); + vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx; // TODO: sRGB rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); break; + case Cmd_Stroke: + CmdStroke stroke = Cmd_Stroke_read(cmd_ref); + float df = 1e9; + for (int i = 0; i < stroke.n_segs; i++) { + Segment seg = Segment_read(Segment_index(SegmentRef(stroke.seg_ref), i)); + vec2 line_vec = seg.end - seg.start; + vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; + float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); + df = min(df, length(line_vec * t - dpos)); + } + fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; + alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0); + rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); + break; case Cmd_Jump: cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref); continue; diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index caef463f859ae19392f19ee45eea1b41a0ee71fe..b931f2307e6870ad8d32e8874e9413b7ba4e235a 100644 GIT binary patch literal 11640 zcmaKx37A%8*~bse3?iT+g1g9w5+IPcvZM&814ax;Wo0(Kyz|cR!fegF1Go^lrRCD5 z)t8km+M;cj*L`}ei4UtQntch0$I9_G6Gj`R5Z|Nr}b?&p5ib7p87 zKYO1n8<$Pwzh8~Z^5?*8JW7^L&L-q_+qzBb77aBz7oB{{2|DbbHRXQ#9FXmkwNN)x zdMoul4b2#4P^Qve{Xojpve>4H|Hf0_2WieGWo_#^+S=RO)}7r^9T?~sYFyM)>*%TX z)jB%sgSBd-zP+a3#Kf=OSM3?@OdVSANONeRZw8jF?(JM%AFTG&&Z~7nQCo6d+~3tT zRBL1?EsQnsE&bGds(lmGH3$IO$!TxP&?&>~i?$h8SzF^0w=F`_P zRO`<3EHraRA)Br|cm2$LOQol)V@JKSv85Q3x~n{POf%3%`Wic?k!(lDG~F2am|C(! ziniwt_YUN%VV-I}qvL7L4ujK&e)(ABK7F+vdETzpk?_)IY!+qw(HxGzx5%M6n_09Q z>0jqpcG%DT6g3+W1nXgqVK1IyQ*__h9*l1(Z*}L6wNxS)FyJFh@|910| zc8kh(4)i~_at}6T3rE_uWFFp4weDWF^T_zsd`8DVB|8;f`gPR%I%_*)7DgNAGcjE5 z+mrDx=At<}1)R5`_Dn7rjg_yZ+-Im!8RY1p6+Z6ET3;t^CTGjR(SLes#&n;LZyCNt z-%Z)+;QG#@-?P*!JM!U~$2sr~jcU)TVfTGY&XKYKG;8N-jetl0c52!$4*r6IpBVgt zf}d3Irff5SW0uEpT#*Oq?3Kr^f{(<#yx=2ouP*pV-0R^H_hxX!y`}Ig$JdnIHsaTu zeG)uS8LaeDJC=LK@Q3!|P1&RHi2E3LblzIB@59S^ck1%FQ}Y=r_EJ9PCkiR=?EhX>ZOu#Raz^eSgG z_T6;&&Wk#R>wOLE6Z7{E?b>?!H&=Sr_HFO4RvPvGKAU52$_~x_3w#84czc0ogSqo^ zpM_xVfV{s&;B-!x!>jp=rO4+wd)b?^74X5lujXtGxY9GQrBd=u@Gh=kUKe@n&2U}} zT=Ye(DtxrBrmPFz$=sWF9b7NR(6C*t$nWC4+B9dEjN!ZX;w{;3c>a#vUaJx=&*Lhz z&VhcHBJb;3xO>_8zH_hkP1(KhV$E8zuYmLS#6YFGt>zhS$$p5owztw<%Xdw&Z`6G1 z>eP^hkF(JxP<)TC!Fmq0&+MYg+VEHNy`;bA)w|j!cxyp({x~>|7yCAirxx+t z!)ZLZ@l4V9?nU4KhnL)M9J%iz?N7kd{$jjoe`?X6V^8~&w-Rf78Sj^H5jgTUJKH-_)~h;b;`7;4%aXS8#U<@#vzJss_i2HQ@}JZDjB zQg3TfL0gIXf?&TeJi_&gP**;^L3OY%a$2WJ`;Y& z?emu{;A}eIN5l0|w~tn8{bCHq!0khQIz1mxJrAq(0!qYP3FcW5aaY0BEa9`h@EPIl zIp4Fwem8>c*EMo(&V$>pdbIBb&)wFyUi+5P?itYTynGsMI<@ic2CGH9drE)xi1#2k z%XY7p8|M+YcT(_2;qH;(kHft;+%s_}yJx(6<>vJxYS%~2HIaM&su{zP+F*s>HRQ{HqMXS`^N8<;O+~*TY`JO{4Pnk-zLFb*UTXTf zALaULI})sR6=Ram0;?TO(ck^57XHVA)!I2ot<=X+)GR($7TdbFwf~(nG>6)=Wc?TV zo=ZKC;-h_jX}A6dcJH`nQS`H&*c_a*XM>$9`|y0omzHaC0{9qw)U|t_)NJcoF9xgK zj=y*5i4-;0`LFn&4E8YxZ6{IG%t36fk&}AFJr!(RV=SZg`6oB4x?Jfr{^#AYgxdRV zDYbFcV{E5`V{D#nxnuL4eFk_AwYp>TE>N?rYj-BtwY;oc%hh1DRTTHN=j|-8?N)%J z-Pv&UE6R4~fNiTjg<13c&<5rwTSd`s|K2s~#`dhP1si)S*x0r^7p`tj^5=qAQq;p| z9oSg*ug~+q`l(N0rqnio`Qa`v?CT-z6!Ta^t=&Al1D+2y4|BB-*Zl=>b;q-j`h^sJ zvNKCtJH4o%U)r>L|Bt8W=VM>qpZXZr{r@70`&@4O7gO|!^YM~WQ}?klW{mv;1oMwM zd@0!Wez%3sg>ZGxsB`f$ujcMMtb*n4#c9}T;Q7?*#@bA+Chnr{E^(dOy=EUvp$1x^^cOPXMBiaJ9W>sd>h67%|{&ZhQY>*J#4(~ zaCPtO6W}|*YLVkkaGK*qa5d-RVru=xmr!e4%1DfJDcHZw>7z}bUDWE%v){L`1TUvn z*WRF3Ghfg192n2|dYt(y!M0PkpWW1Ij#2+tfe)ozM!)i_ z!D_20(a$wtb4mMoHC#RVc@5aM>h^OLwOaIZ9oW8uUk}#jM*4N{-vHJ}J$zmZPTRb$ z!TiR-U4=PF*pAO)=xd|-M51I$?r1lQ}`Az{>`O+T$zi0=4npaJWrl2_qVlu zdY;|}cCY#kBY!(sZ7bOJ+SJnX>N#2Kl$CNE$-HL zfwfuw)7`3%efd6lH`u;bm3=wx_rTTNhw@v&YAY$`qjnqEd5HJPd%^mt$6CJ+%ul}7 z+S9f6KGtr$R_gatW|w>;^#>@MC~?Q%UTW$-R_Pso2ZAwUTptA6KE2~V1Xqtc{=;C~ zsmC4v5wLocxZ^*H#!r67o2&Wg>w22EcGq(mwfkWywPW*+{}^~bioSPJ%e{O29sVw` z?+x{hXs-3g!RkKo4f_eWx-pC;x4(JR_GiDbe?9}&C+-Mieip7C-_@T3t9_ah?LH5V zcJ?FJFYU+vF+iV zqp9CtuGcrfYL8GJD79~bjioJOeG6=V{{1NW`!-xXVtog!_9!J{eHUyzZ4v8xU}MGp zHuuNi>JjVvV71375$gwF<7snFzCx`QaeoY+PD#i66kI)i$NC9a?P*H1`zhFV>0JH{ zt{!{t8L(~DWBfk{o5M60{kS!y{o@Ds_FNyY#i_Farxg6{8rze;xmPE8h(E7 z??-W89zY#`PdFVMzu$Vlo;8O1?^dbZf455c1qrW!H@~1s40{(?Z3F8!iMpF&8+F%x3w531UT{6^ zQ?6gc+y-`h5wi!ZX8T_1K8o$PQfx2RC))Re9Y^f50kHnDhvfR`KbiU!6z%_J|BK_S zOasrL`1o%No|OZs{d=Nkc;FEj=rjz+;bSkG7YBwK?C8O&+#ag44LW;cDqw*#nQb z+N14dU~R^Aj^tsx0-VOZ60Vk>m8;+pS9`R560*|8-!;9R9Bd>mU1Cu21;C0qj`fT)YviW_!=Y zjTGC*`pfl+_HP24bM*ISu>Lnu9FJTd{oVU-p=kG9h~r!w20oJFb2!CwaVYf>6wk#` z)Nw9u2B+uZmW1C5zOCeu)7#+2an2&|w}aJVpT7gF7UQ%JweWu@SS|MXyTEGcK7Thn z)>nJ9eGgch^I@*?u)Pvoul_tv^#&|n7;|&V<gFnsT-}@V zDL!*4k?TCLxgJk#uHM!6P|{pK3)e5r^>c9b$o2DJwa9fU_-=~2xf)X*e{Z@MJcFWc zuJXv$eXxk)vw#x0E(DwFv#HJ1^YaiT&Gn0L{nA|ThpR`f4}jGo*F(S$Qq;}WnDY3& z=}X`vDeC4bk6cdxpGxsLiDIscsZTEPiPWc1%=Co>AAuoYZ1|eh5~Jx%d%SExogzgg-&CAMMfh z$6#&7b!_smJq=Fd{sgYJkrH>sPr(sad$j!-SetR3BYD_<4o>6#0?tgv_>TPx*f#2}`xffIQrru!hkeTRiTXR7JdFN*f#3X b=fA-Axs_s{a{Y|Cg!} zP5hfRC`re-WH3yUj7Wy0x@FVWO-pvv+Lm0n>=_1(P6}z9F=LXUNh7)e=_*ybb-WgN zH!_xd`EjW&cBX{v0y8AZVL(GNJZaffY}vfIWz!YKa&K>OM{Q4MrPx{Rt`ys0=a;~AT)K=}^AvYw=bv=C@+e^iAPiGIYjmb#%DOa=DS1GkE*iq~2 zFV`05yIS18bm>as3&|*J(5C&kS43^>@9IrEw{JPenZ6s6@!%{jU7Og^UD?Hq)*6va z0%tJ;yU6OF>0u(V^}L2;Ng>JFeGwl=w|5kMPANVCK2y7poCMeRwJ~`dJAHS( zrSkQaHsTwTF+9)>U8RnS2g~s7$T_O=XxBI{<64+=3EU8*0l5-y+y(EQqlI`mzkJ4f z3A|^Hz|D1-V$a2Ig?C@S-$U`t43B2+{Cv@0em=RF&tCKMiJPf~O6$+Wu>Le;0$_ zRz>Y`*w&TPH$L;3HvwC0j6T0HkvA3FJUL@sn?8B*EVOe5ZpOBjcrO0==vbrcKPTnu zd$usc3*a1q7h=24S@aTj!oECvv331`7$dhb!`=ru>)2!Z5kMQiEc3~Se+BlS9A1r` z&tDJr?9AVYb{(!&yd}e)LG-;9+j{!9W&ZjZHYDvCc5h;So->j4^Su#mZ!zC%u&pKU z`o!MnsQ6G}iUm-@DMRM?UKNKCEv&`~DEZSP}TcfZGn8 zw_*|Z&vkzUF-G1wZb6$DYxo$pbI4C*=1-%~#jCypiMmIy8LOi1QGlEhF?VA-0^gIO zy{nw}UTo)Osp;B&hIU?g<6Zath<{g&e*i5P@eg7fFCX%`|^4Ps+_SAmCs z$L8<|uy@`&8{ZZ0tltl@dp;KJ{>!=NV!t7B(dR_469t|DcAiCad?VU#jGW^Ay@<0g z-?cd0`%^HDXy=%Z-jBFGu{|EhVei4g9QI!OcPHY-{_P3unckYi*1t1{t$$Yz`)>R@ z6#4r7I~3S=a(DnfmcyRq@fN^m5 z^ABX$e9!wmi0^VUEAp+rH{;|TT!S>vGt_6EaqmaudcnTc4`iHkc)ns|_1%of?PpEm zk0LiCzHj3_f4PYNIJVqoF6u4lPatxN!=9oq?~49MxRZy_-c9vG%zZohlZZqAr_lP< zkK(@reH&t&ytX~KXP-gbEBo`li7&~{Ls%zJgd!9BcXpcC5)aB6dw9 z(BDMnAo8xsHzjATXZ9_`Gu%hK_}dwG6zzT9g8mL--U1}@zMJu{%JRMkHdlTmyEP2` zeZ=0^BKn=*H!E-LZRj5$*4~O(+q@ql^7bVD5po2PkC-20Tg&;4`3Yj2{7806?x%?J zFGTd;jogFS$1${i`8DtV!hRfKIxqp8_&a{vDBz@8~}=?tXOS{S!O#oJVY2ejeu&A3{6t?P%w4 zkN$tKG$2FYQnajzNj?}+gj^$wT8pVN3D_Aa{g9|TBES7r_Vik zoP4>cJ0ANWTzLhGAe>+9qWNh>DdpQM8KJMI9Y;)yf{nN1RVGXx*8G1U{ z-)P=l|0KPnj zH)3Cv;aK~*VCT)hxAVaAhqL>z2wQF;;+pJ9F81MkY`NI03$f+g&)A!bu$Lmvqd#&l z#@1(D*Cr0%CD{48E5UM}Z`6GzcGT4$xtC(=v#xt24&N&5eBCu*xz$M2U5g!c^+)bH zY<<>suf^fJ0y|%~1uVB7iMm%}M_v7qy8&CDbv-k2_%>m?H!tJ6Fbw@HL@xZ##&(V2 z-;6ES%J~gXbv;MkbKio#8u2c89?mIlMWW_&QvOWM=Yr+T--f;hF@Gy!zSx+^e;&5$ zh7d!|k3J&0!-J=U;|%lFt1mX9;o zg)O%e@i&P1yAgT!+M43{z5G(_(TKdgilbNeb{^trMxxiVu> 2; uint raw0 = ptcl[ix + 0]; uint raw1 = ptcl[ix + 1]; + uint raw2 = ptcl[ix + 2]; + uint raw3 = ptcl[ix + 3]; CmdStroke s; - s.halfWidth = uintBitsToFloat(raw0); - s.rgba_color = raw1; + s.n_segs = raw0; + s.seg_ref = raw1; + s.half_width = uintBitsToFloat(raw2); + s.rgba_color = raw3; return s; } void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) { uint ix = ref.offset >> 2; - ptcl[ix + 0] = floatBitsToUint(s.halfWidth); - ptcl[ix + 1] = s.rgba_color; + ptcl[ix + 0] = s.n_segs; + ptcl[ix + 1] = s.seg_ref; + ptcl[ix + 2] = floatBitsToUint(s.half_width); + ptcl[ix + 3] = s.rgba_color; } CmdFill CmdFill_read(CmdFillRef ref) { diff --git a/piet-gpu/shader/segment.h b/piet-gpu/shader/segment.h new file mode 100644 index 0000000..517c115 --- /dev/null +++ b/piet-gpu/shader/segment.h @@ -0,0 +1,99 @@ +// Code auto-generated by piet-gpu-derive + +struct TileHeaderRef { + uint offset; +}; + +struct ItemHeaderRef { + uint offset; +}; + +struct SegmentRef { + uint offset; +}; + +struct TileHeader { + uint n; + ItemHeaderRef items; +}; + +#define TileHeader_size 8 + +TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) { + return TileHeaderRef(ref.offset + index * TileHeader_size); +} + +struct ItemHeader { + uint n; + SegmentRef segments; +}; + +#define ItemHeader_size 8 + +ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) { + return ItemHeaderRef(ref.offset + index * ItemHeader_size); +} + +struct Segment { + vec2 start; + vec2 end; +}; + +#define Segment_size 16 + +SegmentRef Segment_index(SegmentRef ref, uint index) { + return SegmentRef(ref.offset + index * Segment_size); +} + +TileHeader TileHeader_read(TileHeaderRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = segment[ix + 0]; + uint raw1 = segment[ix + 1]; + TileHeader s; + s.n = raw0; + s.items = ItemHeaderRef(raw1); + return s; +} + +void TileHeader_write(TileHeaderRef ref, TileHeader s) { + uint ix = ref.offset >> 2; + segment[ix + 0] = s.n; + segment[ix + 1] = s.items.offset; +} + +ItemHeader ItemHeader_read(ItemHeaderRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = segment[ix + 0]; + uint raw1 = segment[ix + 1]; + ItemHeader s; + s.n = raw0; + s.segments = SegmentRef(raw1); + return s; +} + +void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) { + uint ix = ref.offset >> 2; + segment[ix + 0] = s.n; + segment[ix + 1] = s.segments.offset; +} + +Segment Segment_read(SegmentRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = segment[ix + 0]; + uint raw1 = segment[ix + 1]; + uint raw2 = segment[ix + 2]; + uint raw3 = segment[ix + 3]; + Segment s; + s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + +void Segment_write(SegmentRef ref, Segment s) { + uint ix = ref.offset >> 2; + segment[ix + 0] = floatBitsToUint(s.start.x); + segment[ix + 1] = floatBitsToUint(s.start.y); + segment[ix + 2] = floatBitsToUint(s.end.x); + segment[ix + 3] = floatBitsToUint(s.end.y); +} + diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 9ce2de6..a644dc0 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -15,6 +15,15 @@ #define TILEGROUP_INITIAL_ALLOC 1024 +// Quick note on layout of tilegroups (k1 output): in the base, +// there is a region of size TILEGROUP_STRIDE for each tilegroup. +// At offset 0 are the main instances, encoded with Jump. At offset +// TILEGROUP_STROKE_START are the stroke instances, encoded with +// Head and Link. +#define TILEGROUP_STRIDE 2048 +#define TILEGROUP_STROKE_START 1024 +#define TILEGROUP_STROKE_ALLOC 1024 + // TODO: compute all these #define WIDTH_IN_TILES 128 diff --git a/piet-gpu/shader/tilegroup.h b/piet-gpu/shader/tilegroup.h index 64b27d3..213ddc3 100644 --- a/piet-gpu/shader/tilegroup.h +++ b/piet-gpu/shader/tilegroup.h @@ -8,6 +8,10 @@ struct JumpRef { uint offset; }; +struct ChunkRef { + uint offset; +}; + struct TileGroupRef { uint offset; }; @@ -24,7 +28,7 @@ InstanceRef Instance_index(InstanceRef ref, uint index) { } struct Jump { - uint new_ref; + TileGroupRef new_ref; }; #define Jump_size 4 @@ -33,6 +37,17 @@ JumpRef Jump_index(JumpRef ref, uint index) { return JumpRef(ref.offset + index * Jump_size); } +struct Chunk { + uint chunk_n; + ChunkRef next; +}; + +#define Chunk_size 8 + +ChunkRef Chunk_index(ChunkRef ref, uint index) { + return ChunkRef(ref.offset + index * Chunk_size); +} + #define TileGroup_Instance 0 #define TileGroup_Jump 1 #define TileGroup_End 2 @@ -64,13 +79,29 @@ Jump Jump_read(JumpRef ref) { uint ix = ref.offset >> 2; uint raw0 = tilegroup[ix + 0]; Jump s; - s.new_ref = raw0; + s.new_ref = TileGroupRef(raw0); return s; } void Jump_write(JumpRef ref, Jump s) { uint ix = ref.offset >> 2; - tilegroup[ix + 0] = s.new_ref; + tilegroup[ix + 0] = s.new_ref.offset; +} + +Chunk Chunk_read(ChunkRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = tilegroup[ix + 0]; + uint raw1 = tilegroup[ix + 1]; + Chunk s; + s.chunk_n = raw0; + s.next = ChunkRef(raw1); + return s; +} + +void Chunk_write(ChunkRef ref, Chunk s) { + uint ix = ref.offset >> 2; + tilegroup[ix + 0] = s.chunk_n; + tilegroup[ix + 1] = s.next.offset; } uint TileGroup_tag(TileGroupRef ref) { diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 703e156..949ccc2 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -4,7 +4,7 @@ use std::path::Path; use rand::{Rng, RngCore}; -use piet::kurbo::{Circle, Point}; +use piet::kurbo::{BezPath, Circle, Line, Point, Vec2}; use piet::{Color, RenderContext}; use piet_gpu_hal::vulkan::VkInstance; @@ -22,13 +22,15 @@ const TILE_H: usize = 16; const WIDTH_IN_TILEGROUPS: usize = 4; const HEIGHT_IN_TILEGROUPS: usize = 96; -const TILEGROUP_INITIAL_ALLOC: usize = 1024; +const TILEGROUP_STRIDE: usize = 2048; -const WIDTH_IN_TILES: usize = 124; +const WIDTH_IN_TILES: usize = 128; const HEIGHT_IN_TILES: usize = 96; const PTCL_INITIAL_ALLOC: usize = 1024; -const N_CIRCLES: usize = 10_000; +const K2_PER_TILE_SIZE: usize = 8; + +const N_CIRCLES: usize = 1; fn render_scene(rc: &mut impl RenderContext) { let mut rng = rand::thread_rng(); @@ -42,6 +44,29 @@ fn render_scene(rc: &mut impl RenderContext) { let circle = Circle::new(center, radius); rc.fill(circle, &color); } + rc.stroke( + Line::new((100.0, 100.0), (200.0, 150.0)), + &Color::WHITE, + 5.0, + ); + render_cardioid(rc); +} + +fn render_cardioid(rc: &mut impl RenderContext) { + let n = 100; + let dth = std::f64::consts::PI * 2.0 / (n as f64); + let center = Point::new(1024.0, 768.0); + let r = 750.0; + let mut path = BezPath::new(); + for i in 1..n { + let p0 = center + Vec2::from_angle(i as f64 * dth) * r; + let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r; + rc.fill(&Circle::new(p0, 8.0), &Color::WHITE); + path.move_to(p0); + path.line_to(p1); + //rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0); + } + rc.stroke(&path, &Color::BLACK, 2.0); } #[allow(unused)] @@ -80,7 +105,8 @@ fn main() { .unwrap(); device.write_buffer(&scene_buf, &scene).unwrap(); let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); - let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap(); + let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap(); + let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); let image_buf = device .create_buffer((WIDTH * HEIGHT * 4) as u64, host) .unwrap(); @@ -90,7 +116,7 @@ fn main() { let k1_alloc_buf_host = device.create_buffer(4, host).unwrap(); let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); - let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_INITIAL_ALLOC; + let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE; device .write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32]) .unwrap(); @@ -103,6 +129,21 @@ fn main() { ) .unwrap(); + let k2s_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k2s_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k2s_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; + device + .write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32]) + .unwrap(); + let k2s_code = include_bytes!("../shader/kernel2s.spv"); + let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4).unwrap(); + let k2s_ds = device + .create_descriptor_set( + &k2s_pipeline, + &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], + ) + .unwrap(); + let k3_alloc_buf_host = device.create_buffer(4, host).unwrap(); let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; @@ -110,24 +151,32 @@ fn main() { .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) .unwrap(); let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 4).unwrap(); + let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 5).unwrap(); let k3_ds = device .create_descriptor_set( &k3_pipeline, - &[&scene_dev, &tilegroup_buf, &ptcl_buf, &k3_alloc_buf_dev], + &[ + &scene_dev, + &tilegroup_buf, + &segment_buf, + &ptcl_buf, + &k3_alloc_buf_dev, + ], ) .unwrap(); let k4_code = include_bytes!("../shader/kernel4.spv"); - let pipeline = device.create_simple_compute_pipeline(k4_code, 2).unwrap(); - let descriptor_set = device - .create_descriptor_set(&pipeline, &[&ptcl_buf, &image_dev]) + let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3).unwrap(); + let k4_ds = device + .create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &image_dev]) .unwrap(); - let query_pool = device.create_query_pool(4).unwrap(); + + let query_pool = device.create_query_pool(5).unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); + cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev); cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&ptcl_buf); @@ -141,36 +190,47 @@ fn main() { cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); cmd_buf.dispatch( - &k3_pipeline, - &k3_ds, + &k2s_pipeline, + &k2s_ds, ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), ); cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.memory_barrier(); cmd_buf.dispatch( - &pipeline, - &descriptor_set, - ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), + &k3_pipeline, + &k3_ds, + ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), ); cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &k4_pipeline, + &k4_ds, + ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 4); + cmd_buf.memory_barrier(); cmd_buf.copy_buffer(&image_dev, &image_buf); cmd_buf.finish(); device.run_cmd_buf(&cmd_buf).unwrap(); let timestamps = device.reap_query_pool(query_pool).unwrap(); println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); println!( - "Kernel 3 time: {:.3}ms", + "Kernel 2 time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3 ); println!( - "Render time: {:.3}ms", + "Kernel 3 time: {:.3}ms", (timestamps[2] - timestamps[1]) * 1e3 ); + println!( + "Render time: {:.3}ms", + (timestamps[3] - timestamps[2]) * 1e3 + ); /* let mut k1_data: Vec = Default::default(); - device.read_buffer(&ptcl_buf, &mut k1_data).unwrap(); + device.read_buffer(&segment_buf, &mut k1_data).unwrap(); dump_k1_data(&k1_data); */ diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index eb67132..f5b6897 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -259,7 +259,7 @@ fn flatten_shape( } _ => (), } - println!("{:?}", el); + //println!("{:?}", el); }); let n_points = points.len() as u32; let points_ref = points.encode(encoder).transmute();