From 2ed89dd65e27c5a8cb07d4e5cecd2b9b35d816d3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 20 Apr 2020 17:15:36 -0700 Subject: [PATCH] First draft of kernel 1 Output of kernel 1 is validated by simple inspection, next step is to wire it up properly. --- piet-gpu-derive/src/glsl.rs | 3 +- piet-gpu-hal/src/vulkan.rs | 13 +---- piet-gpu-types/src/lib.rs | 1 + piet-gpu-types/src/main.rs | 1 + piet-gpu-types/src/scene.rs | 5 +- piet-gpu-types/src/tilegroup.rs | 18 +++++++ piet-gpu/shader/build.ninja | 2 + piet-gpu/shader/image.comp | 2 +- piet-gpu/shader/image.spv | Bin 7604 -> 8096 bytes piet-gpu/shader/kernel1.comp | 83 ++++++++++++++++++++++++++++++++ piet-gpu/shader/kernel1.spv | Bin 0 -> 11644 bytes piet-gpu/shader/scene.h | 21 +++++--- piet-gpu/shader/tilegroup.h | 64 ++++++++++++++++++++++++ piet-gpu/src/main.rs | 53 +++++++++++++++++--- 14 files changed, 236 insertions(+), 30 deletions(-) create mode 100644 piet-gpu-types/src/tilegroup.rs create mode 100644 piet-gpu/shader/kernel1.comp create mode 100644 piet-gpu/shader/kernel1.spv create mode 100644 piet-gpu/shader/tilegroup.h diff --git a/piet-gpu-derive/src/glsl.rs b/piet-gpu-derive/src/glsl.rs index 5164179..617669a 100644 --- a/piet-gpu-derive/src/glsl.rs +++ b/piet-gpu-derive/src/glsl.rs @@ -219,6 +219,7 @@ fn gen_struct_write( fields: &[(String, usize, LayoutType)], ) { writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap(); + writeln!(r, " uint ix = ref.offset >> 2;").unwrap(); let coverage = crate::layout::struct_coverage(fields, true); for (i, field_ixs) in coverage.iter().enumerate() { let mut pieces = Vec::new(); @@ -254,7 +255,7 @@ fn gen_struct_write( } } if !pieces.is_empty() { - write!(r, " {}[{}] = ", bufname, i).unwrap(); + write!(r, " {}[ix + {}] = ", bufname, i).unwrap(); for (j, piece) in pieces.iter().enumerate() { if j != 0 { write!(r, " | ").unwrap(); diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index c4a07a4..e788919 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -449,13 +449,7 @@ impl crate::CmdBuf for CmdBuf { unsafe fn clear_buffer(&self, buffer: &Buffer) { let device = &self.device.device; - device.cmd_fill_buffer( - self.cmd_buf, - buffer.buffer, - 0, - vk::WHOLE_SIZE, - 0 - ); + device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0); } unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { @@ -465,10 +459,7 @@ impl crate::CmdBuf for CmdBuf { self.cmd_buf, src.buffer, dst.buffer, - &[vk::BufferCopy::builder() - .size(size) - .build() - ] + &[vk::BufferCopy::builder().size(size).build()], ); } diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 60c11ab..44d4843 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -1,3 +1,4 @@ pub mod encoder; pub mod ptcl; pub mod scene; +pub mod tilegroup; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index 2a20c3b..7ed941f 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -5,6 +5,7 @@ fn main() { .expect("provide a module name"); match mod_name.as_str() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), + "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), _ => println!("Oops, unknown module name"), } diff --git a/piet-gpu-types/src/scene.rs b/piet-gpu-types/src/scene.rs index 8e4ec3c..5f95c40 100644 --- a/piet-gpu-types/src/scene.rs +++ b/piet-gpu-types/src/scene.rs @@ -8,8 +8,7 @@ piet_gpu! { #[rust_encode] mod scene { struct Bbox { - // TODO: this should be i16 - bbox: [u16; 4], + bbox: [i16; 4], } struct Point { xy: [f32; 2], @@ -19,6 +18,7 @@ piet_gpu! { // Note: both of the following items are actually arrays items: Ref, bboxes: Ref, + offset: Point, } struct PietCircle { rgba_color: u32, @@ -45,6 +45,7 @@ piet_gpu! { points: Ref, } enum PietItem { + Group(SimpleGroup), Circle(PietCircle), Line(PietStrokeLine), Fill(PietFill), diff --git a/piet-gpu-types/src/tilegroup.rs b/piet-gpu-types/src/tilegroup.rs new file mode 100644 index 0000000..4824178 --- /dev/null +++ b/piet-gpu-types/src/tilegroup.rs @@ -0,0 +1,18 @@ +use piet_gpu_derive::piet_gpu; + +piet_gpu! { + #[gpu_write] + mod tilegroup { + struct Instance { + // Note: a better type would be `Ref` but to do that we + // would need cross-module references. Punt for now. + item_ref: u32, + // A better type would be Point. + offset: [f32; 2], + } + enum TileGroup { + Instance(Instance), + End, + } + } +} diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 037540b..5befa7f 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -8,3 +8,5 @@ rule glsl command = $glslang_validator -V -o $out $in build image.spv: glsl image.comp | scene.h + +build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h diff --git a/piet-gpu/shader/image.comp b/piet-gpu/shader/image.comp index b3e906c..60739d5 100644 --- a/piet-gpu/shader/image.comp +++ b/piet-gpu/shader/image.comp @@ -32,7 +32,7 @@ void main() { // which is horribly wasteful, but the goal is to get *some* output and // then optimize. - SimpleGroup group = SimpleGroup_read(SimpleGroupRef(0)); + SimpleGroup group = PietItem_Group_read(PietItemRef(0)); for (uint i = 0; i < group.n_items; i++) { PietItemRef item_ref = PietItem_index(group.items, i); uint tag = PietItem_tag(item_ref); diff --git a/piet-gpu/shader/image.spv b/piet-gpu/shader/image.spv index b1b6eb761f2658ffdc14f3b287a6348cfafc0675..527c9ae2b14983aaa723ab098d6d68b77092cab6 100644 GIT binary patch literal 8096 zcmaKv32 z4PzILPm(do1peDGCP|+e$yk&mnVgJE+twYscB~q#cCK1;`f3BFB@JnuF-H;8NL#>q zN|jz6U&p4StIbHU+))yGHjq6@?u8VRNlELDV(ZSGtvk*wcJ%ib2dn$L%f;?WZ@Ji6 z87OyDEBngEO~}PndONy@I`aXIMC8YBq#`Djv{%a2wraWOqVnFfN;RLpy?Y1C)r6?2 z{8x`FR(dP+H4u;%c>wOO6Has`T`C zmsEqnTg4KhaVQ;%s(=D~CM(l|Ar-eQH79URfz+nQ3FvvKb8 z9>ypS)-_K~eT=Ee0(fqWBW9b^DYHK3`Sgi7H-X)$t(Ad}?s7Wiv?g_*`dE$0LU=va zn#QG?aRXg@O2v-8?!JLqt;Ra1j&g6coYS1sK&i7b#GE`oC&KGFk0`$`k5pefKa0(g zo}VenvaG-7v{)^5jqG&&?A9huoM&(8NZ+Z+%6#8+%l~Km?D*H$ZpBFdX-P}Ie{BK( ze*)sI!Iwa-{)XgiaAjD%kZe^iU6H;cyo2Y$cd>xwEkh%BLCt4Soz{GNPk1lPwZB$XP^`V|siMEFmoN&61b*76jR>%sk{fl^QAyMZ?H-UQC) zy}8Cuqe=7JMjLtV0N3*rlDkLodq(lY@ZtT%A>Iz;nvlLHySj_5-FmAd;cD`k8- zd~a9LYlpufX@=Lz*Kr(H zz_#&koIT%%+lUUn2=3ELe>LAx_V--+9`Fg?T%#%XJLk9H_{@J6W7o&4)oge^zcojG zYt84EHxo~j=HCu5|6FV)Z&Tkr&Gfdvcs4aUUK8fC0FU3en&wwCNx51Ru@f_{znS?@ z%D8d*?C-ZK`cKdNYIE?-pl!mAA=lBga_g|)Oj@;Oe6wif_SffnM^5KrJ!<;=?&w>< zzkV~s`qj;0KG#>DeeKgk8~K{SwfX6*z2o)S*ZS=5`n%4?%9qd@17OczZ zQqA$q`(2D@eemx=nh&46eg$X6b@@JKjJjj|kk)=N#*e_(puPgX_@|irGK)$drH$PG z0rPB(+!msyMa*Q2%ke2GKd{lUr-2yf^qj`~%X8{`O71#uq;*f!Txa>FOf!!)ZG}5f@blr0vjVUCxcT-^TFEAJ1|7Kat~}eZP+puix)u@Izp~k9qDlGPry1H!`?;-JIi&?{`xF zS**|fwBNuy|2t8=GvCZKbsxg{z1EPft3LY~_bu!&)(`hid^^*ObiZ6+c3pOV zevoP2Bjeo@wTS;QSZybV!#ng7OilB#rl`yP({CR4%6qnsapuwf99xO0>vwO}#O`Ao z_6+Cem$aTE+b5~{5!zp4KKg${tKZfdJ-fff%(0oVT-V=W=3j$F%rzW)g`pYK-lji>!Hrf&V7GkJ{lSFrWy^UTTDQK|j@hE2unqt6)6 zt$O7CJ6KJhXHxDuI@f<-%P@8Qo?*4X?ck{6pJ4rWW^;ZLtY*wp^nIGv$NB4f3R83b z;;83e;HbwtAh(`{wExChFm-G2&Zybf^Y%RU0Co-W@-ZA1H4n{ZW^^fVEZBaoZ|s?| z4e0s~W_=4_`>Ri(?+juZ)o8b1`mMoxsqX$A0*?oqe=}xX_M3pF9(*F$evYMo65JT| zDVzeesbKziN7np)3w-}OrgvX|oWU7j$Bg;7cSobCTf5(cnHYb`rp#v^-x=!G<$aq4 zeu!3XpV^o(aUbVon!1n8SS;ps41%@AS{w_u|1G(gxo~yg3C{C4u>I8Asnt5>fz`uY z6ZPZK_)At{)@nV*Iv4BK@4bz4Wh?hid(LdF)%BTAtoP;sb;uXM)rP_5)2EhyPdtJa zYqb!cV_uleV-Z}KR4bQxO(I|1FYunjL3B+*nIjT*IKZ-;vBhO>)`5yA@@sh>ntGh4R*qHo#;S9KX)O04; zzA@i*V70Ya%w;{;e)`%uYu2*?Q-7W@#Zl`_@Epu%7G|x}X=h{BcMPq+(X9Dwtd^_B zw~pc$i1^*w*{=W8FPIdQ!VDdE!FFDJs+;--o`%d0Jma} zqd)q-5UkI<)-Dg<&Q!1Gy%4VES&O_Efg`W}=(`K7&%CaIJbW(#JJyw~q3_QZV`|}l z3D`d2e<@h4o%QfL_cF{r>aL^T#Ft~Pt@CqCxpC%nKVO0Acdx~<*Ym&&FrVWw_j)ev ze9XN*fj0JfH#oo7ouhbV6z|RPDtIWzJx|wXJkCN1?zs80um`RlXQ2bE7H45MSS{AB z3|5PCzZa~QKMP&(*fagn_cE|P*U!1h!}m(?ZY=U%4pz&bg>HD{)gOI(!1~PV8p*@g z2i}cE-dBOu`ms1;1K`N3Kl%=W^_kbTmWOX2*g3@7UIBLf<9x`CiTM3sYm0ZotHEma zpFsN>%>Kie~Dy_pd`!kF~iLtacD{ zoTF&3!_=+G^Cb7Trf2^3*c?pVaW1BP17;luF#E`ja~|G>H)8s|C*rs#3&AI1K8rB# zNi*$Y%zJVYZQPR^z~giLrcwOn9KRKOTaJ4#?#Oto&70tklfM@?!qwwmycw((>uU{a zu|IDCtHoNr6|B~V#W{N$IL?{===*lCKG(~+$iw#za6a!l;cB;Fk@sET$g4m4z8kF1 zysn8neD47}R;J=Ya%1f88T%Nf-}5Jq^EVE>4D(rn z#qWouV9(#lw6PW+2dn4L%O~LKabE5Mt64|v(eecc zT3y=~%;yv=YF!Ps)>CP%wVC!FEMMyv;Kt=^{UTgFYW)&eEoyB7e;HG^R&&bZ|AX%Z M`#w>(R=KtQ7X#ES761SM literal 7604 zcmZXX2asK5702&p--eJzLJB1$?503M3Iqa3Vhl-$OE8*+sOSv(^0ND~Ja}8!eH#*F z1`tq01q+G=ow3a*;tXO#v4t**y^!K~(p4~6&g~K`j|0&=3&iCE>mevW2 z4$88YY!d&R+LGnZ%xnTomQBef=DKs+j%}-k8$GMmoP2@-)3eq*&X_}pX+xKgzDliM z$7hfk@NzSATkI+snXOdMvO58#Y;xAQt=zeNd*`+-+i0Q^rQi8M5ODt(Gi=+1H?n#iv2FA}1wBMf*PV);r{%sj=0UxIT7Sbud7O@% z&)B-9Y!*0=Lpv$4&)$8mF*%!yo%_o})k@E3PetXtp}dzl#5H@Fm>rJ2qt-WAuXYX% zj11--xW2qky8e;iO}hs68f1*vXDp^In+Hzn<#BR8{pA{y9UkR8+a}Z8vvHpDF4m|H zH)p*7oYXsDX9-VBWBZ#=PVBE8+f&(G8|toC^X=v9%lo9gF9eUp+S9~L&S$81SEbxN zP#+j-_9}1x-PQg^HR0@ksM1p#Df(Xm9;2JZ?1C(_Q;;5eHS+Oi5jKsUhH1qJhb?J^jHtA z*#qEO?rY0_pzS^Ia^zeo`yqNPzLY(50B+5G2Iku2wSRtquarFu?#|zf=Gu=O;BUjD!u7l zk@Km^qph>N;5wPb>2MPfztd;(-;HP=zfETn9e7dBW?3isa;=E@o6+(92JRS*E#>Fu zcloFyerZd(UbAPz)B5%t_3bsSFYX{7m9KvahV_p?X7Oe7+t5L_`Py^n(fzd}J`3>p zd(-6n1Rewr&c7vD^DJ8~Gc4n>RY!@OB&xek1@ z(PH!UIq#_HUaTpn&))!j3+Tu1x3+$HYdKqa^NeptN39O*=H8n5&QhOi+nf2$Ufx*o zv1nu5vv@gLzu0*mk8Vd=kmUu=KUf7ecMZ~BVB^;#&S5_NosKrfUN@q}&VCcx7`b%? z-du35W6l=D$M{aP@$%u{hTVeb??StG_bNWOz@9^++Y z_MQfA2hT|G60m#n9>%xGd+0Yu>^U8W_DtlQv)J#BoOSJK71)IWpA2@L6?i=#ze{q8 zck&eGZ@zo+ao$hDw4q(Yv%d{-e`4?I?c?wr3HF@sO7I}|y$SYPaDRgRc0G__XXI~P ztS9z&F0kjHcCA5O6TM-uGae{>vvEWw`rGYR&r{k@C&_Tz7!cs=uR z#-ByJ|Jz}GXFgYO@;-!TFJRKnRiAmreIAh;1p7{Wq2P>l=3-;@eG!qnx_E!ST5!Hc z#(O4m5q~$f+;%R1(F-$%@of0{bt zZy>H`eQoP`?mt4T*Fdc2nf(}%w}0=MIM(_Jw*Bby&WTT>Q}cd`Ohe4kXN>n&K5G90 zTTY*MQtTYv>o1XIh`fI9uw2;AG5Yuww*H%oJ^vb8&X_02{Vm$Z{pgvb`t8B@Qr`3PJ^cq_{q?yte^>rl@b+n)e<9|%miXU@G4fNn z1#(X!uC*M||2Xml;+l`4^~W9j54LN@emuKp5qW!m3jG{nz0(nW)_DdkZ(sY-|3!SS z#O9b_OuUaR76>WK3Fz3@1bp@uvuG{yuT5e~aPoc=+-DoMdGcNKx*u*LwtR>)kv|BI zf3sDHz1okl?!~_K``*UAQj2}3y=Q8Bbv~1b^}V^AKEwxu<-8Zx(vIr&_V4#!9Czd}Y`OHEJsd0_F-H_J+o&CLIucI)?qW_yVaxfq!aW5yAKO~`qSgXz z*Q+PBjt0v|tqyFtb|h*o#I~NksI>^&T5*p&uf<^bsC5jsoPVQ4tz)sRr_Y)A8z>ib zS76^pt#rRD;pDF>_I5nBoPV=K-U-;|rL$ZOCm;9eL~L{AW0ogj+k@YtPV(1a`?rzr zt?~9KXWX-#?(-1;_BjauG{k2zaw>8DR!k{ue}|?b-tp<^=yNr;G3j^VM6mp5PuS+h zeow|e30Z^0UQWR_PhS^z&3;Zr z8)u{R^G zqd#)DV(YW6y^F*5BJ8y8i@|c%Mo+ro#VOaS0K*T{kf*tIBR;IuSE2F*4lB_^RO2nK1U&*^%3a# zh-ZB?I?nby?DVX6j>FY)_<{rvVqa9?IQv(DT_-*JSA*r_?9144>k;>54{|Z93btIF z{Vr^|^z6IAaYp(hw+CCFb$6o0;oFU!*6jt$rDuOWIO^(;+#0q%>pBy0_%6gw>-K@= z>PXzFe(b2LKXM1K^;y^1io-XAy%UMq4r4p-xDR4uB7Owh-r`%Z2V2hkN$86a^Baik ziH(W;z1a3({*mZ?h@APCpkITSALk}E#(eMGrHFp-qjucKh1g3FpGAoGu>-vr@jf1d zj{A5Sc6uML9EYz-@O9YNCwK<-jRlUGT@H4g^gdnzmXG`RT5P%4mp#bET&}{Fi~D#r zw%isZ&i8fLalZN^_x0HNoVR-shwly8Y27!1<*r4d?whcquKvh4yTc15?M^E;<67gA%L{BTQ z?P(R-ccKG*3*tKwJ$@A1xU|QQf#u`8ZpN02^J>TbI3n+PSyLSU5BCY|C5XJeitY7( Dt@!{c diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp new file mode 100644 index 0000000..436b8bd --- /dev/null +++ b/piet-gpu/shader/kernel1.comp @@ -0,0 +1,83 @@ +#version 450 +#extension GL_GOOGLE_include_directive : enable + +// It's possible we should lay this out with x and do our own math. +layout(local_size_x = 1, local_size_y = 32) in; + +layout(set = 0, binding = 0) readonly buffer SceneBuf { + uint[] scene; +}; + +layout(set = 0, binding = 1) buffer TilegroupBuf { + uint[] tilegroup; +}; + +#include "scene.h" +#include "tilegroup.h" + +// TODO: compute this +#define WIDTH_IN_TILEGROUPS 4 + +#define TILEGROUP_WIDTH 512 +#define TILEGROUP_HEIGHT 16 + +#define INITIAL_ALLOC 1024 + +#define MAX_STACK 8 + +struct StackElement { + PietItemRef group; + uint index; + vec2 offset; +}; + +void main() { + StackElement stack[MAX_STACK]; + uint stack_ix = 0; + uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; + TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC); + vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT); + PietItemRef root = PietItemRef(0); + SimpleGroup group = PietItem_Group_read(root); + StackElement tos = StackElement(root, 0, group.offset.xy); + + while (true) { + if (tos.index < group.n_items) { + Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index)); + vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy; + bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH)) + && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT)); + bool is_group = false; + if (hit) { + PietItemRef item_ref = PietItem_index(group.items, tos.index); + is_group = PietItem_tag(item_ref) == PietItem_Group; + } + if (hit && !is_group) { + PietItemRef item_ref = PietItem_index(group.items, tos.index); + Instance ins = Instance(item_ref.offset, tos.offset); + TileGroup_Instance_write(tg_ref, ins); + tg_ref.offset += TileGroup_size; + // TODO: bump allocate if allocation exceeded + } + if (is_group) { + PietItemRef item_ref = PietItem_index(group.items, tos.index); + tos.index++; + if (tos.index < group.n_items) { + stack[stack_ix++] = tos; + } + group = PietItem_Group_read(item_ref); + tos = StackElement(item_ref, 0, tos.offset + group.offset.xy); + } else { + tos.index++; + } + } else { + // processed all items in this group; pop the stack + if (stack_ix == 0) { + break; + } + tos = stack[--stack_ix]; + group = PietItem_Group_read(tos.group); + } + } + TileGroup_End_write(tg_ref); +} diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv new file mode 100644 index 0000000000000000000000000000000000000000..0e9a497e3388a17e9fb102a7409c01a7dae8a420 GIT binary patch literal 11644 zcmai(33yyp6~|wjOejl{rBGyPC$vDJd!dw4wziZ|D6~=pQE*7pX&6Y8m`qB`A}tgY z6jXK;Q1(T3WM9Nm1k{gB0m1J>1^0agML~bRH}51bBj4vEm-9dWv)psfz3zS8D6-F7;Mdb(BjTmCkZ$X{D#! zR;{ck>o+XtSLtl)=v$h1DBzKwU!6rIbCz_iJfgfTtx(;oYuU2iay6mmjg#T%`jjf2 zOUo>hn}vnr`)>qgO-Th z^=*r+%~MEr059sQbXHvqmpd%yHL#w>q!~T+L%Q4QUMp9bbE9M|IJKAZlSk%sq3PUX z@T<+;lg!G`aDI8Of%yx`IP{!vTBn>>XQ{$&^!9Tcx1`3@)}x>2 z&|Z~$2RKjmz#PMq@#wiZ2CXE=Q|G{Xb|a>?o=uX;;Mm<#wY5FAAh`ZO{M?60{jtN7 z-Se?&I|fbvKPil#)*oL;ri15q_EuXv+sZc5KFWCwtg(>HKo37{P5dY4WzU$k{~XBM z%YpbUl6}x4zI1XAA+gF}W0l!07BnrJxMJBfZ!&A&n>_P%3Vz)i`1$RjKH6H(hQ zd9Q(5wo2xr_xqM&qQO=5+y8%AX)UwK8FkG8*Xr6dX&E%vyw0WnXBynodNa$ddiNPDI9GDioJ98+t0Lt1OC-iG8{$e8Q= znqM|oV{&0#+~w#IcU>;-rn

2E{cdch_`}VOraR$XcGpx|Xzd%^pmpjuq?^$`Thz(9UJ`aNPnKp1gG$gy>RPE|b z>)97P@Y&Fq?1w%ue`7McPM6&tA(l!?+H0=hNnUPRun5Y@KpjV;hYuGR&z|cg@B%BZ~}k z+D~=kwe5_IoV$R{DK`psTxN3(^2V8CSEM%m)h8m&;X2iQz-w1`4nNmpjy=KVk{gAu zpV>8=H8v)DWp;UcI1}kQjMHX5KU*W;{$TUT>3;xHe|i0_XI_pQk8Z!`GrZKzbr{1w zhkg{enCbeL80Ou9`IjJ#v6eQZy0tGw>LWKR)4d1eB5ym`OaBT|o4La7my{t4?LA1> z<{8j#9eoV@s(md|yXQsyvP^Hz^s6(yh>ZOAf}KG^r<{FJ_gdx7& z0DB&`C-z>jb$OP}^$oDOh#d^G)=N~Wyq`mgJH7v^$y6+?A zRo@>uBBy)ie9s8GwfU|Qx@XRJjnK9Gt`WLD^t~W-?*ZQpLiZl^-5_-9o1N3O`)&|+ z`|GU$hKSo7-8gea+!pBatFyQ*!N$rvx93bQ{I>$j#XKXy za^CBUsKw8KZNT!zdoI@q43;Z0%$<-gWzYP>)*Xfa~j>3n#C?&kyyPti)Ugf`=o`qfMWANO||s&*Ou@+acw( z`;3xv&b9D|faPNSBjFB3m+zuxpZ5zGa>lsVhk>m#)>VW%99@1Poi}D7L(buqiBzs3 zk3NeS=GNEReE!KfkDs-R!E$jgj)prDT|Rt13igcWeU3tx--Iq1b2OO0^jsUSJ>owG zb`IY++#~&uL6;AokAv&y_ynB%8frD>Sg@R*#WC-3VE&SwvYh%nh`(p*lML(9$1`<2 z(#tcYt;CS?Oo^kGR(a-2J=U-MJ9}y0IvMuI`%Hg( z}FWIF`r^+x6i#uFZ(Rt!;rJjV%O(q&kFEZMm#%wUY?9D?;L$db^U$T ztO9#agul;`Q_$u0Ux`%r-Zj^$V1LUtk2Za(NO|kl=X9`~w$qU6-q+^-GYvzOxN znRF7VM4qV^M&%?>ua`BwAhhIdO z*T?7Dm%z?j0^5@!@zX)ucyz^Xul+)kO^-I7bGTX&Sb#2C82G-xWOOfiX z!S~wB!E=$uXp6eP3^rfB?ysQB$NR{YV7V(85qA|h;+#)iKhHw!kuvuFYOs6nzJ3kq z<-W>)m9d`TzKWg0d*nKB{k~ogC-0osBGsd&8^QJadJ~+y{x=}iV_$CuyRYWarq4A< z`N(+-xIX8taPrZguY={hH_f387M!*< zAFEZq#SL*tE&wd|lZ{i$#9)5r>um4L(^*9eNgFO%OaUSIIeX&34 z(~z!DsXjT=U8lAAe0&w`bIDwh`$t>+kqytRs>R=*!?$MEtu+~}+Sj%|JZULC)s_-|~kJM`8%y(6dlTS#xF$NG*# zH*bD@pG22mm!0+F!E!!JJRi<^JR{b>3@jIGJpnA&&hS}NL7vDcGn_|z#GVA!W?pMo z58HBZK5rMgobS((w;LRJwMXoyz}n1f57fg}1v}UJY<(-ha{e}Y8hhjKq$e}vpTtLf zWya|fc~*hV6EUZN&r1x_sn011y)%vl`tzG4~puQkRVd=4xhXZ7=7xj3uNDHr}1faT(> zegQ0(KdWCvk29q`V!s5|W`C?*J!}_&^LZ~um&>2kOVA^)_K3X{tj)akKs{_%fSt>I z9*z7mLoV!J0UHzcE5UNU7Z;INF^rM7k5?nV%CNWg(K*%iGpGCcHHLQgS{!>l4m_UW zwJXED-UYcE!@Zt>jJ>`NoZsu4>h#;{^t*HV1Khci?eVGST27SZbdIMoJV`aejTjM{<${wu-y*M=e+}6E`Jv8M321M zBla$^HuKse^|0Lo&gcCGy4<~tcplydj=b6<_I|K7^V)0mussBJ4bj_&!S>%Z>#wd) z_&*A^w)h--3@m5-X2{1G#y`R^UR|Gv|0dWP;xqdRu$=M3kl$h$AN!{M1S9Hu5?sIc z>*3^MFTV|zdy3&aJ0PEC$Xk==N!@oG&-}BDu?%_V@%i%|hIOoC7^AMA>llapE__bLi{RL2?GgJDSerG8 zqbBR!o#8c=5j9N%Thks$@7-OHuQN7aM2#}~%5qLa9-dfe8)}i2i7+y0O*19Kh?+ovSoW-#2eN$ZfccwSM`MQ6MZcM)JpPNf~e*V9r%f<7> z`Txd<`K>GF|9fVO`QJvDi}~LH$Nc7XemVW}-znVtxeTuZ81DW4$k`co@8>Yw`vZ}l zS@p{)sLhXW?D2oM$2G{uemv)^6Q$`ThTQ>OVO5dvNk` zZr%sW#r-iI{2zwAIjvvae$GG+MR%RrT#I`ACe;8=?+>K5`u?>L=QWQJ{W}P3&kjaL hPa27p&-bJNCm%g&0?XxlvH^PZM7uT0c^;yc{{lh-+SdR8 literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index 440f491..5e36abc 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -33,7 +33,7 @@ struct PietItemRef { }; struct Bbox { - uvec4 bbox; + ivec4 bbox; }; #define Bbox_size 8 @@ -56,9 +56,10 @@ struct SimpleGroup { uint n_items; PietItemRef items; BboxRef bboxes; + Point offset; }; -#define SimpleGroup_size 12 +#define SimpleGroup_size 20 SimpleGroupRef SimpleGroup_index(SimpleGroupRef ref, uint index) { return SimpleGroupRef(ref.offset + index * SimpleGroup_size); @@ -116,10 +117,11 @@ PietStrokePolyLineRef PietStrokePolyLine_index(PietStrokePolyLineRef ref, uint i return PietStrokePolyLineRef(ref.offset + index * PietStrokePolyLine_size); } -#define PietItem_Circle 0 -#define PietItem_Line 1 -#define PietItem_Fill 2 -#define PietItem_Poly 3 +#define PietItem_Group 0 +#define PietItem_Circle 1 +#define PietItem_Line 2 +#define PietItem_Fill 3 +#define PietItem_Poly 4 #define PietItem_size 32 PietItemRef PietItem_index(PietItemRef ref, uint index) { @@ -131,7 +133,7 @@ Bbox Bbox_read(BboxRef ref) { uint raw0 = scene[ix + 0]; uint raw1 = scene[ix + 1]; Bbox s; - s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16); + s.bbox = ivec4(int(raw0 << 16) >> 16, int(raw0) >> 16, int(raw1 << 16) >> 16, int(raw1) >> 16); return s; } @@ -153,6 +155,7 @@ SimpleGroup SimpleGroup_read(SimpleGroupRef ref) { s.n_items = raw0; s.items = PietItemRef(raw1); s.bboxes = BboxRef(raw2); + s.offset = Point_read(PointRef(ref.offset + 12)); return s; } @@ -213,6 +216,10 @@ uint PietItem_tag(PietItemRef ref) { return scene[ref.offset >> 2]; } +SimpleGroup PietItem_Group_read(PietItemRef ref) { + return SimpleGroup_read(SimpleGroupRef(ref.offset + 4)); +} + PietCircle PietItem_Circle_read(PietItemRef ref) { return PietCircle_read(PietCircleRef(ref.offset + 4)); } diff --git a/piet-gpu/shader/tilegroup.h b/piet-gpu/shader/tilegroup.h new file mode 100644 index 0000000..f1d646f --- /dev/null +++ b/piet-gpu/shader/tilegroup.h @@ -0,0 +1,64 @@ +// Code auto-generated by piet-gpu-derive + +struct InstanceRef { + uint offset; +}; + +struct TileGroupRef { + uint offset; +}; + +struct Instance { + uint item_ref; + vec2 offset; +}; + +#define Instance_size 12 + +InstanceRef Instance_index(InstanceRef ref, uint index) { + return InstanceRef(ref.offset + index * Instance_size); +} + +#define TileGroup_Instance 0 +#define TileGroup_End 1 +#define TileGroup_size 16 + +TileGroupRef TileGroup_index(TileGroupRef ref, uint index) { + return TileGroupRef(ref.offset + index * TileGroup_size); +} + +Instance Instance_read(InstanceRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = tilegroup[ix + 0]; + uint raw1 = tilegroup[ix + 1]; + uint raw2 = tilegroup[ix + 2]; + Instance s; + s.item_ref = raw0; + s.offset = vec2(uintBitsToFloat(raw1), uintBitsToFloat(raw2)); + return s; +} + +void Instance_write(InstanceRef ref, Instance s) { + uint ix = ref.offset >> 2; + tilegroup[ix + 0] = s.item_ref; + tilegroup[ix + 1] = floatBitsToUint(s.offset.x); + tilegroup[ix + 2] = floatBitsToUint(s.offset.y); +} + +uint TileGroup_tag(TileGroupRef ref) { + return tilegroup[ref.offset >> 2]; +} + +Instance TileGroup_Instance_read(TileGroupRef ref) { + return Instance_read(InstanceRef(ref.offset + 4)); +} + +void TileGroup_Instance_write(TileGroupRef ref, Instance s) { + tilegroup[ref.offset >> 2] = TileGroup_Instance; + Instance_write(InstanceRef(ref.offset + 4), s); +} + +void TileGroup_End_write(TileGroupRef ref) { + tilegroup[ref.offset >> 2] = TileGroup_End; +} + diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 0bc4375..72f0d3c 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -21,7 +21,7 @@ const N_CIRCLES: usize = 100; fn make_scene() -> Vec { let mut rng = rand::thread_rng(); let mut encoder = Encoder::new(); - let _reserve_root = encoder.alloc_chunk(SimpleGroup::fixed_size() as u32); + let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32); let mut items = Vec::new(); let mut bboxes = Vec::new(); @@ -36,23 +36,30 @@ fn make_scene() -> Vec { }, radius: rng.gen_range(0.0, 50.0), }; - items.push(PietItem::Circle(circle)); let bbox = Bbox { - // TODO: real bbox - bbox: [0, 0, 0, 0], + bbox: [ + (circle.center.xy[0] - circle.radius).floor() as i16, + (circle.center.xy[1] - circle.radius).floor() as i16, + (circle.center.xy[0] + circle.radius).ceil() as i16, + (circle.center.xy[1] + circle.radius).ceil() as i16, + ], }; + items.push(PietItem::Circle(circle)); bboxes.push(bbox); } let n_items = bboxes.len() as u32; let bboxes = bboxes.encode(&mut encoder).transmute(); let items = items.encode(&mut encoder).transmute(); + let offset = Point { xy: [0.0, 0.0] }; let simple_group = SimpleGroup { n_items, bboxes, items, + offset, }; - simple_group.encode_to(&mut encoder.buf_mut()[0..SimpleGroup::fixed_size()]); + let root_item = PietItem::Group(simple_group); + root_item.encode_to(&mut encoder.buf_mut()[0..PietItem::fixed_size()]); // We should avoid this clone. encoder.buf().to_owned() } @@ -66,6 +73,14 @@ fn dump_scene(buf: &[u8]) { } } +fn dump_k1_data(k1_buf: &[u32]) { + for i in 0..k1_buf.len() { + if k1_buf[i] != 0 { + println!("{:4x}: {:8x}", i, k1_buf[i]); + } + } +} + fn main() { let instance = VkInstance::new().unwrap(); unsafe { @@ -81,35 +96,57 @@ fn main() { .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .unwrap(); device.write_buffer(&scene_buf, &scene).unwrap(); + let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap(); let image_buf = device .create_buffer((WIDTH * HEIGHT * 4) as u64, host) .unwrap(); let image_dev = device .create_buffer((WIDTH * HEIGHT * 4) as u64, dev) .unwrap(); + + let k1_code = include_bytes!("../shader/kernel1.spv"); + let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap(); + let k1_ds = device + .create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf]) + .unwrap(); + let code = include_bytes!("../shader/image.spv"); let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap(); let descriptor_set = device .create_descriptor_set(&pipeline, &[&scene_dev, &image_dev]) .unwrap(); - let query_pool = device.create_query_pool(2).unwrap(); + let query_pool = device.create_query_pool(3).unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); + cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.memory_barrier(); cmd_buf.write_timestamp(&query_pool, 0); + cmd_buf.dispatch( + &k1_pipeline, + &k1_ds, + ((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 1); + cmd_buf.memory_barrier(); cmd_buf.dispatch( &pipeline, &descriptor_set, ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), ); - cmd_buf.write_timestamp(&query_pool, 1); + cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.memory_barrier(); cmd_buf.copy_buffer(&image_dev, &image_buf); cmd_buf.finish(); device.run_cmd_buf(&cmd_buf).unwrap(); let timestamps = device.reap_query_pool(query_pool).unwrap(); - println!("Render time: {:.3}ms", timestamps[0] * 1e3); + println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); + println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3); + + let mut k1_data: Vec = Default::default(); + device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap(); + dump_k1_data(&k1_data); + let mut img_data: Vec = Default::default(); // Note: because png can use a `&[u8]` slice, we could avoid an extra copy // (probably passing a slice into a closure). But for now: keep it simple.