From 343e4c3075a1df6c4bef9b39160a35aae2e6a878 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 12 May 2020 13:38:26 -0700 Subject: [PATCH] Binning stage Adds a binning stage. This is a first draft, and a number of loose ends exist. --- piet-gpu-types/src/bins.rs | 19 ++++ piet-gpu-types/src/lib.rs | 3 + piet-gpu-types/src/main.rs | 1 + piet-gpu/bin/cli.rs | 13 +-- piet-gpu/shader/binning.comp | 169 ++++++++++++++++++++++++++++++++++ piet-gpu/shader/binning.spv | Bin 0 -> 14804 bytes piet-gpu/shader/bins.h | 60 ++++++++++++ piet-gpu/shader/build.ninja | 4 +- piet-gpu/shader/elements.comp | 13 +++ piet-gpu/shader/elements.spv | Bin 32624 -> 35448 bytes piet-gpu/src/lib.rs | 51 +++++++++- 11 files changed, 322 insertions(+), 11 deletions(-) create mode 100644 piet-gpu-types/src/bins.rs create mode 100644 piet-gpu/shader/binning.comp create mode 100644 piet-gpu/shader/binning.spv create mode 100644 piet-gpu/shader/bins.h diff --git a/piet-gpu-types/src/bins.rs b/piet-gpu-types/src/bins.rs new file mode 100644 index 0000000..88f16f1 --- /dev/null +++ b/piet-gpu-types/src/bins.rs @@ -0,0 +1,19 @@ +use piet_gpu_derive::piet_gpu; + +// The output of the binning stage, organized as a linked list of chunks. + +piet_gpu! { + #[gpu_write] + mod bins { + struct BinInstance { + element_ix: u32, + } + + struct BinChunk { + // First chunk can have n = 0, subsequent ones not. + n: u32, + next: Ref<BinChunk>, + // Instances follow + } + } +} diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 1759c4d..29ed806 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -1,4 +1,7 @@ +// Structures used only internally probably don't need to be pub. 
+ pub mod annotated; +pub mod bins; pub mod encoder; pub mod fill_seg; pub mod ptcl; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index 68e6487..41ae021 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -7,6 +7,7 @@ fn main() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "state" => print!("{}", piet_gpu_types::state::gen_gpu_state()), "annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()), + "bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()), "fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()), diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 82f3491..4a4fed3 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -41,7 +41,7 @@ fn main() -> Result<(), Error> { let fence = device.create_fence(false)?; let mut cmd_buf = device.create_cmd_buf()?; - let query_pool = device.create_query_pool(2)?; + let query_pool = device.create_query_pool(3)?; let mut ctx = PietGpuRenderContext::new(); render_scene(&mut ctx); @@ -58,13 +58,14 @@ fn main() -> Result<(), Error> { cmd_buf.finish(); device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?; device.wait_and_reset(&[fence])?; - let timestamps = device.reap_query_pool(&query_pool).unwrap(); - println!("Element kernel time: {:.3}ms", timestamps[0] * 1e3); + let ts = device.reap_query_pool(&query_pool).unwrap(); + println!("Element kernel time: {:.3}ms", ts[0] * 1e3); + println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3); /* - let mut data: Vec = Default::default(); - device.read_buffer(&renderer.state_buf, &mut data).unwrap(); - dump_state(&data); + let mut data: Vec<u32> = Default::default(); + device.read_buffer(&renderer.bin_buf, &mut data).unwrap(); + piet_gpu::dump_k1_data(&data); */ let mut img_data: Vec = Default::default(); diff 
--git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp new file mode 100644 index 0000000..bf7bbae --- /dev/null +++ b/piet-gpu/shader/binning.comp @@ -0,0 +1,169 @@ +// The binning stage of the pipeline. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#define N_ROWS 4 +#define WG_SIZE 32 +#define LG_WG_SIZE 5 +#define TILE_SIZE (WG_SIZE * N_ROWS) + +// TODO: move these to setup file +#define N_TILE_X 16 +#define N_TILE_Y 16 +#define N_TILE (N_TILE_X * N_TILE_Y) +#define N_SLICE (N_TILE / 32) +#define N_WG 16 // Number of workgroups, should be 1 per SM + +#define BIN_INITIAL_ALLOC 64 +#define BIN_ALLOC 256 + +layout(local_size_x = N_TILE, local_size_y = 1) in; + +layout(set = 0, binding = 0) buffer AnnotatedBuf { + uint[] annotated; +}; + +layout(set = 0, binding = 1) buffer AllocBuf { + uint n_elements; + // Will be incremented atomically to claim tiles + uint tile_ix; + uint alloc; +}; + +layout(set = 0, binding = 2) buffer BinsBuf { + uint[] bins; +}; + +#include "annotated.h" +#include "bins.h" +#include "setup.h" + +// scale factors useful for converting coordinates to bins +#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX)) +#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX)) + +// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts. 
+shared uint bitmaps[N_SLICE][N_TILE]; +shared uint sh_my_tile; + +void main() { + BinChunkRef chunk_ref = BinChunkRef((gl_LocalInvocationID.x * N_WG + gl_WorkGroupID.x) * BIN_INITIAL_ALLOC); + uint chunk_limit = chunk_ref.offset + BIN_INITIAL_ALLOC - BinInstance_size; + uint chunk_n = 0; + BinInstanceRef instance_ref = BinInstanceRef(chunk_ref.offset + BinChunk_size); + while (true) { + if (gl_LocalInvocationID.x == 0) { + sh_my_tile = atomicAdd(tile_ix, 1); + } + barrier(); + uint my_tile = sh_my_tile; + if (my_tile * N_TILE >= n_elements) { + break; + } + + for (uint i = 0; i < N_SLICE; i++) { + bitmaps[i][gl_LocalInvocationID.x] = 0; + } + barrier(); + + // Read inputs and determine coverage of bins + uint element_ix = my_tile * N_TILE + gl_LocalInvocationID.x; + AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size); + uint tag = Annotated_tag(ref); + int x0 = 0, y0 = 0, x1 = 0, y1 = 0; + switch (tag) { + case Annotated_Line: + AnnoLineSeg line = Annotated_Line_read(ref); + x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX)); + y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY)); + x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX)); + y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY)); + break; + case Annotated_Fill: + case Annotated_Stroke: + // Note: we take advantage of the fact that fills and strokes + // have compatible layout. + AnnoFill fill = Annotated_Fill_read(ref); + x0 = int(floor(fill.bbox.x * SX)); + y0 = int(floor(fill.bbox.y * SY)); + x1 = int(ceil(fill.bbox.z * SX)); + y1 = int(ceil(fill.bbox.w * SY)); + break; + } + // At this point, we run an iterator over the coverage area, + // trying to keep divergence low. + // Right now, it's just a bbox, but we'll get finer with + // segments. 
+ x0 = clamp(x0, 0, N_TILE_X); + x1 = clamp(x1, x0, N_TILE_X); + y0 = clamp(y0, 0, N_TILE_Y); + y1 = clamp(y1, y0, N_TILE_Y); + if (x0 == x1) y1 = y0; + int x = x0, y = y0; + uint my_slice = gl_LocalInvocationID.x / 32; + uint my_mask = 1 << (gl_LocalInvocationID.x & 31); + while (y < y1) { + atomicOr(bitmaps[my_slice][y * N_TILE_X + x], my_mask); + x++; + if (x == x1) { + x = x0; + y++; + } + } + + barrier(); + // Allocate output segments. + uint element_count = 0; + for (uint i = 0; i < N_SLICE; i++) { + element_count += bitCount(bitmaps[i][gl_LocalInvocationID.x]); + } + // element_count is number of elements covering bin for this invocation. + if (element_count > 0 && chunk_n > 0) { + uint new_chunk = instance_ref.offset; + if (new_chunk + min(32, element_count * 4) > chunk_limit) { + new_chunk = atomicAdd(alloc, BIN_ALLOC); + chunk_limit = new_chunk + BIN_ALLOC - BinInstance_size; + } + BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(new_chunk))); + chunk_ref = BinChunkRef(new_chunk); + instance_ref = BinInstanceRef(new_chunk + BinChunk_size); + chunk_n = 0; + } + // TODO: allocate output here + + // Iterate over bits set. + uint slice_ix = 0; + uint bitmap = bitmaps[0][gl_LocalInvocationID.x]; + while (true) { + if (bitmap == 0) { + slice_ix++; + if (slice_ix == N_SLICE) { + break; + } + bitmap = bitmaps[slice_ix][gl_LocalInvocationID.x]; + if (bitmap == 0) { + continue; + } + } + element_ix = my_tile * N_TILE + slice_ix * 32 + findLSB(bitmap); + // At this point, element_ix refers to an element that covers this bin. 
+ + // TODO: batch allocated based on element_count; this is divergent + if (instance_ref.offset > chunk_limit) { + uint new_chunk = atomicAdd(alloc, BIN_ALLOC); + BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(new_chunk))); + chunk_ref = BinChunkRef(new_chunk); + instance_ref = BinInstanceRef(new_chunk + BinChunk_size); + chunk_n = 0; + chunk_limit = new_chunk + BIN_ALLOC - BinInstance_size; + } + BinInstance_write(instance_ref, BinInstance(element_ix)); + chunk_n++; + instance_ref.offset += BinInstance_size; + // clear LSB + bitmap &= bitmap - 1; + } + } + BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(0))); +} diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv new file mode 100644 index 0000000000000000000000000000000000000000..4cc5d36f5d587a905b5158380672988de7e0d49d GIT binary patch literal 14804 zcma)?2bf(|xrR5Hk^q4aLazxuKucTJT;)ykbE>#a>V}D4>EURlxV%XMf4c;knOqH)myi-}nFPU;iq5?S0PBF>dqm zRkczzf&co(RrNKw8i!ITe!n+yYHmK>QzVWr_ZEneAPwWNm&(@>7Bgx`A~*lVJGbJ<9mnX{$nbgO*j25@+^i6%XQaP(;Wjud z=^LA|VlFe5?7Z_nwCSK#RgI9Gd92a2pF1!(Xu{^J=2hl91#Ue3v~}~Jn%BtL3wnF{ zh6jg7T6wCQ_ksn(7i#aSCRXcEr~PY^Y&vgcsDDSbHvX*~yQ}q@wqw0x{R`_^ZS|w> zwLH&>)kbjpseOCKde1H=BLSL@>}Ny#Tm7tFO&i`ANe zR`~W@FfuUKpZO^abXd<&vyBSN@;$U2v0C%iQEd+%rMRDVZ1r)#IZKAlGja1)^D6UZ zOPfpWr_KGY=H*RoJGuL4sQ*Il8so29&7@Ad*9;)Nwq;+-Xfu`%sPyEnXszRJ)EU$6 z;MSNrs=dIiIqa-v>C<~beJ**=&w(%7Z*!J#$D4ag7x!i_g57TiDsp*#97av7df4)# zTYQ(;Pi*mB!%uH<)2!R~wYaI)vCpMW&X<9c^Ht#GId@i9xA?MtuW9jR{a)MR%lf^( z#hZRRsvF_0embkW!HauGdKXbUhI?1w9o2pC8ze#rVYWns~6y;Ro{S3mv!5TY{=1bdKO*OGd3_-pTV5MXtM_g zhx>~4SMwU`Y3@Qs=$O?t+1eX3n&*12F{mB;q^7;OO#!DvZJWRcYQN5EI(Wgr*rMLW zqji2e!B@QRJF1yz+1LAkITi8kto8*jU%QTK7W_hXwtdV7U)10^>g*Qv1JrBWSse`a zxk)N@yhGs2<8@XS!_R7-7xs6_3igibCOAh^9rqUSMK$iKZUr;m(ZPYfWwAfg(iZiO zo`-*T^&mXs>>FM(yGPFvo)?& z&{}gW@2d8Mx5wRog*ctn>^6RQ8$Ys*pV7w8EciU;ZZ`RMQ#vUJlfxWpFP|G;^BB8Y zM~yq{b7nIQT(VN?Dh7t5D00vk7CeAGMTe_)LVW8^_pdP$%|euyNIlwK4k1m-FUlF8_#yy^x2bIA7i>+ 
zd)InvjkjyK&up*o*MW_*G5O40BU{I_G!5Hg_?0}Gne9}|H0JS)MGy! zyi&oB1()qlhP&6|-vf4i!#&WtD8?V4cAT!0d~w6wPx?FV5sLk3zkynR*HM03!`&~A zJNLBX_TH82_kC(}RCC;N?_V|d;qR#(qj~H92kMn5>iWxd_&?eB zWZX}|XHmlc3HBaYi!3_nY&yMLcTwzPRchzWK8z{%*{kNg=b4}_#Wt&>IVNqkk$XO< zrQbE-6DZ;9fF0~c_-{pRJT>{&)N1Bx{B6MYuWo#~&tJ8~+Y`Qh|&AaRM^87P@_o-zjbLIHW z(aSY{9>qQA7wei_)-1n2x-*=R7@3DgmZvVcg#O^)idrG)=-&4Zv-}jV~-%@b%_kAUHx$i0^ z_g$srzN3`f_mptg!vzcXOdMBm^V`k`wLwC(x%;C z!M0UTtf#^1Pf?DdKktLTQPf|@4tGDx?Z^4~J4MZBM)Lbdqo`QLzfz7R4_x0iGcQoVP0Bl=z$Gs7?m*dv9Aw|t`i_JZAtZr_ecaBrrrj4() z>o$#gGm4k?%^SOQ#Qlbf^enTLU=QmdPr&qg)(#(e6-DQf1PJ>+}J5sfW=M>c-0f9ZD= zy0(d&1-3gH%%|G2iJ?6)jse@3@9t%cW6`u-PR_~e;Ju%Jz zXS`*M9&~LP@0noZsAs&rVD)mm3*da}@oLX_`@q_=_vFc8AvohL=e!?XTl~&y{My&| zY;S&T^>;>NXitm*aK>BqbuPNLH#ci}9@seQ8Sfxiy&Uf%IG=ibv}e3SU~P_f zjyfed41+V?a?Tf{Ym49O8^8AT9YNQY@s5J|)Z;aV_QV(iXS`)!OVG7tyyt_Bqn`0z z09G%@dm)@pJznh@??qs3j@L6p?z!t3;aT9>kh9%rnb${B^-+RH@?9aU*FZY6b!f|;HUJG{4Co)U& z_ksD;e;3l`nSUL1)<%2Uz8~y(!ao2mV_gsDQ^(R*dt%v6d)j^w>{=v`4}tmAZMBto z=%+nxKMZ!9;U58)v2K9#sblG@J$cwpd)nRzo>lOVg3UGW#2*9eqwX{L6VzV5lWF@n zMa_FooVcF^n^V?A?wZ8cI=(l9%|YLrsO7PJ3Y<8%fNRdZ>$}mX;Y+D=u6BcOLsP#B zANd_%wc9Dizm-}o?~0!Vx5u~>P5oO^%d1jEFr{U=~!=vQRMI6YhDH0M%_04?zR#?HTk=jYeP3R7yFx~{(CXwz^a3P{C_tj+R|qyxa_kF zu4WA5%H!MJ_+}2q!_~^)(xZSOvkiyf!?)49mLv)1PCsoHYCuL@t<_^0oQXzID& zR|9*$yHB;nZ*{OX{r`H z6gB%5JI;(pJ#jY$8#nVm72H1m)6mp2|C@oGf7@zHtj)pNGXL_}wg8v&wk2FG^R_iO z`D@#XqGtYL^LNeU>hjF1b^4hOoYiF?W)Sa81Xrz{NcNcK`xw|WxdivfCT%Low!}U>j z{r8~ua{aZoLO_go>}^6OAZHs%RUc;s~N+%^7tOq_+}mE!qv(%YaU$P z_O5|koHHx24o0`%#5x45Rz3p`g{zm(05$h&*1|e54+FawoWE`8dp=xE`&RHHz~*E= zhlAzfBdPVxTpb0TfuBCw68~th<4DfOz}0fbXj98RJQm!(5067rPmJTiYK}4OPJpMK z`N{Q59w&mmZ}O}>39RORi_gi8Prmz{0$0=5n6_6d&w(`?K8f0WKDpuJm{Ym6{;$%H zd?k4LJGE}Je6OAkS38ZOk9)NToW1Jb;pFCWCbj;V|6Z_rRUd6Hb1Ljf@$bL>?z=k0 zYa(S2+E1WfjbcudsPnhqy};uOKC6u%*2a%+t37Rx0&6p_b10AP7;qW)IJla>NhR*_;KbFQwkLqK8P~Ot z$9599jC%@P?PN;go(fJ}?P+@&SetQOQ+aG>fXldN!qr;$V{3kU;fbp~Z5M#G8P~lb zk8L5ijC&Sbt)G&7&ju&1_Ov|*tj)OYF?np~foD;CHm*!PNKuP@5!f|}eF&`PGh`L& 
zVTx_k-KUGGUr%xGx_|6bu3ut~f?bcq90RMxz69*N#(q9nEqz`9wvBrFybx@kIXmV0 zCFVt7=QJ_@4XhUX#bDGKU>+o-3{H-ham_nKTkW3Ed5CW`h~I7h@ebEbgT zp?IxD@ywY_y*9-&XI<)?IhTRQ7yPO=esvqarj1|Q#;-5<4d5Fao-^ihxOupCnVT!X z>N#Vs1gmA-=BO6`w}91h#=I4*R-Q3$gJ-X5PusVHwYhH`t30-MfXle=gsYWj%)8); zt37Ss4c2B{=S&{kd%$Jf_rlf6Gv?pniK{(r{{yVexX!&iw(G!U-1ouNPNw9ZeLpyH zwWsX|z}k%K8p>n)Ah?YCA-Gz3#(Wr_xZ2b9BVcXDbuY+c`zY8s&tCW#_(qC*L4UbE z@&7p3xy(KD39wr1p9DK6vEKw%i~VM>V~hP3uv+Y&0-Iy(w}RD<;*MC8`qLEqG_Gy6 zoBM6lw^MGU;OW%cQ_On@>YT~-5`v<{l8RtV_wYik6 z1kH8N!KE|*wHT}x>UdKL@;K3#!$=He+E{|*nbXIE64r|xcQX*{1UF7vHuF3u^Yp&tLc}% rehn`B`VCy|aEfvEq<(~AUn7*Vzu&?ip`^dxfzzLH>`zU<%=Nzjd;0OI literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/bins.h b/piet-gpu/shader/bins.h new file mode 100644 index 0000000..3ce06e0 --- /dev/null +++ b/piet-gpu/shader/bins.h @@ -0,0 +1,60 @@ +// Code auto-generated by piet-gpu-derive + +struct BinInstanceRef { + uint offset; +}; + +struct BinChunkRef { + uint offset; +}; + +struct BinInstance { + uint element_ix; +}; + +#define BinInstance_size 4 + +BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { + return BinInstanceRef(ref.offset + index * BinInstance_size); +} + +struct BinChunk { + uint n; + BinChunkRef next; +}; + +#define BinChunk_size 8 + +BinChunkRef BinChunk_index(BinChunkRef ref, uint index) { + return BinChunkRef(ref.offset + index * BinChunk_size); +} + +BinInstance BinInstance_read(BinInstanceRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = bins[ix + 0]; + BinInstance s; + s.element_ix = raw0; + return s; +} + +void BinInstance_write(BinInstanceRef ref, BinInstance s) { + uint ix = ref.offset >> 2; + bins[ix + 0] = s.element_ix; +} + +BinChunk BinChunk_read(BinChunkRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = bins[ix + 0]; + uint raw1 = bins[ix + 1]; + BinChunk s; + s.n = raw0; + s.next = BinChunkRef(raw1); + return s; +} + +void BinChunk_write(BinChunkRef ref, BinChunk s) { + uint ix = ref.offset >> 2; + bins[ix + 0] = s.n; + bins[ix + 1] = 
s.next.offset; +} + diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index b429a71..4628fd2 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -20,4 +20,6 @@ build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h -build elements.spv: glsl elements.comp | scene.h state.h +build elements.spv: glsl elements.comp | scene.h state.h annotated.h + +build binning.spv: glsl binning.comp | annotated.h bins.h setup.h diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index 1061fab..c31dd2e 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -1,3 +1,9 @@ +// The element processing stage, first in the pipeline. +// +// This stage is primarily about applying transforms and computing bounding +// boxes. It is organized as a scan over the input elements, producing +// annotated output elements. + #version 450 #extension GL_GOOGLE_include_directive : enable @@ -208,6 +214,13 @@ void main() { anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z); Annotated_Stroke_write(out_ref, anno_stroke); break; + case Element_Fill: + Fill fill = Element_Fill_read(this_ref); + AnnoFill anno_fill; + anno_fill.rgba_color = fill.rgba_color; + anno_fill.bbox = st.bbox; + Annotated_Fill_write(out_ref, anno_fill); + break; default: Annotated_Nop_write(out_ref); break; diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 41d9bc150159c4d8e83ae3898d8255e12b2a95c0..afb63b582b1547a2ecc25bf900952a97b01489fb 100644 GIT binary patch literal 35448 zcma)^1%O?}*@h3<-QW(vA;Fyj!8KTr;I7M(Y#%=^8c&YYRK=iJ?-t=}S3SJhP2H2n8YN7cAyt@^=K)$~>W zMn81e_QRH+*fV1J!K-{vgPE$fhMhJuS5sHIwbRCQjT)~Vs}pkGeb4w6uZt*V@Gr! 
zvgE{`36qBREH}k@xk)RoxYnc<*TTMyd8w)i1T^Oi;D!&qIcKIHM(x{o@7tSG&TC4} z_G)(I+O9E6%3kA{VP{R*xf;Ksai^~afNT5C3Ef>IQsR_cjS{hKtLDVESzkvrH+b7g zT_ehR87%4LnpsWJAh-0>?n@B5>YVF&qMX+zy z-CivY-elD1(U?$6M?;1xYHqKVtZn4`^PjTUgnjqv>Ks09^tcJeovvC2&RBhFsDqdJ zm)5tj?mDXFz}xps7O)n0C7XI$h|BwH$!hhSszp-8ZO902cP2s;|;on?j|91fWcPRXKE&RW=cmBip zcj@(?x!SYv|BvhJKg8LiH_l8|R}rW8ZvWpyVa>Evd-ulZUkwKj89R2|l(W3KFXX+- zb?TZcY#ZxT&MW7jvg_2(4)>R`=itP~dEr_ZQ03xJ`Y~POJG-0D_{sLIQ`>uTIkwGpHdA#Cc;D`x#ydu1U&Zsk z)NwU_6C3XgZPgv%QJgxy1KbOq&~f@%|8~3kGxi{{o zO}UQSUVYJ&2h%n3x=^TVm8RTQO^uwq3xJb%P}8nA@8V6lH}5h{xi|0knsRU6Rhn{d z-Zh(YowvPOuPHYk6`k{qoAPR@YZ!9s8U;>WW14n#U2WCG!tT(*?#LFq;|se}3%fI0 z?9MOjE-&n^?qk@=i`>9+&dq4Hs#*=xUVVq&d0+|x%WOif!rK#d-XneeAk4o zG4(zBs73yBi~L!O{7sA8pF^OGH&Y+Et(pzFIluj@xxl^We(!p3^4^=Sd?qw%YpdqL z-plpBZf~5ns~XuxDVi_V((7KDO=EKadaPIXe1JTJ$qMDD5m*6O2cuXa^8iZ4vKAsw7Fe)qEP=+2$TO*mlagmII`Z?;i$oO{DJ@7Xks z?`E(WHg0&==*`9+MAtKF+*mD}wxh6Zo|PTdIB?H?&EveOZ?DF~m9_UE`>bc}NB7aS zSI4%7y(#Ar z3Z|oapvmRq3#P4l46Zqjw(3=JeZId59@n#9_k?DV$TdWVcfwDJOG^7^McFRDtoYiozxA5j$cr>secBovM^eMz=Z=MYb?uGkkU@n!fb(%m z{Eo&Mueyxqxu1BR>18}+E&#LkmsP6Glp@!r-|$QSd-j>)Zu)_ zH~!A4ytexLU26OXg7ueM5OxrK>7N+RrMiJMV=PXe7)yYSA*XK1T4(J`ft9sU=l7BE zTOO>RoI1aURHvU0XUf{B8%!TRA9(bWQ|AMq^QWH=f6CgZTZ=w^YlHQZbKcjX*UvrU zy!+jrT*kZ}SWZ7)}(jw$la;3W%rcd)fb-w8Iq zvSS%epRtSpI~F5ZqKI^#{EPrQS`+RIrZk21aSn(M#4;^VYoqt2Lz(i>A>b;k6&g~U7>Y)m=t z!7=pO%j+TTrcdx4^v6KcJCOZ($$d4*a&ua;M><%?=L zYx410cJJx`VlBHSybt*-^gj5Rr>x!F^!#aX-Unqr_vGf_|MO8`=4XCz=EJjfYG%}iWX>v;z526mn?Ym8xcWQN<| z9fPu;)pCw;&l;EaW*@j?(8u1-a_#5Gehk<>4U(R@!)>t3?FGqIV@xP+h%j>V~I|6?i{@2x5pTxhu=8ShM*f~$U+iIP> z_R79X@TU>)o*L_;|9!~LRs8R-_44{F``*EyhW{fqF6Z|-xM`-EGGAMIm3=mnTa`6qPpHP_n)oLS>V~b)HNIPV|MXSf zyO2Hid{2ztd+oboWc9u$M%KTlkk$LHSn7ROjI92ILiSwsT`_uP-xW*Qcg4u&d#pwF zoi2L)-)xb6uPgPw*OjvGb*1b(U1akOC}iXNZWq0>?{=l^yIo}c2N$yTzTZW!>^oj5 zuV2W{m+yGdD{oxL#`pa$dOt&bzl*Hi_q)jY`;Hgc&jH`@BCGcuFY-ua-|-?_ukUw} z&FA}FWbJ*wi|o1W`(0$?`+gVM&vM`IBKtXXK_Q#Z_q*uLcYh)4|6n2ex#9a=?9J!< 
zU1XmVe7}oqJm2jitM}b5vU=a`l!vg3JUjQId7k=tybgQFecoB~>#|qfU&_&Sf#puX zU)$keInUh0>;{jZl`%&m%g1kDu$<@cF?Fo{kd3F#-0o$$u;s3>XGzsjv zWd9kt4yHeZ=B577T5q4R90oSV-Q@Cf_;9eC^YH~Sln9RK6M#&cYrZ^~=;bqaCxV?j`OMwPVCT+S z%;OxN0+u(1=cKauRC;5WQ@hi^m(XjYE-_CB8`JUYr>w1W?0h><_8He1V8@m9btYKu zU1IC6POiL%&Uf;i4K|<8av8%p$nyGomMd%TUb>Jrf>y5ki)vmQpW7}**3UI%OrH}j zL6+C%Qu@ni7tr)IkFqw&a}~HX&((19+Ia3<12&IiHIL5|*CNYn<8#Jj+LbhY&7-VM z^4tJ!&2uB1yf%*KCa`&YFUWXqMwZvc@!UeYo~EyPl(k8oJHV}Z?u3)q#^>R?z~;%l z@OkELWO;4wp}&`QJ568nC~K2E4}x3sJOn4Njh` zU-KwyBlh#$`|p~19PFC3_gr|A{wbQ5`loBXeR4emHb%~cXTfs)m;=uRj%^TA_`5v+{ z9gi`UwMoohfD_X?-=|$qlTV!=)O_mvC9*Nyf5ueSCNcj9oS5#B4{7cZd2QTBzp8m- zy4QYF_pDR%-?_$)BW%f&HW%Bo8Q*FG2K_cLpG-S%9zU9Xz!W-hgz@h_h9Ad z{s?wYs{5E;IiD4O0$+l^KI*j5R^C09b!h*e_Q{{owXVy*z{zKx{|Yv)>%qEQGk-&t z*T(zxciIIseXT=T8})nA|AW?==aZV(#x?ROvUyx1=5ZZ;hAgj*YwdHIYfWGCC~K2E z{{*+@`4^nLHm>h4z~(8>?Jtq#wQ)RO(XOZIYaV57lILr1Yo2f5Kg(WK_InlUQ$KyJ&zYm_cPZ*! z8#C1UZh1tP!#`JSxPOz7s3+m>e$vMyB#GVV>8hdUy`JD0dfaQFqwLaft<#Mm)1IroH zwL3r9y(q7Z`*i`Zyf)7Ff?(f&C)Y8RwMoo{z=`R;8;I<_lh?+*IS4FoOz+XcU}NSU zDQlCMi+~f;Iu}L0o?bq6E(VrQor{BwnRTPAO=2zqPE7aHlF05Sd2Kx7mjcTh)3vfR z*qB)>%GxC6GT_8??<|Y#-jR>Za$tF5y1teN8#C)mSsU#=R zTMI0gcQfTxX~r_PIKJzE_071}182;BH=^u(`F=SBJdQclC-xhtr#X)G>6J4N8-kq& z>@F-%c+Ze2z+BOASo4TR&%Es1jGqCS@`l!=pBYOGxZvo!hddcPa zI1Jf%IUlzKYa^d`^sT}2ThV++_va+rfaT4xxjZfPZx1%Fy6xzdea|%C4&aUH<<)OX zFPD0D1j~8$>_+ePB`Z^YR~pN{+KHYaR6Eo2r`m=8c@Nba$NF9YTbDUrrdPHGzl(eo zd>p;J>&HC$8#iOv6Pz*l{gbl(v*WiHcr$u=*n8@kWIWHUE@b(wxl^7w z`+((aUe+~)8FF6_hcl-B?g3-C|3|>7JBvDu(G50^d~%Njx8~j#PCmK!1IyW*PmcZG z+@s)J%g1jVSYP@0jR$wq za;__Dr`~h_Kyd3gC&0;PoD;!vHm`Dyd*B?Wb(M2G2~J(cc@Wq*^2vQLxHb17aPrB0 zC|J(cI>(2>C3iW;hr_E&?jyj)kF>$(a7@gI|i(;eEg0DJIDID zj+M3ZK6%z34>qp4Sv%voCr<`vPo4x;R@aU0RItA0 zI)z@@bIp5p8u(;-ebkkE)-%8uo_}Y7H=-HiOnPPO^=EWvgLkBtSAPb*oVEM8cP>~i zd)Bk+JY@OUoDV*ywlRjXHkq%Bz?rWL!OEGhi^10rQ(oT-=;e&j(SKNRspR@4B+NSj^ya`@i>~1dX{JFAo z;vC!pukJ*0D&Gp$S3bVCfvv+F<$k;!PF?KoDD0jua@+~8E;;T3>nneJo%e3Axz)|U 
zy0(^kz|QYNH1+Yn7wkN!&s^OHb}qBGl(lo5?yU#F*<1I6mDTw>5D$U%b=^HkubjR0 zF!&5?^ifyttzD3f;of=-yb;Y9kJ2lq)R&xwoD|mXFQT z;3sMuV<>B*-u>|m*n3p&k7tqPWAhx?7+G)1+GLJ?0L~me4^~#^@1DE}*4NxG&?{$- zUIL$sjXvtiIohq3SEu(G!rqvkAFqK2)ADTf6R=$BQzvJw zOy}kAgRMq={tU_AV-q{SGtl&%i9YjrJy@G^&Tl}LPklFn_04nM&0x8kXsP)Yuzu?D z^O;-0^52kCoLYT08bI@!gJylR(9cP;zPaf0Gu}JEjxF-NE%M_n^0O`SONH#u3}36| zjQ>t#^Oobk3t9dd=GFUi4^1w6%9`XdKM#TBG8f+m%RNl6EPlFTpDR6wB0jo2vbEF*IbKo-W50K@arzP$S;KWrQ-xtB^ zjO$!0NB2W;8TS=rInUX|eHEOz>f`$(usY*<&y=J43D~*uJK~r)oL-=~viWm-Cvr`ZZWS`{_4exw~o3y*10l{<;wl^ zd*rMm_3`}!Se3s|BftI?x%ks zC$9SVegal!TwW!$fk<-Ve2pMC>Q zT=nt&H&~r;mXdoQ!srbU*Eemb!C zB>L%*<+2xN0P7>4y*MM-zdPnSHK($6i8&M4`;nM4Bg;iU3)p#$epY0;kGz0O=N1Sg*}X&_iG^W>Q1V!tq0E@#prV7c;4S`;~J zTYY>N1FLhLIXBADEdeg$E{QBxo=Hm~C$9SVE)7;^T<2Ihx@Ey-+~ttv$}?$sjO)Epj&4P88FwXQx$;a}898y)$9FJTopHVA%F(R~F5|9-ELWaMt0O0_`uMH^ zR%cw-j&gKsfy=mSBg^%p>5bS-N16u?+*4}M!yGGE;;uE>m#3>dx6cFvq4$A#M~R~eN4Cv*F$b~!MuOFQZi{nnFNtm$n%7b^&+R4Xm!^4c zFH4_udlY^-H$At<6!HY{#9Fo{_4^|mr#wdwfRoQTIvOmOxiW`b=3*>ZF6Zbtuv~eL zjz`Y=Rv+I3!Rnk3YgLY}2P~IbCxNX$Yg1Vp?VY28YQ6I(&it)_ZY7%6_h`=F^7Jdx zoWGUnGk=HTSI*y&g?t?N_*za)hanrMoWH~2T@=pn0vgJV)8XVZe`kQ@GJocf%Q(*h%Vqw~2FsQ6cMfvq zPknsP1*>!ZtW`O>3&3*O;}?SEtof?Je zSel#yz?n$usGW+T&uw3*{gS{uwKLeJF{#med8~t-&x#*tK z^wv6%{?GW8wf+HFyRz1g;p9{6AHi~|bs_Bkge-5Z##FY}LFoPhUbN0-t;(s@XNetY zUfa`B>keRR-HF~>eFpmszp~c9BWqXI`VTny)cOfnF10R){in$C)@n>;Yqgfo!9M@V zTdQ(v^|^6Zn%B;>)Vd4UT6d$j)+Ok_!LO|K3uNueTEB#oPpw~pYy6q^qRLhpK@NOL)NaWb$U4Y z)H(xLF14;Yqge{!9(cftyMX-0vrEUAzkC|v|H;4@P0J$NcvGU>ppBOa7(7a>>68IQflhemU)u ze_3#ue>r5iJfoR^dF13T$G-x+y5#>JST6Zj1Sh|7%`c~2@~;Fg^RJ97=Q=lz`3EDL z-!)W@e-(Ij$-gRCF8NmjC%%+-s zKMn!QWj`82F7vSgST6fgClm(wn}hRV~DYg4dX zx%W0hHdpJ~-yB|D=6(yXT=EYCC%#*@amF(XRuuI?*dMKxt3TUYY$0k1Ck_XNu&|6btaH?H~Rv};}aM-bQRP@3!gAo{~A&Yt4sa~V7cU<2u^UrxK^KMY*vKO9-ET>D2LCx5y2kAzp3{6~T1lK*IM@*CIua@r;TG2k-) zvB+}$8lOAO@7s<;HoxmtKllCdaPrB00$47&PXs5oG0iQfUF*I(j(A?j(A;-N(;rK7 zuN_aH^Zq1!<;yjHGMs$&)+t~)YfUYuBBvJh)*|PAYOUpDIIk0EspZ5P`#gFQ&ALvZ 
z_Y4?8e;%pmS$K6Re~mMiN%6FGIOw{E%S-faFH;Vk6V*k{AZ z=lnPaEa#f?-F-OyxybUyw0>ppZ8y3Lz>ZU$V^PlU@GJmJl z*faEWn)7!iz4PT6dlhjqf7)DK%bCB6kd0B!-^Fn9nZHZGa^?J8ik$gV?|9^zd#gEr zmm#;tz8p?I^LGVUF7r2uTCPNvH>UL~JAVhGy9Vqy)j1aB%-^-(Bhcxiu66#-Mdx)E zE%SGFjeY()hvxj9NALXke0D2wGJo3KR?C^c>yV96&fjD>`OM$-V7YSsZa~ibsdqeb zIUA-W=8ed$v2TKt&-~pCmdpH&#_twnd1G3?vhz0<-R)qUJ#_`7mE1+Ol4PlJ=A{JeVx zUR`oL3)WXYV|)&rFzkpYl9PfkmmCqPI0B4NWpiS=oFTu`%Hp%rrU~6*il;imjUR`{D1=e3a zzP|=%?da>;k<&IdzX4~=<(l{iUR~^d3(lBJyWhd9%b0%;)>l5he*k}1j*sEh#qN*b zfz$H(>qcnooz&I!&KtwEdI{{dj*#ahE)4ea-!V`(h$iRrmpC;o0?yo(>tj)Pb+KEluq)T<;_&JotLs<-tgpO( z&nG$UKjm4-T9>3*lh1+LTaPs_P48vR+AKwrvu1H>Sq7Y%%lo}7yt>#eSJ;*JYI%5d zsd)vkzVg#z4~CPE z%_`um57&mWHr8YwV;VbaWi@c?T3H=VK5JzSuw1!T)fCwUMoYu`pVx-Zr8>JGtPdkb+H>-*p=&HQ+Rc$YcsIE^3Hv7+Gjm%4z^C`M|3?-@B8 z{SI)-i_y%ZZDQ{TZjHSYocz|DXs+{}!E(lFNA95Uamfrc&p>UpNz7fqt>fJlPCoVS z29~oWR{VE|YprPyIQjVR36`_PKjYmC&hh%pWFBo3yA#|Rdv7@TjJFFcXB;nMW!&y- zZMAW|xNgC&%u#kedXi3FE}~My}cj2 zy4Z~ZCr7!5_J>!O90!2)l|Pm{?mOUUu({PG-Wag+Ik_I2^CPEiY{r69TbXklyt>$p z2dB1jo(_apm)a(P^_7qBL~wGHXGjmcy4XzuCr4S!LGbF5<6yA9@)`djU~{WWyhFhm zzwsQuoVKw!44m4^aUBk?E_O$NQ(JitkAzp3+KvM2D<9vZ!O2nj9s{o~cE^H~qnxMX z;MFC^@nC%$e0~2)OuXM^>X&;36KY;JXlcP=>NH=g5{(>6Befm2&~ z|Ide47rP6<*7h!~w7U>qU23}stgn20F9s(^Id_-9tBc*G%5{$NUR(yRE;%j->noq} zUja6^y2QIu?mKF}3SM1&uLkQcAKz=h&Rxdq+{tMhn`^-tS6T0M@akeW8JuyIb9X(w zx{T`vu)gx~y%C%o<(jw&UR~^N1}8^Z%PsKglH*pezVaFWZD4b&OT62`nY%LH9q{Vn zdnZ_b`S{)i&fMwi+%>p{P4ivkZm{*vPw#Vs?;iJn<@0=bFIdhPc^A14S>Ac{Ip;yJ zm(Q2#9-w`n=5vmC2e& z@8(}Fy+jGn1#^v3!*vh(#Gy=y|h%;7J=nM3cdvU4~)_WuLkj9%U`x*p`5 zL+9W_u-s&@@qBmq6|#J6eht>fd5X<%kmd9I{SnylELQug&wctWIQL0^-!Fa#C!go< z--G3h;4!{J~n@>ZOXd;1Sg-m{|vV7C29JrPu+h3x7Phv zIQi85H?W*>yo{Cloo{Wm@%-|hd+)s0xp$v}bMM@H%HF$qiSrqFOL}=@xmV>fzR$sO zld;Kt`6se`Z2kq-rksN>kmWN6UxIV*^j9DMufY1}I}q2$*U0km{{|d?W2w&^{u|sn zhfa`u=5Q(}jAoq7U3u@d)h0Fd1GkR54Ng8b?X^ug?hZKl)ZHJPd#}Ix_)iUP9rrYF z^6{S*9Die}&$y=pw~l*yIQfiw2C$rQyo@Du?R&qq)uvoKo)1|&v!Kh`@myAR?F_qJq^~xhX_vU$f?MNm2PdDn+k@q7#$ZZ%x=e{M%HO@02Y P%$>mUj?Xz%b`Jj+QCs&H 
literal 32624 zcma)^2Y_BhxrPtyCWKBXp+iCqCG_4CAfW^ZgkB7rWD^2OHYA(SNkSD6uuud^yPktnQh!Cv*?%bsjc*(@nRZz3KMYw=pkOHIsnmTnyasp*QD} z^rNYLZ0F>joN`{hIoqpckZZffEGc`RTPW>+JDUCe#IbkL0|1>+4qyL5`g|YmoCwjml+>%BeB-uG|@$H}Cse;Kb}x zbV%-Cvuf?zsx`4s-RpqIcF*iOt`j4$b<276*4-0EUR!yu6AVD# zj~&i!?PJ@ws=f)!+E}hVwQT`DaAwE!S(CbEPHRq6BcHriZ*Bdmt&w|d!QL-9uW23K z$o(6#_qW@aQ#-mj7N~WxYCHNK-#&%*uw?IDf7@c;oWDh@oxt@rAJ^S6Ioqmb=Qr>F zjy-;hS0f5PC%;eH|L|`54=?;j7XI#$|BwIfh5ufKe`C4+mjJHcJq!PR3;*UK`@aL| zzfX_3PuTu6Lnbp|uj$u$W4!MXk?u40Bx;wLa3PlzkzCKY3&9FI> zHcR8d!sWP*qHnIvw(1k$Sq;8_)j_Xw&w+sK-j(+%$2bAG*H*j!m7G_3?&vS?SjUg= zI#GRW9q&}ub?=$5Y~|U|8}~nl+*4a)s}&<|DqtD!7Q-g-C~b_@b2DR|49-}Ot@CuV z#V0a>h>fwzd-aZWiOLraYiihz+qY}Rf0mUnIi?!9r{UMkJ*bhlH%4PO|Htj@6Qmeg zz2G8%8BmH*PNXHQg)H?V;bTpMz2v~q34*7vVTQ#&R%@8h6qF7kw~Y2!T}XPH0yqfty_ zEj0IlYoc5;|Es<^&eMqNx_8W{(~oc9MXNLEr*+I2*V%mMOSbS+@%%4!oP*!2#yezNbqRP1H`aH;D~#Q7LgQW8cfzZX$4=;+-Z^4+&svuA znk7$fO;#PD(2hCC+?F?xsKajJ=>HUk8H>GVpHCdu2J*r^vT=KB1+x?;4<&xO}QuU(oMN1@A6H# zC+{jvxhL=J!(M<~Z7`JHRtKW_EaX zc`xp1ksoZ4A8wH!Z;_wsBezx0A~)x$fAs>m=ZxxEw@uzT@5q_ksHLrX341T+esKU2`P8hZy_(<0 zw!OL(`FP&BqQ9&~e=G6?zeyT-+pF99=-aACk*9LNdJi6J(LIUWUHd)NN7r7xuWkw- zw@gkyPGY~4Cr=%BNY~8cM$PP+J!8+^n&VuMgS0t+0~+7a*o^L)&@pw->2v71r*uu% zvT3_0w#{>_Uo{ZieN6MHZR*>rrQynX8iYKhu5U;mU3;}=i@Z*YJhVmLppV>EZGzk} zxp{xK0?#^ToR65sSa)v9YX?lf>X0UvM+*$^MsUq>v{hZ= z`s|$p?&?0Kb7nop3y^!q-(FqVNA6eMgxq-|zb4FT{LIjZeH+}&t`i#gyWqsW8(hYI z1i6g;Xdk()dP3j&K0OU?J}cU*XZqN+SI_m4`&BO>cONrl7O%mL=SAb(?PJqkz1$+d zirh83yW!&eylISjue{Y_`vG#fKR)cE@8=C`?C-_{v7W~@;mWbD+aj;mA`e66rG@Wf z!?jl%_0jjMHbwRfVYN1lta(eg?5nN%*tb^)waACG$Vc>%`&AQ>8;|dvv#ZH59O}xq zV_P)|JFijHcqM&*8q-nj!N2p2UZEj3pX0#B?*R6rs977fal_x|_Zl=W<0%hmNKN@K zoRPipxzF{Pxn_?|TZ2nH&(t!W@(^q}5*zV7Km09frCRnkH|3RUc|2oNUa^)ZAeZ@_ zBd;>QT=F|NWqxJzQwG0JE!e1I2-ezL({ha2_=9O)tD*5XxF+W>cvVA|8;tG3wXA*! 
zt7MT{)=r)N{&p7srE0z0Aan!i2h)}(*3$IK*0CD$GW2q`A;`NO*S_=Ti*rz}7D3{W_e!oNwpl zh}uS-YwJjQ*M`38jM+h-nB&35lr!dpT9(mft{HQo#&=lmMe0e8Q?NJ3$+iEfHI}!TcYe)fZqG(`zSO&C&!Znq zTY$DfEuRneQa0B`w6$tk`^#yLWmU#}CA~I||0;TA=j#jf+Q@BR%btmHiF*y$OZ)5S z)fp@L8^Ebc{$^U~e><}0kN$VkI~T4)<$G$`v1VNNfy;Q$Be&Ojuf7%guAGy}G29-3RKgsb$xI z^PcC4^X{`mS-ThMog+EtUD;=c+_L=p9zf>fUU0^5-su+0wIQQ2PZzRW+4R-+_=fGg zont!_SuVEX4Vg{nzq{b${|X$}&YnLXfSp$<$M7M5k{i5ZQ1*Ezr~cnHF7HJ<1DS_D z_C61_Uj_Rm!S0dgY5FgP?0u8Zz1dGkwO3mr^G|~}zOwJQa>m=f#`@^L zBeLU;|4y}DUVmlZiTS7Dzgvy%@o;l&hMhg-fO>&BCGei zD6;;83b}Dk7P5N3jbg8UokI2;_1h?VWxtI|*>9uB#`F87l>LT@tpA)A+3%QA?{`co z`yErte#1mIpWiW&jqkTiDf=x`%8wSZ{!bLL{(jGt_RqA)e$zzne7(>jzgWn|_j@Mx zJ|F#_iLCz3LN=b?G|}IQ>^Dth^?uVtJ_6ar6WRLLEM)WfJrlk5e$Pbq-1d7Wvhn?% zi98Y6@0rLxcl?%#Y(BqdBAd_enaKM4Jrh~I-!sa?DcrNp&EWax^KwT7_vHd`@;kA& z+z-mpwSnc%!e85d$a0>MiMbHCKdp>809ihM3xnmJC&s)w)*{HpQ)h1Xo?LP-29`^V z#ldpELyVzd$F&5qy#DT0W&PDH1C~?gK2_e9+@9adg3qLvcO1^`a$xtj?=1Q#YbSPJ zdlvnZ^J96i=ZO6~%kxNiTkfuNwl;WidVSPs;~6ENxmy?P z+{tI|)&o0t)?yy#czv+EF+A6l#T(EY!<^b}2%bx?jk?4f1~#VS*H2kn=h*pnp6oNO zjlhm8>uY1MobPJ-tCK75q4S-5n}W^fyH>`q8M3_op1sQ2yO*{Gx32qb;N-ROJ!@OA zey%BF`dr@*Sza5T_dC$GqUmcMWo?pY7jSEy;c)WWcutG}o5!)5$LIg9$nx6kM!!4F zcLjaTqpVHxi~_gj*#k~q8^^OJ*gU?sXFPi$%WLC!_NI-b>1!TkZIWj{aBH41aPr#t z{<=TdJh>OXe;j};ugzHc18Mux^fiyNHpz1+xHZpVaPr#tJUAR|9^YRbk7wKw$nx48 zNq-dW5SqT`QPxK6^Vf6FH8l?GnzZ*^=%61@^HM*d*4ro7M6fY(E_8zBUdG3BLHTH! 
zv5YN#2Hx)%+a%z{r$%F;4*}rhDmBntMrJ8~593HE&G!`qKsKiP&zQ>EB<7jm#B`6GMRSkHYvVpTyXKAQ zUONZbnC>-WDr=LN=YkW{{qRYe`$0Z7=heJ1-B;%$8`FJdOl57f_ssuPtygyeSUI|j z!0t(P7t$-|S#dFVF8=zc(?(l)_gL1U{r|R4K8>z*U48~mKJ)xpuyM0Le*E*E7QSbBNQgCaY%i!d-agAIKHjit>@w<*bk1Vf^YwZe}YfWGCC~K2ESAtvf zTm>hujqCdhVDpsc_SMMp+Blvs(niwsHIK43$#V_3HP5wh^4hqTt^=E=Jh#7uEU%4g z^2@Y+Y5JN+S)1g!9^9JeD{%7KxV~-xo2NXtzltocjqCJlG}o!V=26zhbuymb^U6MJ z`0LoPHtW8vG0PD&l&%Hu$=F-*7seoT<+C9U^!#DcJHNa zMU&UYGycAs*T(t&0rK6n%(t>OiTOitV!H3{r|nFW*T%j1qnbCS_vis+W9A+yYm=A{ zf)mp^e@q)mlTV#Lsrl6TQ)FXi-6(65m=A#y)BW@_n)^v!8_)Qk*Ss-ZE5ATCX4ZdKR29KLb{FzWly?9z2kyPwf8;mUkS_(JN;j{sML$jH6DQ zr|ISO_j~On@S?TuMSA7b_E)gA>Gv|dva$7h1?+d8KI*i2fnGlTuYngV zur~7fM*lll{tcSn=*N=dA7FWNyh^W}`rihdSKV9m%6@119{Ns$Z~jK3{!L`L)b}n} z&a>xzdaw5xo&3LOEc@y`dR71AU*0=;{jKXrejBkKWBWUavbj0hs-wWu>E&HF#@F9? zsr@5xYX1`O#NN^#&F;FgI9MEbr_>R*f{dZy%4xH_W(Hg+uX-0&G3^)v>I5NjUlVEd|zBK7Iqigy=cXm9N3vd!N!qK?iIkTxmSdfPwth#a<JUR`pp3O0^>=4v&tHL1&7tqyLT-x!{I8>7pf^|x1L>m7=26Y#$D^5*cjS~+X?dAAu@E_>E9YI9`y*lYpz zH(YIup{z~jYin@k%inU9Ghf?)oiBNP{q0xI*skYo!TOf(@Y})38{awK9<1NoTHXOH z7yXX4et%+mUhIUdjaRsxYtA(pZWlOxy}}KzIqk+UyPjbq;EZVueYBZRE_J(ttyi0j zX*aO*mGz~poqEr?-NBBz{GJ#IC!e!$RBh9G7VZJBE_QnscDGZHbK)HA1+VUGaw_i) z)>l5hqrui;j&eWl1FtT2`xbWoK9d~#!K+J-F<^b=&#d$A4>q^D#aP$Yasb%*U7e;r z{$s(;gZj+Xfnet{drMh6Yd9AFgTdKb2Z5Ex;Ku-wtQdcK$gqBRwt)_-rIb~`j8QCL-Wt?+_S!?cr8NPn)>{6lSOOn zzFdrE?j`7*3%Ltu+IYWG&!=iW@`cFy=H2CDu$+IsFpoHn;nOtz)aB0{K2!4_kyD&n zeeMsUdHMH*taT~+Rr9PAzBrmm-_D9RFp= z^7q$c`#e}K>&2SnGCyAg%VjRE1ReBbO*y)+fXldF zMV514CGOY2iK{-oUk9r*u5+Xu-A&*!?l+L-Zl)#fE#SmeAK!0+)fv~hR*vpAa2fYo z$a0<`iTiDE;;N7D9bk3F^`0q5_g%1aAy#li~cUKV~qa$V7W2eKhOHRY5K@} z-|wNnm*!gVewb5PyTtqf*zqOi55aQL-w(FV=zj#3OU?(t`p75egJ5%JUn^^8O!vf( zY3kiC;_R1|z^l@{R;IaMR-|8r=6+d?KKtV#aJgR|Zjpb}BLB8UezHY=wncuikY5A8 zUd!1}KSQ=I=QVTnbFh5&(=Whsm(!ekYnF@sFTrxzPrm}omHX+}$XQ3~Wu3gDo6J_a2faa$a3X=`U7&}s*mpKzkm~0eSBX4t23@^Lpi#a zz-8ROBFnu@OWaq$iK{-ouY%PX*EOab-QU38%j~r`z;e<59qc`c{vTku?8P_1`p9Q5 
zz6Eyex=zihtX*Qh4fcK{<~v}y=-&l9uhG8;mP^img7uM4&VPZ;nfCFj4v`p74zFGc3OoaQ){wKL{Q^b6E_&m3{ioFVAeqIs=B^UN7c zzb4HyXKnhNIqmo@RLK6FrBYtJMfSf%D)s)iNTuw5i&V-(TI6+FWdB>F*!y<^{cn+! zb0+mewr=lr=CVJWe9okWz;clL(y*ymdn|)4Ok!foGsge z-D^2Rm9|#M~Y%7yS-k*Fp3DQ-uZf{7Rb9)SaIX4%EKd_Jw z10Pg1eVJ=dN5e7JVy^f&iYm#-$TLboDXYN zj_wGsTxvZMZ2eiA%Gzk}9355boj-BrZ)0?u(Y!XHIe#0`Z%T9iHmA@09gSZ(e-jIN z3i#MsPE8%i#wq7-Je+*yZvt2@^JfmZjI$Fgm-(9nmMiCPGIHimeSD7rt8@OWRXMuj zz;aomQ^9i9K8=1l&GBSDm9^2|IqRzR&Zju@xiz}&XkOdUoX;)kx1~9s+tX)0XX01R z=bRS#lookji+pyAe1403aUov(9^LPSSF8Aj| zuv~e6PD0N8Q6Jxv!Rov}j$JvrQ^952(~#xL_k`1t6IXqF=YrK4*Ev*Wu3>RgUfga2fYg$a3X- z!iC6*t3JLLfz=t;wV@o{r@>|1&mhZ{?+KqpPF(fz{Tx`Gab07|(Om}iUS?li4wj4l z^I-2u^jCo8qQ4UC+(v&DST6c6fSsf0uLjHIcjhmG&1qbH)w|xWp}&^q9>|_n);_VX zQ%6heFM;Kv|1#L|Mt?n6F3+{Efc23#=RNGL8^Gqw^IKUvV|rG6wbpx%h;xqgN4FEr zYX_R=$N>5sX`UlH(`O&w*ofD6AKwHgpM88YSk8T%_o{Co=l-hCIBx-~vnFwBS_a<{ zG_PG~scAUans%l4Y+H){4*WdZQsZsN+LdR`?QrrrJH7>$b8Y*Z>XP)|MwWN28B;lb zmU<_65H|AGs+?N=)*DIl+KrZ4cL!VRD0*vMk^Ub1%38mNtX)~_U2yWL_4{DC)Vc!p zcO%PNt1*?W)mrWauUhA_R^`+>1b#1?*B-Rgx+mCL_olbj!SoN}SJwJNWbMjY?}w95 ztv>?GrPkH4e*jtDT8*h}twYfL7`#@U%UYFFtM3x~(!55~QtLioYu%6DTGyq27{9XC zhmf@^YyBCVd}{qUST41$gZ(d%<*n72%GPQvzXbdKCvUCFsnzH70W_~MwA8vk*jmTZ zTdVIokKgmbX@8DqE|yJOTDyOWs$me!8ES} zX{q%fu(cjSZ>_#dJcVCb>+g}ZD{K7&oP28iBUmoAZh`%i$nw@|Ol51emZ!nK$H-f& za%x==ejLr~NSd`CMt@X|kD&hq&AN}Kw{G93pCv|F_jAbllyyH3C!cfc&tN&vE$cLf zT+XVO!EzbrU%_(MVDC9M4g3ntddwrQ??C!jYn|u0=k06AaycJg2fswq&$#B7(=Pe{ z1}^izfh>0eKFR-g(a>@S|IQflhemU)u|7~!Y{~ctxyw5oPcWLIo zk5-QVy;_(2|3sEc{(pg!-?-+N(=Pen2bcLjK$gpUkNH2OC4V{ok7`}={~K8@`B^?a z`HgFSIqi~v0pv3Og2-~N^UQx6viV&@<@np-)g^yFuw3%@2WR}oHNTv8$-fY|%s&8G zF7Hu}e_`b0FUP+Kyt?FH6fBqgi-D8hxaODBuKcr5#K!-38eP>%b3cx!pH$2cBWPQs0xFno>_Ty4ux$H+{$YnkTg5|Owmj=r@?|Bzk2HASbJj=q# zXY9*?Gj?M*b~){mYY@21wLG$%bDVp>0N58$f#s5aWpMHv*ZgwYCI2em zGXJW`a^>Dz4LSMCd%rroy5t`WmP`I2;N&;1`Q@}r{x!g5{xy;1%DuN1a`Ic3zMg$+ 
z!>ddFb-;4Tzb-iWjca~6?UH{exXiyEvRt|M)<;f$>q`C&;MFDnhG4nm9|lf-7vT=QGP$!BkE1(vhc)Uq{lYEf@3a_*o8h&by0<|#Mp^f^aPp~pJFr|?_x8xCTfKG5HTP!o_q836TVwAC zC!h0UC$OAr%5Q>0>32q!H>UL~dv6a%Hv;T9)j1aB{C$;O!Q;^Bqpo%SPDJN5o0j>T zQ)AE26KKxgN%YR&c>2-A$^2=vPc3Kuc0)EsIe)vu$!GpXg5}Ej8-<+tQ}1}>ntQ7` ze|sRe#@-W7KJ&L1ST6I|K`nbD%Nx`Bm7Tu{==KFWPIZn&IrFz4_!xBhs4M61Wcax> zuTyEx-zoH`75H?p^EZ#)`BOiJIGI16l?S)wnbdrD+aJ9#oIlrm_V5Al>f9&#jRhOu zIV|lCgjbipA9WB|U-|eR4F0$rhrp|g-J#&*D9_Tv;MFC^;b48`7h`muw?}{}v+?JM z)}T$|9tlqVGVW3E>SFf^aPpVWl5z0rlK*J1zVh+y0DoML@$l+mHvya+<#|03UR`o@ zg7uZp7$<=H^(v=r zY^H)U=5oHK!K;hiba2L8*3kv8E@Pen)>l5h$AdpE$4q#2v6}@>j`ALN!>db<*NnOAGjCUH*yCx3arPJ~w%yOY4lU(UtJ@amHP6tKSX@jVs%aXC(dR~Ng}!O2n9 zGFP6K9P_~X%4dx8!5O19Xp{SY2G}{!Cb`Z8Ta#<29M4(s>f(DgSbzEWo&(O>(bu&j zr)_M`1!v6Vn)oEVy4alu&X`NP^WoKH%ol+5m5=YIz#o_6LU?tty9k^d<=VLzUR`p0 z8mzB;=JhjR=hfdztU;T^{VX{7%ebF|R~NfWz{y|E^`-FYlK(QWzVh+C9Q<)PJ`b-h zc2|Itqg*>z!mCS;tHAooXN+F}XN=aMP455IVCO)an|VQ z>%dt%&Y5dRPTSag37j#P^YvwTb+Nl1oH3Vmd<9-z#(V=lS!*vHK=C`OCSu6<%HP-v-uKKEAhu zKQ70&;MK+M+u-CVYql4b{0=x{v<7W*|Gx`%4zx+G?}4q!wNsAgE_ikE z{XSTK`S{)q&f2j~*N&XF!+B2n8`*teA|P4lQvd+V|0U(kD5vo=4c$yu{FwLAx2}~xz{zK={1Gfyu9YW|<+E0v275X0>Yk#>nNOVg{1Z51 zDA&p}@akgsY++ZfmFM8q-BquZ=fV2Qe~sL(jX%@mo!b}aQ@*6I9cZ#~*(-t1%h8rXUD-sxu?$M-k- zj8B`_X>yKFoOSUAIOEIO`8%9^{+qFXfaPpno@w!a6E5p2{%^s_$Nz1xoXtyr&&Z|e z-+@#1??9SI+r)ks+#35mIQjiJ(Ol>M1j`wx9l0M3$P6^kKy9^2%zuGf$NN5;8M{x4-|2J69#y<`Jj90<&`p#q?Z4-L|bgi)$)Si~{wt?juLFtT@ zal5az)yDOb`)yzDbN74JZ9BHkN9#9mKX`Sq>ksx^Y_(emUR|Dp1Hk&q$9G|Ha+G^} z5qNd6TNIoe#&2<-y5O*0KV;y5v|9tgn2=zY^Ho>Jo2d zaK>*u$1kUCY*qoMwsKsn!mEqjYT(pX-ow@5)upz>SDJhI62CB zS_@uXa;y#3S3cuk2W)P2iMK8|<2SG4m(w;jL&2%7ob&bI)x~aoaB3^ZumQZf)V3j5 zU-|eB11Cp0u8rW;#cpG8a+G$Pz^hA+O~LxgXZ)Li&8;r+HV0??#&i5~+Qw!JaB3^> z|CaFTVz(98+U}#3c3Z=%OKsbL^_7qBw&3I_=WaWAb+OwXoE+u7*a2Q$a_k7!S3cw4 z32bh4iMKP@ysb6w0H?$9DwSxr?uJC#P*}b_Hi#Wxcz>tBc+4;Eb!ByOHqf zGOkfzedXi32RJ#(HL)kWy4dXnPL8scz2Vg*$7ry=@)`d=U~{WWynVr$yE5K>@ap0_ z2CTn)eD?=u?(}u;8eGGs`7Lq)*n00}j`J-t7EV6zmj{C7jFE4VgOKH&N8fV}1$+5^ 
zsqPTk5j5X(#KU<$_#5tFV8=5OY(AeEhr`KR-@)|CJ|~T-zj5M!B-naO|D)jKX#%znh`%Oq$nx+Sd3Epg)7=e4Ry~KmVPAUw(Vn=Ga2^xA|#> zd?)g}Le_qMA-{%vX(6k>tdKWj4sR`F^|uxB6!ec4viipg`4aR`7P9)MYS}q)9LFI$ zPUp+9DEs$Woa3qBo#^G&J9ate+wZd(V8`qCnR?}}LO-k4`<|j+d1kG@2X1z)_bgJc z++FKk+jDAt){^q{TJKsqq1NYqE9V@t{u7a{+5I$^-s^pKh`cf>s*~thF4f8O{8OF6 zzkJs|mEQMHdt;qO?|hw3@0!prb9e?gbLjn5b`Jf!e`kV6(91iuE+Sk5^z z-qQ4EBg@C;oZ7~Dip{xj@_GOMB-ruzy`jJQ+^6%vxlj80eQ`dVeBQq=0LvLC@7>M| zVxw+twMk8%0=L$EA)I_{E~;(Hx-W*4Pu-seTlY|!{_0crXTYs>e-=(Yb$<>lXB;nM zrGDpITWvhQyyxCK?{)6o<>1^q_nxx%ZYAP;9z2p>-dOHcxs2}$uv~fXu0)oP%~iEc zIR{^Wlg}Jn4bHvOUw!<)2sTH4195#^gDfBaYr*k1mio-$b>P-H{1Tje=J3m4IpbvR z%6qS^HmT`)aO=3g0w*7v8)}hF-{WAl5k_2gNvtWE0uL#h*c~EV6uTo&#I2HpWoaM!nDV z=fTD(*WjO#~urahThO##0J(!E^n0!Zl39P;Euj0JFx@XU&d7Vx3{ndBTb7(&A zK1pv*@5QV5$!FbqFJ6O_&$ra;;8$q!8Sme~`jvC?2Aq6;AN)I5&gSJl&j0t$Kj2)W zBWT(s?wjD&xNpJ9C+^!|Ih!%)dg8uQbH>#!|6R$uU~{U^|G(FJV0p*q94b49{{!r0 BF?0X` diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 0753054..437a31a 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -32,6 +32,8 @@ const K2_PER_TILE_SIZE: usize = 8; const N_CIRCLES: usize = 1; +const N_WG: u32 = 16; + pub fn render_scene(rc: &mut impl RenderContext) { let mut rng = rand::thread_rng(); for _ in 0..N_CIRCLES { @@ -98,10 +100,10 @@ fn dump_scene(buf: &[u8]) { } #[allow(unused)] -fn dump_k1_data(k1_buf: &[u32]) { +pub fn dump_k1_data(k1_buf: &[u32]) { for i in 0..k1_buf.len() { if k1_buf[i] != 0 { - println!("{:4x}: {:8x}", i, k1_buf[i]); + println!("{:4x}: {:8x}", i * 4, k1_buf[i]); } } } @@ -114,10 +116,17 @@ pub struct Renderer { pub state_buf: D::Buffer, pub anno_buf: D::Buffer, + pub bin_buf: D::Buffer, el_pipeline: D::Pipeline, el_ds: D::DescriptorSet, + bin_pipeline: D::Pipeline, + bin_ds: D::DescriptorSet, + + bin_alloc_buf_host: D::Buffer, + bin_alloc_buf_dev: D::Buffer, + /* k1_alloc_buf_host: D::Buffer, k1_alloc_buf_dev: D::Buffer, @@ -149,6 +158,9 @@ impl Renderer { let host = MemFlags::host_coherent(); let dev = 
MemFlags::device_local(); + let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); + println!("scene: {} elements", n_elements); + let scene_buf = device .create_buffer(std::mem::size_of_val(&scene[..]) as u64, host) .unwrap(); @@ -159,6 +171,7 @@ impl Renderer { let state_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?; + let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; let el_code = include_bytes!("../shader/elements.spv"); @@ -169,8 +182,25 @@ impl Renderer { &[], )?; - let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); - println!("scene: {} elements", n_elements); + let bin_alloc_buf_host = device.create_buffer(12, host)?; + let bin_alloc_buf_dev = device.create_buffer(12, dev)?; + + // TODO: constants + let bin_alloc_start = 256 * 64 * N_WG; + device + .write_buffer(&bin_alloc_buf_host, &[ + n_elements as u32, + 0, + bin_alloc_start, + ]) + ?; + let bin_code = include_bytes!("../shader/binning.spv"); + let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?; + let bin_ds = device.create_descriptor_set( + &bin_pipeline, + &[&anno_buf, &bin_alloc_buf_dev, &bin_buf], + &[], + )?; /* let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?; @@ -253,14 +283,20 @@ impl Renderer { image_dev, el_pipeline, el_ds, + bin_pipeline, + bin_ds, state_buf, anno_buf, + bin_buf, + bin_alloc_buf_host, + bin_alloc_buf_dev, n_elements, }) } pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf, query_pool: &D::QueryPool) { cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev); + cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev); cmd_buf.memory_barrier(); cmd_buf.image_barrier( &self.image_dev, @@ -276,6 +312,13 @@ impl Renderer { ); cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &self.bin_pipeline, + 
&self.bin_ds, + (N_WG, 1, 1), + ); + cmd_buf.write_timestamp(&query_pool, 2); + cmd_buf.memory_barrier(); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); } }