From 9a8854ffab17188e6eff6462254b04aa33fcb205 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 11 May 2020 20:01:06 -0700 Subject: [PATCH] Experimenting with sort-middle Starting a prototype that explores the sort-middle approach. This commit has a prefix sum pass computing state per element. --- piet-gpu-types/src/lib.rs | 1 + piet-gpu-types/src/main.rs | 1 + piet-gpu-types/src/scene.rs | 43 ++++++ piet-gpu-types/src/state.rs | 14 ++ piet-gpu/bin/cli.rs | 49 +++---- piet-gpu/bin/winit.rs | 11 +- piet-gpu/shader/build.ninja | 3 + piet-gpu/shader/elements.comp | 173 ++++++++++++++++++++++++ piet-gpu/shader/elements.spv | Bin 0 -> 26760 bytes piet-gpu/shader/scene.h | 238 ++++++++++++++++++++++++++++++++++ piet-gpu/shader/state.h | 59 +++++++++ piet-gpu/src/lib.rs | 204 ++++++++++++----------------- piet-gpu/src/pico_svg.rs | 8 +- piet-gpu/src/render_ctx.rs | 226 ++++++++++++++++---------------- 14 files changed, 762 insertions(+), 268 deletions(-) create mode 100644 piet-gpu-types/src/state.rs create mode 100644 piet-gpu/shader/elements.comp create mode 100644 piet-gpu/shader/elements.spv create mode 100644 piet-gpu/shader/state.h diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index d85df70..288f71c 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -3,5 +3,6 @@ pub mod fill_seg; pub mod ptcl; pub mod scene; pub mod segment; +pub mod state; pub mod test; pub mod tilegroup; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index c0b9d7e..033bec4 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -5,6 +5,7 @@ fn main() { .expect("provide a module name"); match mod_name.as_str() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), + "state" => print!("{}", piet_gpu_types::state::gen_gpu_state()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()), "fill_seg" => 
print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()), diff --git a/piet-gpu-types/src/scene.rs b/piet-gpu-types/src/scene.rs index 5f95c40..7451c9c 100644 --- a/piet-gpu-types/src/scene.rs +++ b/piet-gpu-types/src/scene.rs @@ -4,6 +4,8 @@ pub use self::scene::{ Bbox, PietCircle, PietFill, PietItem, PietStrokeLine, PietStrokePolyLine, Point, SimpleGroup, }; +pub use self::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform}; + piet_gpu! { #[rust_encode] mod scene { @@ -51,5 +53,46 @@ piet_gpu! { Fill(PietFill), Poly(PietStrokePolyLine), } + + // New approach follows (above to be deleted) + struct LineSeg { + p0: [f32; 2], + p1: [f32; 2], + } + struct QuadSeg { + p0: [f32; 2], + p1: [f32; 2], + p2: [f32; 2], + } + struct CubicSeg { + p0: [f32; 2], + p1: [f32; 2], + p2: [f32; 2], + p3: [f32; 2], + } + struct Fill { + rgba_color: u32, + } + struct Stroke { + rgba_color: u32, + } + struct SetLineWidth { + width: f32, + } + struct Transform { + mat: [f32; 4], + translate: [f32; 2], + } + enum Element { + Nop, + // The segments need a flag to indicate fill/stroke + Line(LineSeg), + Quad(QuadSeg), + Cubic(CubicSeg), + Stroke(Stroke), + Fill(Fill), + SetLineWidth(SetLineWidth), + Transform(Transform), + } } } diff --git a/piet-gpu-types/src/state.rs b/piet-gpu-types/src/state.rs new file mode 100644 index 0000000..35076f0 --- /dev/null +++ b/piet-gpu-types/src/state.rs @@ -0,0 +1,14 @@ +use piet_gpu_derive::piet_gpu; + +piet_gpu! 
{ + #[gpu_write] + mod state { + struct State { + mat: [f32; 4], + translate: [f32; 2], + bbox: [f32; 4], + linewidth: f32, + flags: u32, + } + } +} diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 839c262..82f3491 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -5,7 +5,7 @@ use std::path::Path; use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::{CmdBuf, Device, Error, MemFlags}; -use piet_gpu::{PietGpuRenderContext, Renderer, render_scene, WIDTH, HEIGHT}; +use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH}; #[allow(unused)] fn dump_scene(buf: &[u8]) { @@ -16,6 +16,24 @@ fn dump_scene(buf: &[u8]) { } } +#[allow(unused)] +fn dump_state(buf: &[u8]) { + for i in 0..(buf.len() / 48) { + let j = i * 48; + let floats = (0..11).map(|k| { + let mut buf_f32 = [0u8; 4]; + buf_f32.copy_from_slice(&buf[j + k * 4..j + k * 4 + 4]); + f32::from_le_bytes(buf_f32) + }).collect::>(); + println!("{}: [{} {} {} {} {} {}] ({}, {})-({} {}) {} {}", + i, + floats[0], floats[1], floats[2], floats[3], floats[4], floats[5], + floats[6], floats[7], floats[8], floats[9], + floats[10], buf[j + 44]); + } + +} + fn main() -> Result<(), Error> { let (instance, _) = VkInstance::new(None)?; unsafe { @@ -23,7 +41,7 @@ fn main() -> Result<(), Error> { let fence = device.create_fence(false)?; let mut cmd_buf = device.create_cmd_buf()?; - let query_pool = device.create_query_pool(6)?; + let query_pool = device.create_query_pool(2)?; let mut ctx = PietGpuRenderContext::new(); render_scene(&mut ctx); @@ -31,7 +49,8 @@ fn main() -> Result<(), Error> { //dump_scene(&scene); let renderer = Renderer::new(&device, scene)?; - let image_buf = device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?; + let image_buf = + device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?; cmd_buf.begin(); renderer.record(&mut cmd_buf, &query_pool); @@ -40,28 +59,12 @@ fn main() -> Result<(), Error> { device.run_cmd_buf(&cmd_buf, 
&[], &[], Some(&fence))?; device.wait_and_reset(&[fence])?; let timestamps = device.reap_query_pool(&query_pool).unwrap(); - println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); - println!( - "Kernel 2s time: {:.3}ms", - (timestamps[1] - timestamps[0]) * 1e3 - ); - println!( - "Kernel 2f time: {:.3}ms", - (timestamps[2] - timestamps[1]) * 1e3 - ); - println!( - "Kernel 3 time: {:.3}ms", - (timestamps[3] - timestamps[2]) * 1e3 - ); - println!( - "Render time: {:.3}ms", - (timestamps[4] - timestamps[3]) * 1e3 - ); + println!("Element kernel time: {:.3}ms", timestamps[0] * 1e3); /* - let mut k1_data: Vec = Default::default(); - device.read_buffer(&segment_buf, &mut k1_data).unwrap(); - dump_k1_data(&k1_data); + let mut data: Vec = Default::default(); + device.read_buffer(&renderer.state_buf, &mut data).unwrap(); + dump_state(&data); */ let mut img_data: Vec = Default::default(); diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index e5f174a..1c263bb 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -1,7 +1,7 @@ use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout}; -use piet_gpu::{PietGpuRenderContext, Renderer, render_scene, WIDTH, HEIGHT}; +use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH}; use winit::{ event::{Event, WindowEvent}, @@ -69,7 +69,8 @@ fn main() -> Result<(), Error> { device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap(); let timestamps = device.reap_query_pool(query_pool).unwrap(); - window.set_title(&format!("k1: {:.3}ms, k2s: {:.3}ms, k2f: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms", + window.set_title(&format!( + "k1: {:.3}ms, k2s: {:.3}ms, k2f: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms", timestamps[0] * 1e3, (timestamps[1] - timestamps[0]) * 1e3, (timestamps[2] - timestamps[1]) * 1e3, @@ -93,11 +94,7 @@ fn main() -> Result<(), Error> { ImageLayout::BlitDst, ); cmd_buf.blit_image(&renderer.image_dev, &swap_image); - cmd_buf.image_barrier( - &swap_image, - 
ImageLayout::BlitDst, - ImageLayout::Present, - ); + cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.finish(); device diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 0aaecae..b429a71 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -18,3 +18,6 @@ build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h + + +build elements.spv: glsl elements.comp | scene.h state.h diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp new file mode 100644 index 0000000..5cede7c --- /dev/null +++ b/piet-gpu/shader/elements.comp @@ -0,0 +1,173 @@ +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#define N_ROWS 4 +#define WG_SIZE 32 +#define LG_WG_SIZE 5 +#define TILE_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +layout(set = 0, binding = 0) readonly buffer SceneBuf { + uint[] scene; +}; + +// This will be used for inter-workgroup aggregates +layout(set = 0, binding = 1) buffer StateBuf { + uint[] state; +}; + +#include "scene.h" +#include "state.h" + +#define FLAG_SET_LINEWIDTH 1 +#define FLAG_RESET_BBOX 2 + +// This is almost like a monoid (the interaction between transformation and +// bounding boxes is approximate) +State combine_state(State a, State b) { + State c; + c.bbox.x = min(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + min(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w) + a.translate.x; + c.bbox.y = min(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + min(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w) + a.translate.y; + c.bbox.z = max(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + max(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w) + a.translate.x; + c.bbox.w = max(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + max(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w) +
a.translate.y; + if ((a.flags & FLAG_RESET_BBOX) == 0 && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y) { + c.bbox = a.bbox; + } else if ((a.flags & FLAG_RESET_BBOX) == 0 && (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y)) { + c.bbox.xy = min(a.bbox.xy, c.bbox.xy); + c.bbox.zw = max(a.bbox.zw, c.bbox.zw); + } + // It would be more concise to cast to matrix types; ah well. + c.mat.x = a.mat.x * b.mat.x + a.mat.z * b.mat.y; + c.mat.y = a.mat.y * b.mat.x + a.mat.w * b.mat.y; + c.mat.z = a.mat.x * b.mat.z + a.mat.z * b.mat.w; + c.mat.w = a.mat.y * b.mat.z + a.mat.w * b.mat.w; + c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x; + c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y; + c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth; + c.flags = a.flags | b.flags; + return c; +} + +State map_element(ElementRef ref) { + // TODO: it would *probably* be more efficient to make the memory read patterns less + // divergent, though it would be more wasted memory. 
+ uint tag = Element_tag(ref); + State c; + c.bbox = vec4(0.0, 0.0, 0.0, 0.0); + c.mat = vec4(1.0, 0.0, 0.0, 1.0); + c.translate = vec2(0.0, 0.0); + c.linewidth = 0.0; + c.flags = 0; + switch (tag) { + case Element_Line: + LineSeg line = Element_Line_read(ref); + c.bbox.xy = min(line.p0, line.p1); + c.bbox.zw = max(line.p0, line.p1); + break; + case Element_Quad: + QuadSeg quad = Element_Quad_read(ref); + c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2); + c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2); + break; + case Element_Cubic: + CubicSeg cubic = Element_Cubic_read(ref); + c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3)); + c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3)); + break; + case Element_Fill: + case Element_Stroke: + c.flags = FLAG_RESET_BBOX; + break; + case Element_SetLineWidth: + SetLineWidth lw = Element_SetLineWidth_read(ref); + c.linewidth = lw.width; + c.flags = FLAG_SET_LINEWIDTH; + break; + case Element_Transform: + Transform t = Element_Transform_read(ref); + c.mat = t.mat; + c.translate = t.translate; + break; + } + return c; +} + +// We should be able to use an array of structs but the NV shader compiler +// doesn't seem to like it :/ +//shared State sh_state[WG_SIZE]; +shared vec4 sh_mat[WG_SIZE]; +shared vec2 sh_translate[WG_SIZE]; +shared vec4 sh_bbox[WG_SIZE]; +shared float sh_width[WG_SIZE]; +shared uint sh_flags[WG_SIZE]; + +void main() { + State th_state[N_ROWS]; + // this becomes an atomic counter + uint tile_ix = gl_WorkGroupID.x; + + uint ix = tile_ix * TILE_SIZE + gl_LocalInvocationID.x * N_ROWS; + ElementRef ref = ElementRef(ix * Element_size); + + th_state[0] = map_element(ref); + for (uint i = 1; i < N_ROWS; i++) { + // discussion question: would it be faster to load using more coherent patterns + // into thread memory? This is kinda strided. 
+ th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i))); + } + State agg = th_state[N_ROWS - 1]; + sh_mat[gl_LocalInvocationID.x] = agg.mat; + sh_translate[gl_LocalInvocationID.x] = agg.translate; + sh_bbox[gl_LocalInvocationID.x] = agg.bbox; + sh_width[gl_LocalInvocationID.x] = agg.linewidth; + sh_flags[gl_LocalInvocationID.x] = agg.flags; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1 << i)) { + State other; + uint ix = gl_LocalInvocationID.x - (1 << i); + other.mat = sh_mat[ix]; + other.translate = sh_translate[ix]; + other.bbox = sh_bbox[ix]; + other.linewidth = sh_width[ix]; + other.flags = sh_flags[ix]; + agg = combine_state(other, agg); + } + barrier(); + sh_mat[gl_LocalInvocationID.x] = agg.mat; + sh_translate[gl_LocalInvocationID.x] = agg.translate; + sh_bbox[gl_LocalInvocationID.x] = agg.bbox; + sh_width[gl_LocalInvocationID.x] = agg.linewidth; + sh_flags[gl_LocalInvocationID.x] = agg.flags; + } + + // TODO: if last invocation in wg, publish agg. + + barrier(); + State exclusive; + exclusive.bbox = vec4(0.0, 0.0, 0.0, 0.0); + exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0); + exclusive.translate = vec2(0.0, 0.0); + exclusive.linewidth = 0.0; + exclusive.flags = 0; + // TODO: do decoupled look-back + + State row = exclusive; + if (gl_LocalInvocationID.x > 0) { + uint ix = gl_LocalInvocationID.x - 1; + State other; + other.mat = sh_mat[ix]; + other.translate = sh_translate[ix]; + other.bbox = sh_bbox[ix]; + other.linewidth = sh_width[ix]; + other.flags = sh_flags[ix]; + row = combine_state(row, other); + } + for (uint i = 0; i < N_ROWS; i++) { + State this_state = combine_state(row, th_state[i]); + // We write the state now for development purposes, but the + // actual goal is to write transformed and annotated elements. 
+ State_write(StateRef((ix + i) * State_size), this_state); + } +} diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv new file mode 100644 index 0000000000000000000000000000000000000000..e97226cab6d9f24f9299099f741ac590504b77ac GIT binary patch literal 26760 zcma)@2bf(|xrR5Hna~LkdI)VMLKr}?gMbb11w|1N z5V24ML5g5O#fqp9MGy;!Aia3s_niGrR*rcd@7Cq|{_n4=?md&SW56cMR@E}qa{T9# z&Z_yWRteq{b->J?Sv|9RkL;ORQB8}3uI5agI=`p4 zf?26rd9mHZ8MCMKoHcxY@4N+*dq*sG9&**jZC{uI7$3?uyk~;M#uTyq@kUDRFVGW{KE# zRBK|}s;{eB2YkeW?kQzGaz14}>mk>6&HTzfsYf~WSf8?MICuM2eH(&n|0U{MoO^M7 z8(^FIHUjUrVA71qb%CytoX_I=x~gHwu~TOaaz3e1xvWt+HOAhZJ9kuT-8Tg%<`PAR zWCdGQYu{0Af_6}~ z^kv*7>M9j&wQY%CY8wGQYF_v3`BUf2n`L1<=9Bj+=hXwyO-lgw3RBRak1|bh4x46uhRKrtJF2FSnUk1kJrTB?rGW1Z6~fZ?>qJTtz7M1 z_&NDYr2P-?mj7;r|K5ead-MO}zgOYEU*X@}CjTXXD%7=&B9_Pv~Vmi#;Oevv`kmR^yPHcJ%HKWuNR1<@WvIev|k7 zO`0_4EV|}ilJgACWbLx=W9$7rb!PXp`Se}ffJf0c^G-Z--i+R!?2(k*Jeu`Q-Q-4` zF?9#p_RW6k$Q<3~+#N}**7@kDCV*Qx)~b#NPo6Vtl3QuMbFoC0rA7Sj8J*{2iQ7#R zA4fl_iC3&nrk~Y4cVbWLy_9VKv2*o13ER|sDtHF7=sRbQeD|5nW9mESbmR$>duI3S zyP*H9%lXWgr#GHs_A$me^u*xp(vlapWRGZ5|Gt(yswH<+9|5%HtfRUHoLa7H+10gl zRyVZd{v~wd6W(XLWl^-hr-}_nwx#V@vL+9t0%s6X4{1vSrtw_vx10pZB?z z+@JUPmfWBBrIy^E_qCQ>=k2WCZpmIf&AJA#!I>|wE#!`BdF0f!88~%q*|Mwa>ZrCW z>~<;a_E^HMvl`u!`|CfjCHL2VXiM&|e|$^sum9+l+&^z8BDd=6tj-0`?Vi^?s~*cm zZSsfO3enl<=?*HZ-uLNFP zXGb*%xn&IU^qtig;L3Tr!Ps?uUtdDkS$(rj zzN1aPt4+RV3Av-XAGv#4YkeL8&!0ZgYri?x$69jvSqykVLGZ8;acP9sFvx>eDUQEp3^(MXI?$V4Urd*zq8tC3AwA<6S?OsenObv{L!Hq zdq22&bIxqyL%=z&hl9)5lab5VJxj+Dp z7Zmas{O1yTAPu@Yi2psE-p6nDgQ!08CQY{Dd!nPsF?Hb1=9%BPU)Pd{74mhQy~Xic z=T~(Z&-0sjp4&2>@-Xb_n)%%C{ygN*1LpVVKIQdm`CPTI=Q3Lbn?IP}Tvw=jlXjsudV+6ToeDHVEyHWzz(A?{S(8v zS2vVqjLqm1V{@=EPFDVZyT_Fa?ZoH^!mByoCjZCa;ae?SWZ7< z`;sx1bFREAz4H`#Pq5>TJR0m6A|C)AUdRW7jj!HX52H`5hl8zEZU|@`eQHbHu^nIA zYUAEGg5I??hB~=p=+!H?xX$Xtnlr!qaU%Ut+A_4QYPkXSQ8wQc+NQOv{VbZfHl+T! 
z^x8O<)9IDnyYuL^k=wnNeMieB?gFrn_PmaoI%7qD9yoQ$_tiGhUyAJ6(*MKs&Yf$c zd_^t0uG(AoRkSkRoyhKk#Jd;VMbrNQdgHma(Z35W>+2+#Hrg*&<6*VE`8I${ecRT0 z_k#ALYI(IAIQ$r8boanSl+r?6i)l^fgNv)>#QC|c7EiIuk81Oobev5u|E3$1le)M|EINHUVml3H{$=R z8taqzes`!h-jiV0Bk`W9b@JLPFRD4~d8Wqt=>KPA=PLfs)p~jTmHmcEJulR_oZr{s zJOi1pb1`-=Z%t&^&v&EoJ=3mk&W;7UMc&%SIU0xMb_W%y~z6e%~#5P@0GIOdy#$b_`Mfd zf4}#XN3*J)$&b@Kr{4Q}vc~S8D{6i(c7W@x9Nkr5xr_1F_LE>a&vIgZ8thpvV}1r% zK7Ln&sX&fHl8|jySL<$`+vc5iE#~BZVEBRQ@i837Fk|@_oA}?>b?k;Q|CTZ z-j&?mGuMOt-H5#7aIU@tcK1!TkZIb6_ z;PyN}hm+UF@2p>d&Er_j<9q6t$nx6w{`xiTF`B;SQPw7TegkgL^8}o{Hjd}FVDtF> zneqG%Sza5*^LyIkG=0sZtWEO#0ol*Edb*IUM?Uoo1k2~XTNZ2_dDo!%P6sFE@?bgZa6hg9_I$}};~88LEU%6GdL^*m z0u6M=RMsXjR|Y4hXK58=&yu`0p0`0@d1HDeR|Ol>@fcHCo5WlVoS4?RI`ZT6@~Lw$ zSUz>G0XC-R&zQ>EB<7mn#Pp1;g}jJfUK`KR+F*HOde+tf8`HC9Ol55nb6s#^dLGt8 z_B_bPW__@{F+EosfQ{+7GN!UN+I!~@srBmo=2DJsDA+Tp&hIbfyeo!*eOKtCP8)6I zJ!6D#?nC=U{Lj+P$;Rl~_vI#V@|ovN!NzTn%eve%?}3xo#`W3^?7q_1I+V3he>wc- z;PyPj;pDY(k8A-pk9)*C?xQW?1!TkZIWkeaC@E+aPr!?zqbLKr@Xhf zg_GCD@oWcn|LJQUWo?pYB)C1#_HgprxR*wO%~RgnJHW|nKuZ{a_7qEHCdwW+nd2QUMyMf)O`kF^s8~2I#viFs}^X|RBJ38;8d_U}gESK}E zPOiM`y_c7^&z$qUu=VGKoK@x3u(v+-d)N8~xvfumjau*C*r(Pfzw%nO-nkxK>vL|E zSFQEV(Z01lXHa`Si-EaUs~e>Mo#Hwnnai>O$~Q^!liCj-7Ak$v$=Ufy=rs zLYB+9Q72cfq4WJtwM?}O&EHk|^XbYoAAeW6Gxhj8B7aXRc2D~EapqoyKKryAtWATK zdM1J8BTokFn>q3ye&wdnGM7`q`l-wBAEtrj-zBFwwGIaRH*`M!yN>m(O1~z}`qrk; z@6`NT3&$3DPMdsAo4l}1eqSN`JNS>(a>joevUwY{jCUqj{(k1wci(K9T<#oel8Zh6 z1Y5~vF3teUdG1CN!GAm8&qbR59@wB)AK$aV>fAq$O*y)A!DZa{BFlO2C+_*+`LqU2 zeS9wft23^1q#WIa;4U}G|$Vr^c&DTFGJ{aKCS_m=jFOK`Gz+6<~I4ZHu?58`JO_40Q_Jr=R94DY+Vgn z=IV3cYiT)8p9jm$qB-~0EEoGPfaP+Yz6h2p&(rnD*+=T*`z5eC<2rWb=)Mdt_ob;fm%DM$AZ*tN`A`yN;>`iH@;N%W6^<#HCk57tLMXYmJM_ilq` zPG#*9^M_#9BQbvjmW%#Tu=5)IkHK=u`4g}{^2zyAusL)7m9Q{UcbN^Y7d!NB1Xi8TT1vx$|g= z`z$zd)yMbGV0FfIj+LYP3%HE?S7f<9TH^i2A6UFg)CRzNv|L$uKM`C3RY)a_l|OOuY=3DZy?K+chZ~4 ziK{-oZ-Lbr*FC5l-8m}`OE1BtmdvRw4*fL+Vz z*F}~~&h^0h$S3FeU~}efP}VLnHw3$mi8%yWF8YnYu1EAk!E(tt46KiQa&8PZXMQK4 zter83k!usMdhcyYu2h9-AJ%p 
z=6HLsoVEKqo*ih8$J+H*)<%ElY{y#fe2O!l+o2mp^BGBVKDVLYp5}b+K%e>ZZ^|=& z&etAo^5{1Cz&82NHhFxTd~_lE_gE*^a@J>8Wb10sGB3Nq$!C3b2g_xBtXVGhdxGV% zK6|O7mFu%Na@MD*XATFFb02th&aY!vPTYOLW!(Lc<;wTm{>X`|KE4Nl)fv}0RE};8 zxQu%cvRwI|I~X}})yMY`usY+q7Ru3$1($ITLzXMwbB7}*uKM_n1FJKxYpNXG5#TcJ zk;ro8du{@9;;N7DQDAk(b#Ev~cMQ0Udn~eC`JOutIdRp;_js^6-j6tHU&{Y0=_^bN3c8+|ueF8WDe=P3HgV7c*}q;1(#Q^4jluD+QWX`TW1 zyR|E8pV-sD&QoGf2g^l21MGOCKNT#Od-*i5KJwf_rhPqQX* zY8s61?lhlWX{l*9ur=*L@7=a4{kiy+?}9UtwQJCl=PWq++#P3w<=or;PH+(YImq&^ zmob(7+Zk&)54;w=ytOK)*7e}`rupniORal>t#u!IYh9QABK*o)FF@9=K})R*;p9{6 zg<@K2T?gAfWO-{frn0qK%f;Xg>E*3eIkgUh-;d@qnwDDk1zYR>^wv6*{zLecwZ0!& zy9O<_UJ56lT0a1mORXDW`$1%RYc-~_wGKn~VeqE(^46-HT7CD8q4^v@ORWcjt@R*! zYu%jw3jE4iKZdMbgO*w^gOg9KmxJX}>t@(~99iC4jj3#{)^a7-?-zM%RZgv*uS01* z2h&pPAz*7AOK+{dpRdNRto4(~+BImY^;2;2srA!fxzsuW+s`1&TdOgZt<_pS3m!!; zZ>`FybpZTPG@m1A)_OSoku@GqKY?c5N7Gxk-wf9fqpbT{WPKX6)crX)`P>Dc2g~K1 zZVb8De+ev?aozxy%R7A(_{%iwF^{~ytI^+B>%1eq!@hznmpkVs@b$C?I^#M|a@r;T z&EPWsEy!|t_nZH#wB(mh{#$EZ@_!9kF8OZ*C%g3J8(AMCI6qm$!}cq%W0SV&w$JPe@2#jmgYB?>;D|h{2S2g=iT?0nosV( zBFiQB-@wUjOmoX=m)w5`m$_d+mV2I-@4Xkn=KcqL{Qe1+Pp+51$z=?4$!S;qJMcK- z98dE(hUR<6cg?W{J`U`A=LCA+JL+F1j_)01=i_x=V6FF1>wf$foG}`-@_f7kuP*EO zD%kk#=jb(fb(b*T&h6`9edXi(26$;X-h@{dySKo}QSQ~Z;ngL_J79g~S7J2geHYAs zo4>!d25l1e-{9mg<1$s1y4Wp)oc!fk9RRN``8&Y+%Ez}8ytEu$@akeW5S$$4c~}-+ zU2-f3)>l4bTpmoh%`sYoHW$|Oz5>`e&?dQ71Y1-4-Mc$uSD7uYBfp2e9)xoMsK$B<_yjR~NfIz{yeWoju{zCC6T1edRO8y}=oyHE5Id-v{g*Xp>x{!Pey7Dc5XYZD{e` z53GNaufNy#N6y~q!{5Clr)}aN0M3}p`8p6@UF^nyGv>06gW%O=%m;(@m5=Ws;HBj_ z6kc8I#)6ZhuUO;5;MFC^;b48`Gq2;onOAGjCUM7ulfPWABjDA=?nrR*mvb=zUS0AZ z1=d$SzDI+Xmg5+Bb+J1ZoE&8>$LUW?j^n}l%4duxfHOvG&?f7DBG@_5Cb>=mTa$aI z9M8${>f(C}SbzEWP6TJ~Sf_hOPTSGEC;d6R8*KdYn`{!C{KMGzv+!iFTp##GxCVF% zP2PR<8gZwBef+y$bv-mWzrVz(X&N|l*N1)HOVi=i#coDnSMJqQ;nm$;*KrzHUwQv- zFgfjC;akXBXVR?6??COX$C_u;`&hF!vuJYGEKV(Rz^S=h@44{mVt0CBSFY7Ocy+0H zK3HFQYfeu4)Z7cUCdZ+@^;q*6^r=~!1+;T$)+}~id)Q!Sf?Zd?SDa&i$9xu?d~D7J zXMeail(n%Y^BB|E*(>LQ+xNr&5wO1U&V6#)XFq%tY@N=J_SU2ALVD}7kL}06&Z}#u 
zpK%=D<@6b!HkZ-l^cQDed>owdW$#=8CqIrewK_qr1k2fcywl=;6W}tNkYO76Reiqz5 z-v5D%i@?zW^tn z@qQ63XB;16W!#=?ZMAW~WWDXnbMAT1zP%pod^Bj~dHNE(y4c+S_Fina`!c+`ya#Ut z>nk7MuYi-Ij~F@IH^Hlm-Ob?SD9_L>@amG|t6+WQFJ#628TeMPxz#1!*TBwaYIc6) zw2jSe;M7*;{5rh4*nI<>+RAzQCcL`T_ARi!^6|YLoE+sHatFM+*xdt1+uvAYkP+WLw${4Tt@)OJ5uU-|ex z08Wm+!uLUVb+LO0oE+smeGgt;ay$&yS3cu^1Z-|~iT8bQ#@|%^2%OsbiZMJ2uP(Ly7_6^+e18H?j&fW-g;y85$H2)^+Wib(U2^;!tgn2={|m6W z)g|69!5P2tTz@%jWAiI;YAe_O*YN6M_c++vuA-H8zkyem+MWRGD<9wAf|H}Kn7iM> ztBc+5!O2ms#gp*rlH)0`zVaFWBCxsDCEg#v=54R}kMQc^`!ra8`S|_`?A*oIxs%g2 zHqU@FuCm@|;nl_N&)|%!oV(}X)n#0N0qZLt-@k&BqudjJgI5>3zk`#btmS!lb;!{~us;t4q8W!I`@<-ap~h#rGw!{_^pC8JxM(*SV9^b~N|3Ka2beY`x{rBCo*7 z=lk+iu$(dSefb))yz}UH&YNH#zvI-sL3@YhcaGS6qgcncz>cT^my=${lA^W@eL52Kbk%p z*R1u8TCZIG|AN=9_1;DLE0_Pj;B{(!_L6e>{|jEX)@Qwya}QbnddSx7c^XFV^A=xK z^2!XMTA!ZnTx~$lf2$4oAHTJS(EI&qZ>){zov)$v?g{-ehns>khpw-(b2u3L_kjJo z9eKy-evoqxuYlhSEY|=V&z~JON0yJxaIiMcQ*5?Cme2S1mSD%@&l~!y&pK@d&N}Ju z&lg+6$>;lf1X#{E`R;aJ5Sw*tt4(U!2Halvws7*X*{-%J>mCUwpSrgPTla99{_0cr zC~$k-JHW}O?j6B$#_=&$>UX}i)yDhFHFxb?>#W_b;H;fzPuaEe?}l~*`*%$8#`3Jn zWqiAX&zUG_khkIkN7ZOS>=3t2vMus1ksr@#96?*rC9zXNf9j7FA^|Gwb(8%urW za6fSS9PSS%pE*1LEN7g|UAgw!YLl7{1h6~mTO>>y>l$Gd~A*bYg5kQ@yPO-gA>5k?YY-qef&=Z>tF7j zlaS?Ob8>By_rLq=6gc_hmJAn+7MJx~GF3llPAP>f=8H+&*uo!pX&6E_?kDt@As?N@7MnW DvZB!T literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index 5e36abc..84ef80d 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -32,6 +32,38 @@ struct PietItemRef { uint offset; }; +struct LineSegRef { + uint offset; +}; + +struct QuadSegRef { + uint offset; +}; + +struct CubicSegRef { + uint offset; +}; + +struct FillRef { + uint offset; +}; + +struct StrokeRef { + uint offset; +}; + +struct SetLineWidthRef { + uint offset; +}; + +struct TransformRef { + uint offset; +}; + +struct ElementRef { + uint offset; +}; + struct Bbox { ivec4 bbox; }; @@ -128,6 +160,97 @@ PietItemRef PietItem_index(PietItemRef ref, uint index) { return 
PietItemRef(ref.offset + index * PietItem_size); } +struct LineSeg { + vec2 p0; + vec2 p1; +}; + +#define LineSeg_size 16 + +LineSegRef LineSeg_index(LineSegRef ref, uint index) { + return LineSegRef(ref.offset + index * LineSeg_size); +} + +struct QuadSeg { + vec2 p0; + vec2 p1; + vec2 p2; +}; + +#define QuadSeg_size 24 + +QuadSegRef QuadSeg_index(QuadSegRef ref, uint index) { + return QuadSegRef(ref.offset + index * QuadSeg_size); +} + +struct CubicSeg { + vec2 p0; + vec2 p1; + vec2 p2; + vec2 p3; +}; + +#define CubicSeg_size 32 + +CubicSegRef CubicSeg_index(CubicSegRef ref, uint index) { + return CubicSegRef(ref.offset + index * CubicSeg_size); +} + +struct Fill { + uint rgba_color; +}; + +#define Fill_size 4 + +FillRef Fill_index(FillRef ref, uint index) { + return FillRef(ref.offset + index * Fill_size); +} + +struct Stroke { + uint rgba_color; +}; + +#define Stroke_size 4 + +StrokeRef Stroke_index(StrokeRef ref, uint index) { + return StrokeRef(ref.offset + index * Stroke_size); +} + +struct SetLineWidth { + float width; +}; + +#define SetLineWidth_size 4 + +SetLineWidthRef SetLineWidth_index(SetLineWidthRef ref, uint index) { + return SetLineWidthRef(ref.offset + index * SetLineWidth_size); +} + +struct Transform { + vec4 mat; + vec2 translate; +}; + +#define Transform_size 24 + +TransformRef Transform_index(TransformRef ref, uint index) { + return TransformRef(ref.offset + index * Transform_size); +} + +#define Element_Nop 0 +#define Element_Line 1 +#define Element_Quad 2 +#define Element_Cubic 3 +#define Element_Stroke 4 +#define Element_Fill 5 +#define Element_SetLineWidth 6 +#define Element_Transform 7 +#define Element_size 36 + +ElementRef Element_index(ElementRef ref, uint index) { + return ElementRef(ref.offset + index * Element_size); +} + Bbox Bbox_read(BboxRef ref) { uint ix = ref.offset >> 2; uint raw0 = scene[ix + 0]; @@ -236,3 +359,118 @@ PietStrokePolyLine PietItem_Poly_read(PietItemRef ref) { return 
PietStrokePolyLine_read(PietStrokePolyLineRef(ref.offset + 4)); } +LineSeg LineSeg_read(LineSegRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + uint raw1 = scene[ix + 1]; + uint raw2 = scene[ix + 2]; + uint raw3 = scene[ix + 3]; + LineSeg s; + s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + +QuadSeg QuadSeg_read(QuadSegRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + uint raw1 = scene[ix + 1]; + uint raw2 = scene[ix + 2]; + uint raw3 = scene[ix + 3]; + uint raw4 = scene[ix + 4]; + uint raw5 = scene[ix + 5]; + QuadSeg s; + s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); + return s; +} + +CubicSeg CubicSeg_read(CubicSegRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + uint raw1 = scene[ix + 1]; + uint raw2 = scene[ix + 2]; + uint raw3 = scene[ix + 3]; + uint raw4 = scene[ix + 4]; + uint raw5 = scene[ix + 5]; + uint raw6 = scene[ix + 6]; + uint raw7 = scene[ix + 7]; + CubicSeg s; + s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); + s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7)); + return s; +} + +Fill Fill_read(FillRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + Fill s; + s.rgba_color = raw0; + return s; +} + +Stroke Stroke_read(StrokeRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + Stroke s; + s.rgba_color = raw0; + return s; +} + +SetLineWidth SetLineWidth_read(SetLineWidthRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + SetLineWidth s; + s.width = uintBitsToFloat(raw0); + return s; +} + +Transform Transform_read(TransformRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = 
scene[ix + 0]; + uint raw1 = scene[ix + 1]; + uint raw2 = scene[ix + 2]; + uint raw3 = scene[ix + 3]; + uint raw4 = scene[ix + 4]; + uint raw5 = scene[ix + 5]; + Transform s; + s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); + return s; +} + +uint Element_tag(ElementRef ref) { + return scene[ref.offset >> 2]; +} + +LineSeg Element_Line_read(ElementRef ref) { + return LineSeg_read(LineSegRef(ref.offset + 4)); +} + +QuadSeg Element_Quad_read(ElementRef ref) { + return QuadSeg_read(QuadSegRef(ref.offset + 4)); +} + +CubicSeg Element_Cubic_read(ElementRef ref) { + return CubicSeg_read(CubicSegRef(ref.offset + 4)); +} + +Stroke Element_Stroke_read(ElementRef ref) { + return Stroke_read(StrokeRef(ref.offset + 4)); +} + +Fill Element_Fill_read(ElementRef ref) { + return Fill_read(FillRef(ref.offset + 4)); +} + +SetLineWidth Element_SetLineWidth_read(ElementRef ref) { + return SetLineWidth_read(SetLineWidthRef(ref.offset + 4)); +} + +Transform Element_Transform_read(ElementRef ref) { + return Transform_read(TransformRef(ref.offset + 4)); +} + diff --git a/piet-gpu/shader/state.h b/piet-gpu/shader/state.h new file mode 100644 index 0000000..2547b93 --- /dev/null +++ b/piet-gpu/shader/state.h @@ -0,0 +1,59 @@ +// Code auto-generated by piet-gpu-derive + +struct StateRef { + uint offset; +}; + +struct State { + vec4 mat; + vec2 translate; + vec4 bbox; + float linewidth; + uint flags; +}; + +#define State_size 48 + +StateRef State_index(StateRef ref, uint index) { + return StateRef(ref.offset + index * State_size); +} + +State State_read(StateRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = state[ix + 0]; + uint raw1 = state[ix + 1]; + uint raw2 = state[ix + 2]; + uint raw3 = state[ix + 3]; + uint raw4 = state[ix + 4]; + uint raw5 = state[ix + 5]; + uint raw6 = state[ix + 6]; + uint raw7 = state[ix + 7]; + uint raw8 = state[ix + 8]; + uint raw9 = 
state[ix + 9]; + uint raw10 = state[ix + 10]; + uint raw11 = state[ix + 11]; + State s; + s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); + s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9)); + s.linewidth = uintBitsToFloat(raw10); + s.flags = raw11; + return s; +} + +void State_write(StateRef ref, State s) { + uint ix = ref.offset >> 2; + state[ix + 0] = floatBitsToUint(s.mat.x); + state[ix + 1] = floatBitsToUint(s.mat.y); + state[ix + 2] = floatBitsToUint(s.mat.z); + state[ix + 3] = floatBitsToUint(s.mat.w); + state[ix + 4] = floatBitsToUint(s.translate.x); + state[ix + 5] = floatBitsToUint(s.translate.y); + state[ix + 6] = floatBitsToUint(s.bbox.x); + state[ix + 7] = floatBitsToUint(s.bbox.y); + state[ix + 8] = floatBitsToUint(s.bbox.z); + state[ix + 9] = floatBitsToUint(s.bbox.w); + state[ix + 10] = floatBitsToUint(s.linewidth); + state[ix + 11] = s.flags; +} + diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index a47737a..82b20c8 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -1,5 +1,5 @@ -mod render_ctx; mod pico_svg; +mod render_ctx; pub use render_ctx::PietGpuRenderContext; @@ -8,6 +8,8 @@ use rand::{Rng, RngCore}; use piet::kurbo::{BezPath, Circle, Line, Point, Vec2}; use piet::{Color, RenderContext}; +use piet_gpu_types::encoder::Encode; + use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags}; use pico_svg::PicoSvg; @@ -110,6 +112,12 @@ pub struct Renderer { scene_buf: D::Buffer, scene_dev: D::Buffer, + pub state_buf: D::Buffer, + + el_pipeline: D::Pipeline, + el_ds: D::DescriptorSet, + + /* k1_alloc_buf_host: D::Buffer, k1_alloc_buf_dev: D::Buffer, k2s_alloc_buf_host: D::Buffer, @@ -131,6 +139,8 @@ pub struct Renderer { k3_ds: D::DescriptorSet, k4_pipeline: D::Pipeline, k4_ds: D::DescriptorSet, + */ + n_elements: usize, } impl Renderer { @@ 
-146,175 +156,123 @@ impl Renderer { .unwrap(); device.write_buffer(&scene_buf, &scene)?; + let state_buf = device.create_buffer(4 * 1024 * 1024, dev)?; + let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; + + let el_code = include_bytes!("../shader/elements.spv"); + let el_pipeline = device.create_simple_compute_pipeline(el_code, 2, 0)?; + let el_ds = device.create_descriptor_set( + &el_pipeline, + &[&scene_dev, &state_buf], + &[], + )?; + + let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); + println!("scene: {} elements", n_elements); + + /* let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?; let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?; let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?; - let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; let k1_alloc_buf_host = device.create_buffer(4, host)?; let k1_alloc_buf_dev = device.create_buffer(4, dev)?; let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE; device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?; let k1_code = include_bytes!("../shader/kernel1.spv"); - let k1_pipeline = device - .create_simple_compute_pipeline(k1_code, 3, 0)?; - let k1_ds = device - .create_descriptor_set( - &k1_pipeline, - &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], - &[], - )?; + let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3, 0)?; + let k1_ds = device.create_descriptor_set( + &k1_pipeline, + &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], + &[], + )?; let k2s_alloc_buf_host = device.create_buffer(4, host)?; let k2s_alloc_buf_dev = device.create_buffer(4, dev)?; let k2s_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; - device - .write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32]) - ?; + device.write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])?; let 
k2s_code = include_bytes!("../shader/kernel2s.spv"); - let k2s_pipeline = device - .create_simple_compute_pipeline(k2s_code, 4, 0) - ?; - let k2s_ds = device - .create_descriptor_set( - &k2s_pipeline, - &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], - &[], - ) - ?; + let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4, 0)?; + let k2s_ds = device.create_descriptor_set( + &k2s_pipeline, + &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], + &[], + )?; let k2f_alloc_buf_host = device.create_buffer(4, host)?; let k2f_alloc_buf_dev = device.create_buffer(4, dev)?; let k2f_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; - device - .write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32]) - ?; + device.write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32])?; let k2f_code = include_bytes!("../shader/kernel2f.spv"); let k2f_pipeline = device.create_simple_compute_pipeline(k2f_code, 4, 0)?; - let k2f_ds = device - .create_descriptor_set( - &k2f_pipeline, - &[ - &scene_dev, - &tilegroup_buf, - &fill_seg_buf, - &k2f_alloc_buf_dev, - ], - &[], - ) - ?; + let k2f_ds = device.create_descriptor_set( + &k2f_pipeline, + &[ + &scene_dev, + &tilegroup_buf, + &fill_seg_buf, + &k2f_alloc_buf_dev, + ], + &[], + )?; let k3_alloc_buf_host = device.create_buffer(4, host)?; let k3_alloc_buf_dev = device.create_buffer(4, dev)?; let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; - device - .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) - ?; + device.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])?; let k3_code = include_bytes!("../shader/kernel3.spv"); let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0)?; - let k3_ds = device - .create_descriptor_set( - &k3_pipeline, - &[ - &scene_dev, - &tilegroup_buf, - &segment_buf, - &fill_seg_buf, - &ptcl_buf, - &k3_alloc_buf_dev, - ], - &[], - ) - ?; + let k3_ds = device.create_descriptor_set( + &k3_pipeline, + 
&[ + &scene_dev, + &tilegroup_buf, + &segment_buf, + &fill_seg_buf, + &ptcl_buf, + &k3_alloc_buf_dev, + ], + &[], + )?; let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?; - let k4_ds = device - .create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev]) - ?; + let k4_ds = device.create_descriptor_set( + &k4_pipeline, + &[&ptcl_buf, &segment_buf, &fill_seg_buf], + &[&image_dev], + )?; + */ Ok(Renderer { scene_buf, scene_dev, image_dev, - k1_alloc_buf_host, - k1_alloc_buf_dev, - k2s_alloc_buf_host, - k2s_alloc_buf_dev, - k2f_alloc_buf_host, - k2f_alloc_buf_dev, - k3_alloc_buf_host, - k3_alloc_buf_dev, - tilegroup_buf, - ptcl_buf, - k1_pipeline, - k1_ds, - k2s_pipeline, - k2s_ds, - k2f_pipeline, - k2f_ds, - k3_pipeline, - k3_ds, - k4_pipeline, - k4_ds, + el_pipeline, + el_ds, + state_buf, + n_elements, }) } pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf, query_pool: &D::QueryPool) { cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev); - // Note: we could use one alloc buf and reuse it. But we'll stick with - // multiple ones for clarity. - cmd_buf.copy_buffer(&self.k1_alloc_buf_host, &self.k1_alloc_buf_dev); - cmd_buf.copy_buffer(&self.k2s_alloc_buf_host, &self.k2s_alloc_buf_dev); - cmd_buf.copy_buffer(&self.k2f_alloc_buf_host, &self.k2f_alloc_buf_dev); - cmd_buf.copy_buffer(&self.k3_alloc_buf_host, &self.k3_alloc_buf_dev); - // Note: these clears aren't necessary, and are here to make inspection - // of the buffers cleaner. Can likely be removed. 
- cmd_buf.clear_buffer(&self.tilegroup_buf); - cmd_buf.clear_buffer(&self.ptcl_buf); cmd_buf.memory_barrier(); - cmd_buf.image_barrier(&self.image_dev, ImageLayout::Undefined, ImageLayout::General); + cmd_buf.image_barrier( + &self.image_dev, + ImageLayout::Undefined, + ImageLayout::General, + ); cmd_buf.reset_query_pool(&query_pool); cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.dispatch( - &self.k1_pipeline, - &self.k1_ds, - ((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1), + &self.el_pipeline, + &self.el_ds, + ((self.n_elements / 128) as u32, 1, 1), ); cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); - cmd_buf.dispatch( - &self.k2s_pipeline, - &self.k2s_ds, - ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), - ); - cmd_buf.write_timestamp(&query_pool, 2); - // Note: this barrier is not necessary (k2f does not depend on - // k2s output), but I'm keeping it here to increase transparency - // of performance. - cmd_buf.memory_barrier(); - cmd_buf.dispatch( - &self.k2f_pipeline, - &self.k2f_ds, - ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 2), - ); - cmd_buf.write_timestamp(&query_pool, 3); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( - &self.k3_pipeline, - &self.k3_ds, - ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 3), - ); - cmd_buf.write_timestamp(&query_pool, 4); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( - &self.k4_pipeline, - &self.k4_ds, - ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), - ); - cmd_buf.write_timestamp(&query_pool, 5); - cmd_buf.memory_barrier(); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); } } diff --git a/piet-gpu/src/pico_svg.rs b/piet-gpu/src/pico_svg.rs index a4c92d0..4ddf94b 100644 --- a/piet-gpu/src/pico_svg.rs +++ b/piet-gpu/src/pico_svg.rs @@ -41,10 +41,14 @@ impl PicoSvg { let path = Affine::scale(scale) * bp; if let Some(fill_color) = el.attribute("fill") { let color = parse_color(fill_color); - items.push(Item::Fill(FillItem { color, path: path.clone() })); + 
items.push(Item::Fill(FillItem { + color, + path: path.clone(), + })); } if let Some(stroke_color) = el.attribute("stroke") { - let width = f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?; + let width = + f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?; let color = parse_color(stroke_color); items.push(Item::Stroke(StrokeItem { width, color, path })); } diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 6367301..ad84a60 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -2,7 +2,11 @@ use std::borrow::Cow; use piet_gpu_types::encoder::{Encode, Encoder, Ref}; use piet_gpu_types::scene; -use piet_gpu_types::scene::{Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup}; +use piet_gpu_types::scene::{ + Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup, +}; + +use piet_gpu_types::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke}; use piet::kurbo::{Affine, PathEl, Point, Rect, Shape}; @@ -27,10 +31,10 @@ pub struct PietGpuText; pub struct PietGpuRenderContext { encoder: Encoder, - bboxes: Vec, - items: Vec, + elements: Vec, // Will probably need direct accesss to hal Device to create images etc. 
inner_text: PietGpuText, + stroke_width: f32, } #[derive(Clone)] @@ -43,47 +47,22 @@ const TOLERANCE: f64 = 0.25; impl PietGpuRenderContext { pub fn new() -> PietGpuRenderContext { - let mut encoder = Encoder::new(); - let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32); - let bboxes = Vec::new(); - let items = Vec::new(); + let encoder = Encoder::new(); + let elements = Vec::new(); let inner_text = PietGpuText; + let stroke_width = 0.0; PietGpuRenderContext { encoder, - bboxes, - items, + elements, inner_text, + stroke_width, } } pub fn get_scene_buf(&mut self) -> &[u8] { - let n_items = self.bboxes.len() as u32; - let bboxes = self.bboxes.encode(&mut self.encoder).transmute(); - let items = self.items.encode(&mut self.encoder).transmute(); - let offset = scene::Point { xy: [0.0, 0.0] }; - let simple_group = SimpleGroup { - n_items, - bboxes, - items, - offset, - }; - let root_item = PietItem::Group(simple_group); - root_item.encode_to(&mut self.encoder.buf_mut()[0..PietItem::fixed_size()]); + self.elements.encode(&mut self.encoder); self.encoder.buf() } - - fn push_item(&mut self, item: PietItem, bbox: Rect) { - let scene_bbox = Bbox { - bbox: [ - bbox.x0.floor() as i16, - bbox.y0.floor() as i16, - bbox.x1.ceil() as i16, - bbox.y1.ceil() as i16, - ], - }; - self.items.push(item); - self.bboxes.push(scene_bbox); - } } impl RenderContext for PietGpuRenderContext { @@ -107,20 +86,19 @@ impl RenderContext for PietGpuRenderContext { fn clear(&mut self, _color: Color) {} fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush, width: f64) { - let bbox = shape.bounding_box(); - let brush = brush.make_brush(self, || bbox).into_owned(); + let width = width as f32; + if self.stroke_width != width { + self.elements + .push(Element::SetLineWidth(SetLineWidth { width })); + self.stroke_width = width; + } + let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); let path = shape.to_bez_path(TOLERANCE); - let (n_points, points) = 
flatten_shape(&mut self.encoder, path); + self.encode_path(path); match brush { PietGpuBrush::Solid(rgba_color) => { - let poly_line = PietStrokePolyLine { - rgba_color, - width: width as f32, - n_points, - points, - }; - let bbox = bbox.inset(-0.5 * width); - self.push_item(PietItem::Poly(poly_line), bbox); + let stroke = Stroke { rgba_color }; + self.elements.push(Element::Stroke(stroke)); } _ => (), } @@ -136,35 +114,13 @@ impl RenderContext for PietGpuRenderContext { } fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush) { - let bbox = shape.bounding_box(); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); - - if let Some(circle) = shape.as_circle() { - match brush { - PietGpuBrush::Solid(rgba_color) => { - let piet_circle = PietCircle { - rgba_color, - center: to_scene_point(circle.center), - radius: circle.radius as f32, - }; - let bbox = circle.bounding_box(); - self.push_item(PietItem::Circle(piet_circle), bbox); - } - _ => {} - } - return; - } let path = shape.to_bez_path(TOLERANCE); - let (n_points, points) = flatten_shape(&mut self.encoder, path); + self.encode_path(path); match brush { PietGpuBrush::Solid(rgba_color) => { - let fill = PietFill { - flags: 0, - rgba_color, - n_points, - points, - }; - self.push_item(PietItem::Fill(fill), bbox); + let fill = Fill { rgba_color }; + self.elements.push(Element::Fill(fill)); } _ => (), } @@ -241,45 +197,96 @@ impl RenderContext for PietGpuRenderContext { } } -fn flatten_shape( - encoder: &mut Encoder, - path: impl Iterator, -) -> (u32, Ref) { - let mut points = Vec::new(); - let mut start_pt = None; - let mut last_pt = None; - piet::kurbo::flatten(path, TOLERANCE, |el| { - match el { - PathEl::MoveTo(p) => { - let scene_pt = to_scene_point(p); - start_pt = Some(clone_scene_pt(&scene_pt)); - if !points.is_empty() { - points.push(scene::Point { - xy: [std::f32::NAN, std::f32::NAN], - }); +impl PietGpuRenderContext { + fn encode_path(&mut self, path: impl Iterator) { + let flatten 
= false; + if flatten { + let mut start_pt = None; + let mut last_pt = None; + piet::kurbo::flatten(path, TOLERANCE, |el| { + match el { + PathEl::MoveTo(p) => { + let scene_pt = to_f32_2(p); + last_pt = Some(scene_pt); + } + PathEl::LineTo(p) => { + let scene_pt = to_f32_2(p); + let seg = LineSeg { + p0: last_pt.unwrap(), + p1: scene_pt, + }; + self.elements.push(Element::Line(seg)); + last_pt = Some(scene_pt); + } + PathEl::ClosePath => { + if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) { + let seg = LineSeg { + p0: last, + p1: start, + }; + self.elements.push(Element::Line(seg)); + } + } + _ => (), } - last_pt = Some(clone_scene_pt(&scene_pt)); - points.push(scene_pt); - } - PathEl::LineTo(p) => { - let scene_pt = to_scene_point(p); - last_pt = Some(clone_scene_pt(&scene_pt)); - points.push(scene_pt); - } - PathEl::ClosePath => { - if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) { - if start.xy != last.xy { - points.push(start); + //println!("{:?}", el); + }); + } else { + let mut start_pt = None; + let mut last_pt = None; + for el in path { + match el { + PathEl::MoveTo(p) => { + let scene_pt = to_f32_2(p); + last_pt = Some(scene_pt); + } + PathEl::LineTo(p) => { + let scene_pt = to_f32_2(p); + let seg = LineSeg { + p0: last_pt.unwrap(), + p1: scene_pt, + }; + self.elements.push(Element::Line(seg)); + last_pt = Some(scene_pt); + } + PathEl::QuadTo(p1, p2) => { + let scene_p1 = to_f32_2(p1); + let scene_p2 = to_f32_2(p2); + let seg = QuadSeg { + p0: last_pt.unwrap(), + p1: scene_p1, + p2: scene_p2, + }; + self.elements.push(Element::Quad(seg)); + last_pt = Some(scene_p2); + } + PathEl::CurveTo(p1, p2, p3) => { + let scene_p1 = to_f32_2(p1); + let scene_p2 = to_f32_2(p2); + let scene_p3 = to_f32_2(p3); + let seg = CubicSeg { + p0: last_pt.unwrap(), + p1: scene_p1, + p2: scene_p2, + p3: scene_p3, + }; + self.elements.push(Element::Cubic(seg)); + last_pt = Some(scene_p3); + } + PathEl::ClosePath => { + if let (Some(start), 
Some(last)) = (start_pt.take(), last_pt.take()) { + let seg = LineSeg { + p0: last, + p1: start, + }; + self.elements.push(Element::Line(seg)); + } } } + //println!("{:?}", el); } - _ => (), } - //println!("{:?}", el); - }); - let n_points = points.len() as u32; - let points_ref = points.encode(encoder).transmute(); - (n_points, points_ref) + } } impl Text for PietGpuText { @@ -360,13 +367,6 @@ impl IntoBrush for PietGpuBrush { } } -fn to_scene_point(point: Point) -> scene::Point { - scene::Point { - xy: [point.x as f32, point.y as f32], - } -} - -// TODO: allow #[derive(Clone)] in piet-gpu-derive. -fn clone_scene_pt(p: &scene::Point) -> scene::Point { - scene::Point { xy: p.xy } +fn to_f32_2(point: Point) -> [f32; 2] { + [point.x as f32, point.y as f32] }