From 61598d2da0358dd0607176a485d79ee654c88f1e Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Wed, 13 Jul 2022 14:40:41 -0700 Subject: [PATCH] Gather timing statistics from RenderDriver Also change command line binaries to use new abstraction. --- piet-gpu/bin/cli.rs | 47 ++++++------------- piet-gpu/bin/winit.rs | 63 ++++++++----------------- piet-gpu/src/lib.rs | 9 +++- piet-gpu/src/render_driver.rs | 86 ++++++++++++++++++++++++++++++----- 4 files changed, 115 insertions(+), 90 deletions(-) diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 79914bf..df86158 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -6,7 +6,7 @@ use clap::{App, Arg}; use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session}; -use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer}; const WIDTH: usize = 2048; const HEIGHT: usize = 1536; @@ -231,9 +231,6 @@ fn main() -> Result<(), Error> { let device = instance.device(None)?; let session = Session::new(device); - let mut cmd_buf = session.cmd_buf()?; - let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?; - let mut ctx = PietGpuRenderContext::new(); if let Some(input) = matches.value_of("INPUT") { let mut scale = matches @@ -253,40 +250,22 @@ fn main() -> Result<(), Error> { test_scenes::render_blend_grid(&mut ctx); } - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?; - renderer.upload_render_ctx(&mut ctx, 0)?; + let renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?; + let mut render_driver = RenderDriver::new(&session, 1, renderer); + let start = std::time::Instant::now(); + render_driver.upload_render_ctx(&session, &mut ctx)?; let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST; let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; - cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool, 0); - cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); - cmd_buf.finish_timestamps(&query_pool); - cmd_buf.host_barrier(); - cmd_buf.finish(); - let start = std::time::Instant::now(); - let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?; - submitted.wait()?; + render_driver.run_coarse(&session)?; + let target = render_driver.record_fine(&session)?; + target + .cmd_buf + .copy_image_to_buffer(target.image, &image_buf); + render_driver.submit(&session, &[], &[])?; + render_driver.wait(&session); println!("elapsed = {:?}", start.elapsed()); - let ts = session.fetch_query_pool(&query_pool).unwrap(); - if !ts.is_empty() { - println!("Element kernel time: {:.3}ms", ts[0] * 1e3); - println!( - "Tile allocation kernel time: {:.3}ms", - (ts[1] - ts[0]) * 1e3 - ); - println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3); - println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3); - println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3); - println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3); - println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3); - } - - /* - let mut data: Vec = Default::default(); - renderer.memory_buf_dev.read(&mut data).unwrap(); - piet_gpu::dump_k1_data(&data[2..]); - */ + render_driver.get_timing_stats(&session, 0).print_summary(); let mut img_data: Vec = Default::default(); // Note: because png can use a `&[u8]` slice, we could avoid an extra copy diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 8f84da4..8438371 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -1,8 +1,8 @@ use piet::kurbo::Point; use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; -use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; +use piet_gpu_hal::{Error, ImageLayout, Instance, Session}; -use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer}; use clap::{App, Arg}; @@ -69,13 +69,9 @@ fn main() -> Result<(), Error> { let present_semaphores = (0..NUM_FRAMES) .map(|_| session.create_semaphore()) .collect::, Error>>()?; - let query_pools = (0..NUM_FRAMES) - .map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE)) - .collect::, Error>>()?; - let mut cmd_bufs: [Option; NUM_FRAMES] = Default::default(); - let mut submitted: [Option; NUM_FRAMES] = Default::default(); - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?; + let renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?; + let mut render_driver = RenderDriver::new(&session, NUM_FRAMES, renderer); let mut mode = 0usize; event_loop.run(move |event, _, control_flow| { @@ -106,26 +102,13 @@ fn main() -> Result<(), Error> { Event::RedrawRequested(window_id) if window_id == window.id() => { let frame_idx = current_frame % NUM_FRAMES; - if let Some(submitted) = submitted[frame_idx].take() { - cmd_bufs[frame_idx] = submitted.wait().unwrap(); - let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap(); - if !ts.is_empty() { - info_string = format!( - "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", - ts[10] * 1e3, - ts[0] * 1e3, - (ts[1] - ts[0]) * 1e3, - (ts[2] - ts[1]) * 1e3, - (ts[4] - ts[3]) * 1e3, - (ts[6] - ts[5]) * 1e3, - (ts[8] - ts[7]) * 1e3, - (ts[10] - ts[9]) * 1e3, - ); - } + if current_frame >= NUM_FRAMES { + let stats = render_driver.get_timing_stats(&session, frame_idx); + info_string = stats.short_summary(); } let mut ctx = PietGpuRenderContext::new(); - let test_blend = true; + let test_blend = false; if let Some(svg) = &svg { test_scenes::render_svg(&mut ctx, svg); } else if test_blend { @@ -168,16 +151,15 @@ fn main() -> Result<(), Error> { test_scenes::render_anim_frame(&mut ctx, current_frame); } render_info_string(&mut ctx, &info_string); - if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { + if let Err(e) = render_driver.upload_render_ctx(&session, &mut ctx) { println!("error in uploading: {}", e); } let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); let swap_image = swapchain.image(image_idx); - let query_pool = &query_pools[frame_idx]; - let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap()); - cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool, frame_idx); + render_driver.run_coarse(&session).unwrap(); + let target = render_driver.record_fine(&session).unwrap(); + let cmd_buf = target.cmd_buf; // Image -> Swapchain cmd_buf.image_barrier( @@ -185,32 +167,25 @@ fn main() -> Result<(), Error> { ImageLayout::Undefined, ImageLayout::BlitDst, ); - cmd_buf.blit_image(&renderer.image_dev, &swap_image); + cmd_buf.blit_image(target.image, &swap_image); cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); - cmd_buf.finish(); - - submitted[frame_idx] = Some(session - .run_cmd_buf( - cmd_buf, + render_driver + .submit( + &session, &[&acquisition_semaphore], &[&present_semaphores[frame_idx]], ) - .unwrap()); + .unwrap(); swapchain .present(image_idx, &[&present_semaphores[frame_idx]]) .unwrap(); + render_driver.next_buffer(); current_frame += 1; } Event::LoopDestroyed => { - for cmd_buf in &mut submitted { - // Wait for command list submission, otherwise dropping of renderer may - // cause validation errors (and possibly crashes). - if let Some(cmd_buf) = cmd_buf.take() { - cmd_buf.wait().unwrap(); - } - } + render_driver.wait_all(&session); } _ => (), } diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 96a57e9..bfb5f19 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -179,7 +179,13 @@ impl RenderConfig { impl Renderer { /// The number of query pool entries needed to run the renderer. - pub const QUERY_POOL_SIZE: u32 = 12; + pub const QUERY_POOL_SIZE: u32 = Self::COARSE_QUERY_POOL_SIZE + Self::FINE_QUERY_POOL_SIZE; + + /// The number of query pool entries needed to run the coarse pipeline. + pub const COARSE_QUERY_POOL_SIZE: u32 = 10; + + /// The number of query pool entries needed to run the fine pipeline. + pub const FINE_QUERY_POOL_SIZE: u32 = 2; pub unsafe fn new( session: &Session, @@ -597,6 +603,7 @@ impl Renderer { query_pool: &QueryPool, query_start: u32, ) { + cmd_buf.reset_query_pool(&query_pool); cmd_buf.begin_debug_label("Fine raster"); let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer( &query_pool, diff --git a/piet-gpu/src/render_driver.rs b/piet-gpu/src/render_driver.rs index ebeeae8..cd49399 100644 --- a/piet-gpu/src/render_driver.rs +++ b/piet-gpu/src/render_driver.rs @@ -36,9 +36,17 @@ pub struct TargetState<'a> { pub image: &'a Image, } +#[derive(Default, Debug)] +pub struct TimingStats { + coarse: Vec, + fine: Vec, +} + struct RenderFrame { cmd_buf: CmdBufState, - query_pool: QueryPool, + coarse_query_pool: QueryPool, + fine_query_pool: QueryPool, + timing_stats: TimingStats, } enum CmdBufState { @@ -58,10 +66,14 @@ impl RenderDriver { .map(|_| { // Maybe should allocate here so it doesn't happen on first frame? let cmd_buf = CmdBufState::default(); - let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?; + let coarse_query_pool = + session.create_query_pool(Renderer::COARSE_QUERY_POOL_SIZE)?; + let fine_query_pool = session.create_query_pool(Renderer::FINE_QUERY_POOL_SIZE)?; Ok(RenderFrame { cmd_buf, - query_pool, + coarse_query_pool, + fine_query_pool, + timing_stats: TimingStats::default(), }) }) .collect::>() @@ -127,14 +139,15 @@ impl RenderDriver { cmd_buf.begin(); // TODO: probably want to return query results as well self.renderer - .record_coarse(cmd_buf, &frame.query_pool, self.buf_ix); + .record_coarse(cmd_buf, &frame.coarse_query_pool, self.buf_ix); self.renderer.record_readback(cmd_buf); let cmd_buf = frame.cmd_buf.cmd_buf(session)?; - cmd_buf.finish_timestamps(&frame.query_pool); + cmd_buf.finish_timestamps(&frame.coarse_query_pool); cmd_buf.host_barrier(); cmd_buf.finish(); frame.cmd_buf.submit(session, &[], &[])?; frame.cmd_buf.wait(); + frame.timing_stats.coarse = session.fetch_query_pool(&frame.coarse_query_pool)?; let mut result = Vec::new(); // TODO: consider read method for single POD value self.renderer.memory_buf_readback.read(&mut result)?; @@ -146,7 +159,7 @@ impl RenderDriver { pub fn run_coarse(&mut self, session: &Session) -> Result<(), Error> { loop { let mem_header = self.try_run_coarse(session)?; - println!("{:?}", mem_header); + //println!("{:?}", mem_header); if mem_header.mem_error == 0 { let blend_needed = mem_header.blend_offset as u64; if blend_needed > self.renderer.blend_size() { @@ -173,7 +186,8 @@ impl RenderDriver { let frame = &mut self.frames[self.buf_ix]; let cmd_buf = frame.cmd_buf.cmd_buf(session)?; unsafe { - self.renderer.record_fine(cmd_buf, &frame.query_pool, 0); + self.renderer + .record_fine(cmd_buf, &frame.fine_query_pool, 0); } let image = &self.renderer.image_dev; Ok(TargetState { cmd_buf, image }) @@ -189,7 +203,7 @@ impl RenderDriver { let frame = &mut self.frames[self.buf_ix]; let cmd_buf = frame.cmd_buf.cmd_buf(session)?; unsafe { - cmd_buf.finish_timestamps(&frame.query_pool); + cmd_buf.finish_timestamps(&frame.fine_query_pool); cmd_buf.host_barrier(); cmd_buf.finish(); frame @@ -200,15 +214,38 @@ impl RenderDriver { Ok(()) } - pub fn wait(&mut self) { - self.frames[self.buf_ix].cmd_buf.wait(); - self.pending = None; + unsafe fn wait_frame(&mut self, session: &Session, buf_ix: usize) { + let frame = &mut self.frames[buf_ix]; + frame.cmd_buf.wait(); + if let Ok(stats) = session.fetch_query_pool(&frame.fine_query_pool) { + frame.timing_stats.fine = stats; + } + if self.pending == Some(buf_ix) { + self.pending = None; + } + } + + pub unsafe fn wait(&mut self, session: &Session) { + self.wait_frame(session, self.buf_ix); } /// Move to the next buffer. pub fn next_buffer(&mut self) { self.buf_ix = (self.buf_ix + 1) % self.frames.len() } + + pub unsafe fn get_timing_stats(&mut self, session: &Session, buf_ix: usize) -> &TimingStats { + self.wait_frame(session, buf_ix); + &self.frames[buf_ix].timing_stats + } + + pub fn wait_all(&mut self, session: &Session) { + for buf_ix in 0..self.frames.len() { + unsafe { + self.wait_frame(session, buf_ix); + } + } + } } impl Default for CmdBufState { @@ -265,3 +302,30 @@ impl CmdBufState { } } } + +impl TimingStats { + pub fn print_summary(&self) { + let ts = &self.coarse; + println!("Element time: {:.3}ms", ts[0] * 1e3); + println!("Clip + bin + tile time: {:.3}ms", (ts[2] - ts[1]) * 1e3); + println!("Coarse path time: {:.3}ms", (ts[4] - ts[2]) * 1e3); + println!("Backdrop time: {:.3}ms", (ts[6] - ts[5]) * 1e3); + println!("Coarse raster kernel time: {:.3}ms", (ts[8] - ts[7]) * 1e3); + println!("Fine kernel time: {:.3}ms", self.fine[0] * 1e3); + } + + pub fn short_summary(&self) -> String { + let ts = &self.coarse; + let el = ts[0] * 1e3; + let cl = (ts[2] - ts[1]) * 1e3; + let cp = (ts[4] - ts[3]) * 1e3; + let bd = (ts[6] - ts[5]) * 1e3; + let cr = (ts[8] - ts[7]) * 1e3; + let fr = self.fine[0] * 1e3; + let total = el + cl + cp + bd + cr + fr; + format!( + "{:.3}ms :: el:{:.3}ms|cl:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|cr:{:.3}ms|fr:{:.3}ms", + total, el, cl, cp, bd, cr, fr + ) + } +}