Gather timing statistics from RenderDriver

Also change command line binaries to use new abstraction.
This commit is contained in:
Raph Levien 2022-07-13 14:40:41 -07:00
parent 9930c9d1de
commit 61598d2da0
4 changed files with 115 additions and 90 deletions

View file

@ -6,7 +6,7 @@ use clap::{App, Arg};
use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
const WIDTH: usize = 2048;
const HEIGHT: usize = 1536;
@ -231,9 +231,6 @@ fn main() -> Result<(), Error> {
let device = instance.device(None)?;
let session = Session::new(device);
let mut cmd_buf = session.cmd_buf()?;
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") {
let mut scale = matches
@ -253,40 +250,22 @@ fn main() -> Result<(), Error> {
test_scenes::render_blend_grid(&mut ctx);
}
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
renderer.upload_render_ctx(&mut ctx, 0)?;
let renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
let mut render_driver = RenderDriver::new(&session, 1, renderer);
let start = std::time::Instant::now();
render_driver.upload_render_ctx(&session, &mut ctx)?;
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool, 0);
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
cmd_buf.finish_timestamps(&query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
let start = std::time::Instant::now();
let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?;
submitted.wait()?;
render_driver.run_coarse(&session)?;
let target = render_driver.record_fine(&session)?;
target
.cmd_buf
.copy_image_to_buffer(target.image, &image_buf);
render_driver.submit(&session, &[], &[])?;
render_driver.wait(&session);
println!("elapsed = {:?}", start.elapsed());
let ts = session.fetch_query_pool(&query_pool).unwrap();
if !ts.is_empty() {
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!(
"Tile allocation kernel time: {:.3}ms",
(ts[1] - ts[0]) * 1e3
);
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
}
/*
let mut data: Vec<u32> = Default::default();
renderer.memory_buf_dev.read(&mut data).unwrap();
piet_gpu::dump_k1_data(&data[2..]);
*/
render_driver.get_timing_stats(&session, 0).print_summary();
let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy

View file

@ -1,8 +1,8 @@
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
use piet_gpu_hal::{Error, ImageLayout, Instance, Session};
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
use clap::{App, Arg};
@ -69,13 +69,9 @@ fn main() -> Result<(), Error> {
let present_semaphores = (0..NUM_FRAMES)
.map(|_| session.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
.collect::<Result<Vec<_>, Error>>()?;
let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
let renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
let mut render_driver = RenderDriver::new(&session, NUM_FRAMES, renderer);
let mut mode = 0usize;
event_loop.run(move |event, _, control_flow| {
@ -106,26 +102,13 @@ fn main() -> Result<(), Error> {
Event::RedrawRequested(window_id) if window_id == window.id() => {
let frame_idx = current_frame % NUM_FRAMES;
if let Some(submitted) = submitted[frame_idx].take() {
cmd_bufs[frame_idx] = submitted.wait().unwrap();
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
if !ts.is_empty() {
info_string = format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[10] * 1e3,
ts[0] * 1e3,
(ts[1] - ts[0]) * 1e3,
(ts[2] - ts[1]) * 1e3,
(ts[4] - ts[3]) * 1e3,
(ts[6] - ts[5]) * 1e3,
(ts[8] - ts[7]) * 1e3,
(ts[10] - ts[9]) * 1e3,
);
}
if current_frame >= NUM_FRAMES {
let stats = render_driver.get_timing_stats(&session, frame_idx);
info_string = stats.short_summary();
}
let mut ctx = PietGpuRenderContext::new();
let test_blend = true;
let test_blend = false;
if let Some(svg) = &svg {
test_scenes::render_svg(&mut ctx, svg);
} else if test_blend {
@ -168,16 +151,15 @@ fn main() -> Result<(), Error> {
test_scenes::render_anim_frame(&mut ctx, current_frame);
}
render_info_string(&mut ctx, &info_string);
if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) {
if let Err(e) = render_driver.upload_render_ctx(&session, &mut ctx) {
println!("error in uploading: {}", e);
}
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
let swap_image = swapchain.image(image_idx);
let query_pool = &query_pools[frame_idx];
let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap());
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool, frame_idx);
render_driver.run_coarse(&session).unwrap();
let target = render_driver.record_fine(&session).unwrap();
let cmd_buf = target.cmd_buf;
// Image -> Swapchain
cmd_buf.image_barrier(
@ -185,32 +167,25 @@ fn main() -> Result<(), Error> {
ImageLayout::Undefined,
ImageLayout::BlitDst,
);
cmd_buf.blit_image(&renderer.image_dev, &swap_image);
cmd_buf.blit_image(target.image, &swap_image);
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish();
submitted[frame_idx] = Some(session
.run_cmd_buf(
cmd_buf,
render_driver
.submit(
&session,
&[&acquisition_semaphore],
&[&present_semaphores[frame_idx]],
)
.unwrap());
.unwrap();
swapchain
.present(image_idx, &[&present_semaphores[frame_idx]])
.unwrap();
render_driver.next_buffer();
current_frame += 1;
}
Event::LoopDestroyed => {
for cmd_buf in &mut submitted {
// Wait for command list submission, otherwise dropping of renderer may
// cause validation errors (and possibly crashes).
if let Some(cmd_buf) = cmd_buf.take() {
cmd_buf.wait().unwrap();
}
}
render_driver.wait_all(&session);
}
_ => (),
}

View file

@ -179,7 +179,13 @@ impl RenderConfig {
impl Renderer {
/// The number of query pool entries needed to run the renderer.
pub const QUERY_POOL_SIZE: u32 = 12;
pub const QUERY_POOL_SIZE: u32 = Self::COARSE_QUERY_POOL_SIZE + Self::FINE_QUERY_POOL_SIZE;
/// The number of query pool entries needed to run the coarse pipeline.
pub const COARSE_QUERY_POOL_SIZE: u32 = 10;
/// The number of query pool entries needed to run the fine pipeline.
pub const FINE_QUERY_POOL_SIZE: u32 = 2;
pub unsafe fn new(
session: &Session,
@ -597,6 +603,7 @@ impl Renderer {
query_pool: &QueryPool,
query_start: u32,
) {
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.begin_debug_label("Fine raster");
let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(
&query_pool,

View file

@ -36,9 +36,17 @@ pub struct TargetState<'a> {
pub image: &'a Image,
}
#[derive(Default, Debug)]
pub struct TimingStats {
coarse: Vec<f64>,
fine: Vec<f64>,
}
struct RenderFrame {
cmd_buf: CmdBufState,
query_pool: QueryPool,
coarse_query_pool: QueryPool,
fine_query_pool: QueryPool,
timing_stats: TimingStats,
}
enum CmdBufState {
@ -58,10 +66,14 @@ impl RenderDriver {
.map(|_| {
// Maybe should allocate here so it doesn't happen on first frame?
let cmd_buf = CmdBufState::default();
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
let coarse_query_pool =
session.create_query_pool(Renderer::COARSE_QUERY_POOL_SIZE)?;
let fine_query_pool = session.create_query_pool(Renderer::FINE_QUERY_POOL_SIZE)?;
Ok(RenderFrame {
cmd_buf,
query_pool,
coarse_query_pool,
fine_query_pool,
timing_stats: TimingStats::default(),
})
})
.collect::<Result<_, Error>>()
@ -127,14 +139,15 @@ impl RenderDriver {
cmd_buf.begin();
// TODO: probably want to return query results as well
self.renderer
.record_coarse(cmd_buf, &frame.query_pool, self.buf_ix);
.record_coarse(cmd_buf, &frame.coarse_query_pool, self.buf_ix);
self.renderer.record_readback(cmd_buf);
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
cmd_buf.finish_timestamps(&frame.query_pool);
cmd_buf.finish_timestamps(&frame.coarse_query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
frame.cmd_buf.submit(session, &[], &[])?;
frame.cmd_buf.wait();
frame.timing_stats.coarse = session.fetch_query_pool(&frame.coarse_query_pool)?;
let mut result = Vec::new();
// TODO: consider read method for single POD value
self.renderer.memory_buf_readback.read(&mut result)?;
@ -146,7 +159,7 @@ impl RenderDriver {
pub fn run_coarse(&mut self, session: &Session) -> Result<(), Error> {
loop {
let mem_header = self.try_run_coarse(session)?;
println!("{:?}", mem_header);
//println!("{:?}", mem_header);
if mem_header.mem_error == 0 {
let blend_needed = mem_header.blend_offset as u64;
if blend_needed > self.renderer.blend_size() {
@ -173,7 +186,8 @@ impl RenderDriver {
let frame = &mut self.frames[self.buf_ix];
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
unsafe {
self.renderer.record_fine(cmd_buf, &frame.query_pool, 0);
self.renderer
.record_fine(cmd_buf, &frame.fine_query_pool, 0);
}
let image = &self.renderer.image_dev;
Ok(TargetState { cmd_buf, image })
@ -189,7 +203,7 @@ impl RenderDriver {
let frame = &mut self.frames[self.buf_ix];
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
unsafe {
cmd_buf.finish_timestamps(&frame.query_pool);
cmd_buf.finish_timestamps(&frame.fine_query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
frame
@ -200,15 +214,38 @@ impl RenderDriver {
Ok(())
}
pub fn wait(&mut self) {
self.frames[self.buf_ix].cmd_buf.wait();
self.pending = None;
unsafe fn wait_frame(&mut self, session: &Session, buf_ix: usize) {
let frame = &mut self.frames[buf_ix];
frame.cmd_buf.wait();
if let Ok(stats) = session.fetch_query_pool(&frame.fine_query_pool) {
frame.timing_stats.fine = stats;
}
if self.pending == Some(buf_ix) {
self.pending = None;
}
}
pub unsafe fn wait(&mut self, session: &Session) {
self.wait_frame(session, self.buf_ix);
}
/// Move to the next buffer.
pub fn next_buffer(&mut self) {
self.buf_ix = (self.buf_ix + 1) % self.frames.len()
}
pub unsafe fn get_timing_stats(&mut self, session: &Session, buf_ix: usize) -> &TimingStats {
self.wait_frame(session, buf_ix);
&self.frames[buf_ix].timing_stats
}
pub fn wait_all(&mut self, session: &Session) {
for buf_ix in 0..self.frames.len() {
unsafe {
self.wait_frame(session, buf_ix);
}
}
}
}
impl Default for CmdBufState {
@ -265,3 +302,30 @@ impl CmdBufState {
}
}
}
impl TimingStats {
pub fn print_summary(&self) {
let ts = &self.coarse;
println!("Element time: {:.3}ms", ts[0] * 1e3);
println!("Clip + bin + tile time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Coarse path time: {:.3}ms", (ts[4] - ts[2]) * 1e3);
println!("Backdrop time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[8] - ts[7]) * 1e3);
println!("Fine kernel time: {:.3}ms", self.fine[0] * 1e3);
}
pub fn short_summary(&self) -> String {
let ts = &self.coarse;
let el = ts[0] * 1e3;
let cl = (ts[2] - ts[1]) * 1e3;
let cp = (ts[4] - ts[3]) * 1e3;
let bd = (ts[6] - ts[5]) * 1e3;
let cr = (ts[8] - ts[7]) * 1e3;
let fr = self.fine[0] * 1e3;
let total = el + cl + cp + bd + cr + fr;
format!(
"{:.3}ms :: el:{:.3}ms|cl:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|cr:{:.3}ms|fr:{:.3}ms",
total, el, cl, cp, bd, cr, fr
)
}
}