mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Gather timing statistics from RenderDriver
Also change command line binaries to use new abstraction.
This commit is contained in:
parent
9930c9d1de
commit
61598d2da0
|
@ -6,7 +6,7 @@ use clap::{App, Arg};
|
|||
|
||||
use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};
|
||||
|
||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
|
||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
|
||||
|
||||
const WIDTH: usize = 2048;
|
||||
const HEIGHT: usize = 1536;
|
||||
|
@ -231,9 +231,6 @@ fn main() -> Result<(), Error> {
|
|||
let device = instance.device(None)?;
|
||||
let session = Session::new(device);
|
||||
|
||||
let mut cmd_buf = session.cmd_buf()?;
|
||||
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
|
||||
|
||||
let mut ctx = PietGpuRenderContext::new();
|
||||
if let Some(input) = matches.value_of("INPUT") {
|
||||
let mut scale = matches
|
||||
|
@ -253,40 +250,22 @@ fn main() -> Result<(), Error> {
|
|||
test_scenes::render_blend_grid(&mut ctx);
|
||||
}
|
||||
|
||||
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
|
||||
renderer.upload_render_ctx(&mut ctx, 0)?;
|
||||
let renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
|
||||
let mut render_driver = RenderDriver::new(&session, 1, renderer);
|
||||
let start = std::time::Instant::now();
|
||||
render_driver.upload_render_ctx(&session, &mut ctx)?;
|
||||
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
|
||||
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
|
||||
|
||||
cmd_buf.begin();
|
||||
renderer.record(&mut cmd_buf, &query_pool, 0);
|
||||
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
|
||||
cmd_buf.finish_timestamps(&query_pool);
|
||||
cmd_buf.host_barrier();
|
||||
cmd_buf.finish();
|
||||
let start = std::time::Instant::now();
|
||||
let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?;
|
||||
submitted.wait()?;
|
||||
render_driver.run_coarse(&session)?;
|
||||
let target = render_driver.record_fine(&session)?;
|
||||
target
|
||||
.cmd_buf
|
||||
.copy_image_to_buffer(target.image, &image_buf);
|
||||
render_driver.submit(&session, &[], &[])?;
|
||||
render_driver.wait(&session);
|
||||
println!("elapsed = {:?}", start.elapsed());
|
||||
let ts = session.fetch_query_pool(&query_pool).unwrap();
|
||||
if !ts.is_empty() {
|
||||
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
||||
println!(
|
||||
"Tile allocation kernel time: {:.3}ms",
|
||||
(ts[1] - ts[0]) * 1e3
|
||||
);
|
||||
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
||||
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
|
||||
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
|
||||
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
|
||||
}
|
||||
|
||||
/*
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
renderer.memory_buf_dev.read(&mut data).unwrap();
|
||||
piet_gpu::dump_k1_data(&data[2..]);
|
||||
*/
|
||||
render_driver.get_timing_stats(&session, 0).print_summary();
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use piet::kurbo::Point;
|
||||
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
|
||||
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
|
||||
use piet_gpu_hal::{Error, ImageLayout, Instance, Session};
|
||||
|
||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
|
||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
|
||||
|
||||
use clap::{App, Arg};
|
||||
|
||||
|
@ -69,13 +69,9 @@ fn main() -> Result<(), Error> {
|
|||
let present_semaphores = (0..NUM_FRAMES)
|
||||
.map(|_| session.create_semaphore())
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
let query_pools = (0..NUM_FRAMES)
|
||||
.map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
|
||||
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
|
||||
|
||||
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
|
||||
let renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
|
||||
let mut render_driver = RenderDriver::new(&session, NUM_FRAMES, renderer);
|
||||
let mut mode = 0usize;
|
||||
|
||||
event_loop.run(move |event, _, control_flow| {
|
||||
|
@ -106,26 +102,13 @@ fn main() -> Result<(), Error> {
|
|||
Event::RedrawRequested(window_id) if window_id == window.id() => {
|
||||
let frame_idx = current_frame % NUM_FRAMES;
|
||||
|
||||
if let Some(submitted) = submitted[frame_idx].take() {
|
||||
cmd_bufs[frame_idx] = submitted.wait().unwrap();
|
||||
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
|
||||
if !ts.is_empty() {
|
||||
info_string = format!(
|
||||
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
|
||||
ts[10] * 1e3,
|
||||
ts[0] * 1e3,
|
||||
(ts[1] - ts[0]) * 1e3,
|
||||
(ts[2] - ts[1]) * 1e3,
|
||||
(ts[4] - ts[3]) * 1e3,
|
||||
(ts[6] - ts[5]) * 1e3,
|
||||
(ts[8] - ts[7]) * 1e3,
|
||||
(ts[10] - ts[9]) * 1e3,
|
||||
);
|
||||
}
|
||||
if current_frame >= NUM_FRAMES {
|
||||
let stats = render_driver.get_timing_stats(&session, frame_idx);
|
||||
info_string = stats.short_summary();
|
||||
}
|
||||
|
||||
let mut ctx = PietGpuRenderContext::new();
|
||||
let test_blend = true;
|
||||
let test_blend = false;
|
||||
if let Some(svg) = &svg {
|
||||
test_scenes::render_svg(&mut ctx, svg);
|
||||
} else if test_blend {
|
||||
|
@ -168,16 +151,15 @@ fn main() -> Result<(), Error> {
|
|||
test_scenes::render_anim_frame(&mut ctx, current_frame);
|
||||
}
|
||||
render_info_string(&mut ctx, &info_string);
|
||||
if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) {
|
||||
if let Err(e) = render_driver.upload_render_ctx(&session, &mut ctx) {
|
||||
println!("error in uploading: {}", e);
|
||||
}
|
||||
|
||||
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
|
||||
let swap_image = swapchain.image(image_idx);
|
||||
let query_pool = &query_pools[frame_idx];
|
||||
let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap());
|
||||
cmd_buf.begin();
|
||||
renderer.record(&mut cmd_buf, &query_pool, frame_idx);
|
||||
render_driver.run_coarse(&session).unwrap();
|
||||
let target = render_driver.record_fine(&session).unwrap();
|
||||
let cmd_buf = target.cmd_buf;
|
||||
|
||||
// Image -> Swapchain
|
||||
cmd_buf.image_barrier(
|
||||
|
@ -185,32 +167,25 @@ fn main() -> Result<(), Error> {
|
|||
ImageLayout::Undefined,
|
||||
ImageLayout::BlitDst,
|
||||
);
|
||||
cmd_buf.blit_image(&renderer.image_dev, &swap_image);
|
||||
cmd_buf.blit_image(target.image, &swap_image);
|
||||
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
||||
cmd_buf.finish();
|
||||
|
||||
submitted[frame_idx] = Some(session
|
||||
.run_cmd_buf(
|
||||
cmd_buf,
|
||||
render_driver
|
||||
.submit(
|
||||
&session,
|
||||
&[&acquisition_semaphore],
|
||||
&[&present_semaphores[frame_idx]],
|
||||
)
|
||||
.unwrap());
|
||||
.unwrap();
|
||||
|
||||
swapchain
|
||||
.present(image_idx, &[&present_semaphores[frame_idx]])
|
||||
.unwrap();
|
||||
|
||||
render_driver.next_buffer();
|
||||
current_frame += 1;
|
||||
}
|
||||
Event::LoopDestroyed => {
|
||||
for cmd_buf in &mut submitted {
|
||||
// Wait for command list submission, otherwise dropping of renderer may
|
||||
// cause validation errors (and possibly crashes).
|
||||
if let Some(cmd_buf) = cmd_buf.take() {
|
||||
cmd_buf.wait().unwrap();
|
||||
}
|
||||
}
|
||||
render_driver.wait_all(&session);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
|
|
@ -179,7 +179,13 @@ impl RenderConfig {
|
|||
|
||||
impl Renderer {
|
||||
/// The number of query pool entries needed to run the renderer.
|
||||
pub const QUERY_POOL_SIZE: u32 = 12;
|
||||
pub const QUERY_POOL_SIZE: u32 = Self::COARSE_QUERY_POOL_SIZE + Self::FINE_QUERY_POOL_SIZE;
|
||||
|
||||
/// The number of query pool entries needed to run the coarse pipeline.
|
||||
pub const COARSE_QUERY_POOL_SIZE: u32 = 10;
|
||||
|
||||
/// The number of query pool entries needed to run the fine pipeline.
|
||||
pub const FINE_QUERY_POOL_SIZE: u32 = 2;
|
||||
|
||||
pub unsafe fn new(
|
||||
session: &Session,
|
||||
|
@ -597,6 +603,7 @@ impl Renderer {
|
|||
query_pool: &QueryPool,
|
||||
query_start: u32,
|
||||
) {
|
||||
cmd_buf.reset_query_pool(&query_pool);
|
||||
cmd_buf.begin_debug_label("Fine raster");
|
||||
let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(
|
||||
&query_pool,
|
||||
|
|
|
@ -36,9 +36,17 @@ pub struct TargetState<'a> {
|
|||
pub image: &'a Image,
|
||||
}
|
||||
|
||||
/// Timing values collected for one render frame, as returned by
/// `Session::fetch_query_pool` for that frame's query pools.
///
/// Both vectors start out empty (`Default`) and stay empty until a query pool
/// has been fetched successfully.
#[derive(Default, Debug)]
|
||||
pub struct TimingStats {
|
||||
// Values fetched from the frame's `coarse_query_pool` after the coarse pass has been waited on.
coarse: Vec<f64>,
|
||||
// Values fetched from the frame's `fine_query_pool` in `wait_frame`.
fine: Vec<f64>,
|
||||
}
|
||||
|
||||
struct RenderFrame {
|
||||
cmd_buf: CmdBufState,
|
||||
query_pool: QueryPool,
|
||||
coarse_query_pool: QueryPool,
|
||||
fine_query_pool: QueryPool,
|
||||
timing_stats: TimingStats,
|
||||
}
|
||||
|
||||
enum CmdBufState {
|
||||
|
@ -58,10 +66,14 @@ impl RenderDriver {
|
|||
.map(|_| {
|
||||
// Maybe should allocate here so it doesn't happen on first frame?
|
||||
let cmd_buf = CmdBufState::default();
|
||||
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
|
||||
let coarse_query_pool =
|
||||
session.create_query_pool(Renderer::COARSE_QUERY_POOL_SIZE)?;
|
||||
let fine_query_pool = session.create_query_pool(Renderer::FINE_QUERY_POOL_SIZE)?;
|
||||
Ok(RenderFrame {
|
||||
cmd_buf,
|
||||
query_pool,
|
||||
coarse_query_pool,
|
||||
fine_query_pool,
|
||||
timing_stats: TimingStats::default(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<_, Error>>()
|
||||
|
@ -127,14 +139,15 @@ impl RenderDriver {
|
|||
cmd_buf.begin();
|
||||
// TODO: probably want to return query results as well
|
||||
self.renderer
|
||||
.record_coarse(cmd_buf, &frame.query_pool, self.buf_ix);
|
||||
.record_coarse(cmd_buf, &frame.coarse_query_pool, self.buf_ix);
|
||||
self.renderer.record_readback(cmd_buf);
|
||||
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||
cmd_buf.finish_timestamps(&frame.query_pool);
|
||||
cmd_buf.finish_timestamps(&frame.coarse_query_pool);
|
||||
cmd_buf.host_barrier();
|
||||
cmd_buf.finish();
|
||||
frame.cmd_buf.submit(session, &[], &[])?;
|
||||
frame.cmd_buf.wait();
|
||||
frame.timing_stats.coarse = session.fetch_query_pool(&frame.coarse_query_pool)?;
|
||||
let mut result = Vec::new();
|
||||
// TODO: consider read method for single POD value
|
||||
self.renderer.memory_buf_readback.read(&mut result)?;
|
||||
|
@ -146,7 +159,7 @@ impl RenderDriver {
|
|||
pub fn run_coarse(&mut self, session: &Session) -> Result<(), Error> {
|
||||
loop {
|
||||
let mem_header = self.try_run_coarse(session)?;
|
||||
println!("{:?}", mem_header);
|
||||
//println!("{:?}", mem_header);
|
||||
if mem_header.mem_error == 0 {
|
||||
let blend_needed = mem_header.blend_offset as u64;
|
||||
if blend_needed > self.renderer.blend_size() {
|
||||
|
@ -173,7 +186,8 @@ impl RenderDriver {
|
|||
let frame = &mut self.frames[self.buf_ix];
|
||||
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||
unsafe {
|
||||
self.renderer.record_fine(cmd_buf, &frame.query_pool, 0);
|
||||
self.renderer
|
||||
.record_fine(cmd_buf, &frame.fine_query_pool, 0);
|
||||
}
|
||||
let image = &self.renderer.image_dev;
|
||||
Ok(TargetState { cmd_buf, image })
|
||||
|
@ -189,7 +203,7 @@ impl RenderDriver {
|
|||
let frame = &mut self.frames[self.buf_ix];
|
||||
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||
unsafe {
|
||||
cmd_buf.finish_timestamps(&frame.query_pool);
|
||||
cmd_buf.finish_timestamps(&frame.fine_query_pool);
|
||||
cmd_buf.host_barrier();
|
||||
cmd_buf.finish();
|
||||
frame
|
||||
|
@ -200,15 +214,38 @@ impl RenderDriver {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn wait(&mut self) {
|
||||
self.frames[self.buf_ix].cmd_buf.wait();
|
||||
/// Wait on the command buffer of frame `buf_ix` and refresh its fine timing values.
///
/// After the wait, the frame's `fine_query_pool` is fetched; on success the result
/// replaces `timing_stats.fine`. `self.pending` is cleared when it refers to this frame.
///
/// # Safety
///
/// NOTE(review): the caller contract that makes this `unsafe` is not visible in
/// this function — confirm against `CmdBufState::wait` and `Session::fetch_query_pool`.
unsafe fn wait_frame(&mut self, session: &Session, buf_ix: usize) {
|
||||
let frame = &mut self.frames[buf_ix];
|
||||
frame.cmd_buf.wait();
|
||||
// Best effort: a failed fetch is ignored and the previously stored fine values are kept.
if let Ok(stats) = session.fetch_query_pool(&frame.fine_query_pool) {
|
||||
frame.timing_stats.fine = stats;
|
||||
}
|
||||
// The frame has been waited on, so it no longer counts as the pending one.
if self.pending == Some(buf_ix) {
|
||||
self.pending = None;
|
||||
}
|
||||
}
|
||||
|
||||
/// Wait on the frame at the current buffer index (`self.buf_ix`).
///
/// Delegates to `wait_frame`, so the fine timing values of that frame are
/// refreshed as part of the wait.
///
/// # Safety
///
/// NOTE(review): same requirements as `wait_frame`; the exact contract is not
/// visible here — confirm.
pub unsafe fn wait(&mut self, session: &Session) {
|
||||
self.wait_frame(session, self.buf_ix);
|
||||
}
|
||||
|
||||
/// Move to the next buffer.
///
/// Advances `buf_ix` by one, wrapping back to 0 after the last entry of `frames`.
|
||||
pub fn next_buffer(&mut self) {
|
||||
self.buf_ix = (self.buf_ix + 1) % self.frames.len()
|
||||
}
|
||||
|
||||
/// Wait on frame `buf_ix` and return a reference to its timing statistics.
///
/// The wait goes through `wait_frame`, which also refreshes the frame's fine
/// timing values before the reference is handed out.
///
/// # Safety
///
/// NOTE(review): same requirements as `wait_frame`; the exact contract is not
/// visible here — confirm.
pub unsafe fn get_timing_stats(&mut self, session: &Session, buf_ix: usize) -> &TimingStats {
|
||||
self.wait_frame(session, buf_ix);
|
||||
&self.frames[buf_ix].timing_stats
|
||||
}
|
||||
|
||||
/// Wait on every frame in `frames`, in index order.
///
/// Each frame goes through `wait_frame`, so fine timing values are refreshed
/// for all frames as a side effect.
pub fn wait_all(&mut self, session: &Session) {
|
||||
for buf_ix in 0..self.frames.len() {
|
||||
// SAFETY: NOTE(review): the invariant that makes calling `wait_frame` sound
// is not stated in the visible code — confirm and record it here.
unsafe {
|
||||
self.wait_frame(session, buf_ix);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CmdBufState {
|
||||
|
@ -265,3 +302,30 @@ impl CmdBufState {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TimingStats {
|
||||
pub fn print_summary(&self) {
|
||||
let ts = &self.coarse;
|
||||
println!("Element time: {:.3}ms", ts[0] * 1e3);
|
||||
println!("Clip + bin + tile time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||
println!("Coarse path time: {:.3}ms", (ts[4] - ts[2]) * 1e3);
|
||||
println!("Backdrop time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
|
||||
println!("Coarse raster kernel time: {:.3}ms", (ts[8] - ts[7]) * 1e3);
|
||||
println!("Fine kernel time: {:.3}ms", self.fine[0] * 1e3);
|
||||
}
|
||||
|
||||
pub fn short_summary(&self) -> String {
|
||||
let ts = &self.coarse;
|
||||
let el = ts[0] * 1e3;
|
||||
let cl = (ts[2] - ts[1]) * 1e3;
|
||||
let cp = (ts[4] - ts[3]) * 1e3;
|
||||
let bd = (ts[6] - ts[5]) * 1e3;
|
||||
let cr = (ts[8] - ts[7]) * 1e3;
|
||||
let fr = self.fine[0] * 1e3;
|
||||
let total = el + cl + cp + bd + cr + fr;
|
||||
format!(
|
||||
"{:.3}ms :: el:{:.3}ms|cl:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|cr:{:.3}ms|fr:{:.3}ms",
|
||||
total, el, cl, cp, bd, cr, fr
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue