From 7adb3006716a8167f8fbf81fb316741edc4b9bca Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 18 Oct 2021 15:52:57 -0700 Subject: [PATCH] Double-buffer scene buffer Pipeline the CPU and GPU work so that two frames can be in flight at once. This dramatically improves the performance especially on Android. Note that I've also changed the default configuration to be 3 frames in flight and FIFO mode. --- piet-gpu-hal/src/vulkan.rs | 9 +++- piet-gpu/bin/android.rs | 59 +++++++++++++----------- piet-gpu/bin/cli.rs | 6 +-- piet-gpu/bin/winit.rs | 85 +++++++++++++++++----------------- piet-gpu/src/lib.rs | 93 +++++++++++++++++++++++--------------- 5 files changed, 143 insertions(+), 109 deletions(-) diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index e1d78f2..582c34d 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -413,12 +413,17 @@ impl VkInstance { .surface_fn .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?; + // Can change to MAILBOX to force high frame rates. + const PREFERRED_MODE: vk::PresentModeKHR = vk::PresentModeKHR::FIFO; let present_mode = present_modes .into_iter() - .find(|mode| mode == &vk::PresentModeKHR::MAILBOX) + .find(|mode| *mode == PREFERRED_MODE) .unwrap_or(vk::PresentModeKHR::FIFO); - let image_count = capabilities.min_image_count; + // Note: can be 2 for non-Android to improve latency, but the real answer is to + // implement some kind of frame pacing. + const PREFERRED_IMAGE_COUNT: u32 = 3; + let image_count = PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, capabilities.max_image_count); let mut extent = capabilities.current_extent; if extent.width == u32::MAX || extent.height == u32::MAX { // We're deciding the size. diff --git a/piet-gpu/bin/android.rs b/piet-gpu/bin/android.rs index c008704..d6d5fb5 100644 --- a/piet-gpu/bin/android.rs +++ b/piet-gpu/bin/android.rs @@ -16,6 +16,9 @@ use piet_gpu_hal::{ Swapchain, }; +use piet::kurbo::Point; +use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; + use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; #[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))] @@ -33,8 +36,7 @@ struct GfxState { renderer: Renderer, swapchain: Swapchain, current_frame: usize, - last_frame_idx: usize, - submitted: Option, + submitted: [Option; NUM_FRAMES], query_pools: Vec, present_semaphores: Vec, } @@ -100,31 +102,24 @@ impl GfxState { ) -> Result { unsafe { let device = instance.device(surface)?; - let mut swapchain = instance.swapchain(width, height, &device, surface.unwrap())?; + let swapchain = instance.swapchain(width, height, &device, surface.unwrap())?; let session = Session::new(device); - let mut current_frame = 0; + let current_frame = 0; let present_semaphores = (0..NUM_FRAMES) .map(|_| session.create_semaphore()) .collect::, Error>>()?; let query_pools = (0..NUM_FRAMES) .map(|_| session.create_query_pool(8)) .collect::, Error>>()?; + let submitted = Default::default(); - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, 0); + let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?; - let mut renderer = Renderer::new(&session, width, height)?; - renderer.upload_render_ctx(&mut ctx)?; - - let submitted: Option = None; - let current_frame = 0; - let last_frame_idx = 0; Ok(GfxState { session, renderer, swapchain, current_frame, - last_frame_idx, submitted, query_pools, present_semaphores, @@ -135,28 +130,31 @@ impl GfxState { fn redraw(&mut self) { println!("redraw"); unsafe { - if let Some(submitted) = self.submitted.take() { + let frame_idx = self.current_frame % NUM_FRAMES; + let mut info_string = String::new(); + + if let Some(submitted) = self.submitted[frame_idx].take() { submitted.wait().unwrap(); - - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, self.current_frame); - if let Err(e) = self.renderer.upload_render_ctx(&mut ctx) { - println!("error in uploading: {}", e); - } - let ts = self .session - .fetch_query_pool(&self.query_pools[self.last_frame_idx]) + .fetch_query_pool(&self.query_pools[frame_idx]) .unwrap(); + info_string = format!("{:.1}ms", ts.last().unwrap() * 1e3); println!("render time: {:?}", ts); } - let frame_idx = self.current_frame % NUM_FRAMES; + let mut ctx = PietGpuRenderContext::new(); + test_scenes::render_anim_frame(&mut ctx, self.current_frame); + //test_scenes::render_tiger(&mut ctx); + render_info_string(&mut ctx, &info_string); + if let Err(e) = self.renderer.upload_render_ctx(&mut ctx, frame_idx) { + println!("error in uploading: {}", e); + } let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap(); let swap_image = self.swapchain.image(image_idx); let query_pool = &self.query_pools[frame_idx]; let mut cmd_buf = self.session.cmd_buf().unwrap(); cmd_buf.begin(); - self.renderer.record(&mut cmd_buf, &query_pool); + self.renderer.record(&mut cmd_buf, &query_pool, frame_idx); // Image -> Swapchain cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst); @@ -164,7 +162,7 @@ impl GfxState { cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.finish(); - self.submitted = Some( + self.submitted[frame_idx] = Some( self.session .run_cmd_buf( cmd_buf, @@ -173,7 +171,6 @@ impl GfxState { ) .unwrap(), ); - self.last_frame_idx = frame_idx; self.swapchain .present(image_idx, &[&self.present_semaphores[frame_idx]]) @@ -183,3 +180,13 @@ impl GfxState { } } } + +fn render_info_string(rc: &mut impl RenderContext, info: &str) { + let layout = rc + .text() + .new_text_layout(info.to_string()) + .default_attribute(TextAttribute::FontSize(60.0)) + .build() + .unwrap(); + rc.draw_text(&layout, Point::new(110.0, 120.0)); +} diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index f67afe7..837bd55 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -248,13 +248,13 @@ fn main() -> Result<(), Error> { test_scenes::render_scene(&mut ctx); } - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?; - renderer.upload_render_ctx(&mut ctx)?; + let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?; + renderer.upload_render_ctx(&mut ctx, 0)?; let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST; let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool); + renderer.record(&mut cmd_buf, &query_pool, 0); cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); cmd_buf.host_barrier(); cmd_buf.finish(); diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 19db3c2..a46ccd1 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -1,3 +1,5 @@ +use piet::kurbo::Point; +use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; @@ -37,6 +39,7 @@ fn main() -> Result<(), Error> { .build(&event_loop)?; let (instance, surface) = Instance::new(Some(&window))?; + let mut info_string = "info".to_string(); unsafe { let device = instance.device(surface.as_ref())?; let mut swapchain = @@ -50,27 +53,9 @@ fn main() -> Result<(), Error> { let query_pools = (0..NUM_FRAMES) .map(|_| session.create_query_pool(8)) .collect::, Error>>()?; + let mut submitted: [Option; NUM_FRAMES] = Default::default(); - let mut ctx = PietGpuRenderContext::new(); - if let Some(input) = matches.value_of("INPUT") { - let mut scale = matches - .value_of("scale") - .map(|scale| scale.parse().unwrap()) - .unwrap_or(8.0); - if matches.is_present("flip") { - scale = -scale; - } - test_scenes::render_svg(&mut ctx, input, scale); - } else { - test_scenes::render_scene(&mut ctx); - //test_scenes::render_anim_frame(&mut ctx, 0); - } - - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?; - renderer.upload_render_ctx(&mut ctx)?; - - let mut submitted: Option = None; - let mut last_frame_idx = 0; + let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?; event_loop.run(move |event, _, control_flow| { *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event @@ -90,23 +75,10 @@ fn main() -> Result<(), Error> { Event::RedrawRequested(window_id) if window_id == window.id() => { let frame_idx = current_frame % NUM_FRAMES; - // Note: this logic is a little strange. We have two sets of renderer - // resources, so we could have two frames in flight (submit two, wait on - // the first), but we actually just wait on the last submitted. - // - // Getting this right will take some thought. - if let Some(submitted) = submitted.take() { + if let Some(submitted) = submitted[frame_idx].take() { submitted.wait().unwrap(); - if matches.value_of("INPUT").is_none() { - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, current_frame); - if let Err(e) = renderer.upload_render_ctx(&mut ctx) { - println!("error in uploading: {}", e); - } - } - - let ts = session.fetch_query_pool(&query_pools[last_frame_idx]).unwrap(); - window.set_title(&format!( + let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap(); + info_string = format!( "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", ts[6] * 1e3, ts[0] * 1e3, @@ -116,7 +88,25 @@ fn main() -> Result<(), Error> { (ts[4] - ts[3]) * 1e3, (ts[5] - ts[4]) * 1e3, (ts[6] - ts[5]) * 1e3, - )); + ); + } + + let mut ctx = PietGpuRenderContext::new(); + if let Some(input) = matches.value_of("INPUT") { + let mut scale = matches + .value_of("scale") + .map(|scale| scale.parse().unwrap()) + .unwrap_or(8.0); + if matches.is_present("flip") { + scale = -scale; + } + test_scenes::render_svg(&mut ctx, input, scale); + } else { + test_scenes::render_anim_frame(&mut ctx, current_frame); + } + render_info_string(&mut ctx, &info_string); + if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { + println!("error in uploading: {}", e); } let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); @@ -124,7 +114,7 @@ fn main() -> Result<(), Error> { let query_pool = &query_pools[frame_idx]; let mut cmd_buf = session.cmd_buf().unwrap(); cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool); + renderer.record(&mut cmd_buf, &query_pool, frame_idx); // Image -> Swapchain cmd_buf.image_barrier( @@ -136,14 +126,13 @@ fn main() -> Result<(), Error> { cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.finish(); - submitted = Some(session + submitted[frame_idx] = Some(session .run_cmd_buf( cmd_buf, &[&acquisition_semaphore], &[&present_semaphores[frame_idx]], ) .unwrap()); - last_frame_idx = frame_idx; swapchain .present(image_idx, &[&present_semaphores[frame_idx]]) @@ -152,10 +141,12 @@ fn main() -> Result<(), Error> { current_frame += 1; } Event::LoopDestroyed => { - if let Some(submitted) = submitted.take() { + for cmd_buf in &mut submitted { // Wait for command list submission, otherwise dropping of renderer may // cause validation errors (and possibly crashes). - submitted.wait().unwrap(); + if let Some(cmd_buf) = cmd_buf.take() { + cmd_buf.wait().unwrap(); + } } } _ => (), @@ -163,3 +154,13 @@ fn main() -> Result<(), Error> { }) } } + +fn render_info_string(rc: &mut impl RenderContext, info: &str) { + let layout = rc + .text() + .new_text_layout(info.to_string()) + .default_attribute(TextAttribute::FontSize(40.0)) + .build() + .unwrap(); + rc.draw_text(&layout, Point::new(110.0, 50.0)); +} diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 6c9a82d..b3e92f9 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -6,6 +6,7 @@ mod text; use std::convert::TryInto; +use piet_gpu_types::scene; pub use render_ctx::PietGpuRenderContext; use rand::{Rng, RngCore}; @@ -60,19 +61,20 @@ pub struct Renderer { // The reference is held by the pipelines. We will be changing // this to make the scene upload dynamic. - #[allow(dead_code)] - scene_buf: Buffer, + scene_bufs: Vec, - memory_buf_host: Buffer, + memory_buf_host: Vec, memory_buf_dev: Buffer, state_buf: Buffer, - #[allow(dead_code)] + // Staging buffers + config_bufs: Vec, + // Device config buf config_buf: Buffer, el_pipeline: Pipeline, - el_ds: DescriptorSet, + el_ds: Vec, tile_pipeline: Pipeline, tile_ds: DescriptorSet, @@ -99,13 +101,18 @@ pub struct Renderer { // Keep a reference to the image so that it is not destroyed. _bg_image: Image, - gradient_buf: Buffer, + gradient_bufs: Vec, gradients: Image, } impl Renderer { /// Create a new renderer. - pub unsafe fn new(session: &Session, width: usize, height: usize) -> Result { + pub unsafe fn new( + session: &Session, + width: usize, + height: usize, + n_bufs: usize, + ) -> Result { // For now, round up to tile alignment let width = width + (width.wrapping_neg() & (TILE_W - 1)); let height = height + (height.wrapping_neg() & (TILE_W - 1)); @@ -114,29 +121,39 @@ impl Renderer { // This may be inadequate for very complex scenes (paris etc) // TODO: separate staging buffer (if needed) - let scene_buf = session.create_buffer(1 * 1024 * 1024, host_upload).unwrap(); + let scene_bufs = (0..n_bufs) + .map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap()) + .collect(); let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let image_dev = session.create_image2d(width as u32, height as u32)?; // Note: this must be updated when the config struct size changes. const CONFIG_BUFFER_SIZE: u64 = 40; + let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap(); // TODO: separate staging buffer (if needed) - let config_buf = session - .create_buffer(CONFIG_BUFFER_SIZE, host_upload) - .unwrap(); + let config_bufs = (0..n_bufs) + .map(|_| { + session + .create_buffer(CONFIG_BUFFER_SIZE, host_upload) + .unwrap() + }) + .collect(); - // Perhaps we could avoid the explicit staging buffer by having buffer creation method - // that takes both initial contents and a size. - let memory_buf_host = session.create_buffer(2 * 4, host_upload)?; + let memory_buf_host = (0..n_bufs) + .map(|_| session.create_buffer(2 * 4, host_upload).unwrap()) + .collect(); let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv")); let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?; - let el_ds = session.create_simple_descriptor_set( - &el_pipeline, - &[&memory_buf_dev, &config_buf, &scene_buf, &state_buf], - )?; + let mut el_ds = Vec::with_capacity(n_bufs); + for scene_buf in &scene_bufs { + el_ds.push(session.create_simple_descriptor_set( + &el_pipeline, + &[&memory_buf_dev, &config_buf, scene_buf, &state_buf], + )?); + } let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv")); let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?; @@ -173,7 +190,13 @@ impl Renderer { const GRADIENT_BUF_SIZE: usize = crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4; - let gradient_buf = session.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)?; + let gradient_bufs = (0..n_bufs) + .map(|_| { + session + .create_buffer(GRADIENT_BUF_SIZE as u64, host_upload) + .unwrap() + }) + .collect(); let gradients = Self::make_gradient_image(&session); let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv")); @@ -198,11 +221,12 @@ impl Renderer { Ok(Renderer { width, height, - scene_buf, + scene_bufs, memory_buf_host, memory_buf_dev, state_buf, config_buf, + config_bufs, image_dev, el_pipeline, el_ds, @@ -222,7 +246,7 @@ impl Renderer { n_paths: 0, n_pathseg: 0, _bg_image: bg_image, - gradient_buf, + gradient_bufs, gradients, }) } @@ -235,6 +259,7 @@ impl Renderer { pub fn upload_render_ctx( &mut self, render_ctx: &mut PietGpuRenderContext, + buf_ix: usize, ) -> Result<(), Error> { let n_paths = render_ctx.path_count(); let n_pathseg = render_ctx.pathseg_count(); @@ -280,28 +305,24 @@ impl Renderer { let scene = render_ctx.get_scene_buf(); self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); // TODO: reallocate scene buffer if size is inadequate - assert!(self.scene_buf.size() as usize >= scene.len()); - self.scene_buf.write(scene)?; - self.config_buf.write(config)?; - self.memory_buf_host - .write(&[alloc as u32, 0 /* Overflow flag */])?; + assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len()); + self.scene_bufs[buf_ix].write(scene)?; + self.config_bufs[buf_ix].write(config)?; + self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?; // Upload gradient data. let ramp_data = render_ctx.get_ramp_data(); if !ramp_data.is_empty() { - assert!(self.gradient_buf.size() as usize >= std::mem::size_of_val(&*ramp_data)); - self.gradient_buf.write(&ramp_data)?; + assert!(self.gradient_bufs[buf_ix].size() as usize >= std::mem::size_of_val(&*ramp_data)); + self.gradient_bufs[buf_ix].write(&ramp_data)?; } } Ok(()) } - pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool) { - //cmd_buf.clear_buffer(&self.memory_buf_dev, None); - //cmd_buf.memory_barrier(); - // Only need to copy the first few words; need to upgrade HAL to be able to - // express sub-buffer copies. - cmd_buf.copy_buffer(&self.memory_buf_host, &self.memory_buf_dev); + pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) { + cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf); + cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev); cmd_buf.clear_buffer(&self.state_buf, None); cmd_buf.memory_barrier(); cmd_buf.image_barrier( @@ -315,13 +336,13 @@ impl Renderer { ImageLayout::Undefined, ImageLayout::BlitDst, ); - cmd_buf.copy_buffer_to_image(&self.gradient_buf, &self.gradients); + cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients); cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.dispatch( &self.el_pipeline, - &self.el_ds, + &self.el_ds[buf_ix], (((self.n_elements + 127) / 128) as u32, 1, 1), (128, 1, 1), );