diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index e1d78f2..582c34d 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -413,12 +413,17 @@ impl VkInstance { .surface_fn .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?; + // Can change to MAILBOX to force high frame rates. + const PREFERRED_MODE: vk::PresentModeKHR = vk::PresentModeKHR::FIFO; let present_mode = present_modes .into_iter() - .find(|mode| mode == &vk::PresentModeKHR::MAILBOX) + .find(|mode| *mode == PREFERRED_MODE) .unwrap_or(vk::PresentModeKHR::FIFO); - let image_count = capabilities.min_image_count; + // Note: can be 2 for non-Android to improve latency, but the real answer is to + // implement some kind of frame pacing. + const PREFERRED_IMAGE_COUNT: u32 = 3; + let image_count = PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, capabilities.max_image_count); let mut extent = capabilities.current_extent; if extent.width == u32::MAX || extent.height == u32::MAX { // We're deciding the size. diff --git a/piet-gpu/bin/android.rs b/piet-gpu/bin/android.rs index c008704..d6d5fb5 100644 --- a/piet-gpu/bin/android.rs +++ b/piet-gpu/bin/android.rs @@ -16,6 +16,9 @@ use piet_gpu_hal::{ Swapchain, }; +use piet::kurbo::Point; +use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; + use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; #[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))] @@ -33,8 +36,7 @@ struct GfxState { renderer: Renderer, swapchain: Swapchain, current_frame: usize, - last_frame_idx: usize, - submitted: Option, + submitted: [Option; NUM_FRAMES], query_pools: Vec, present_semaphores: Vec, } @@ -100,31 +102,24 @@ impl GfxState { ) -> Result { unsafe { let device = instance.device(surface)?; - let mut swapchain = instance.swapchain(width, height, &device, surface.unwrap())?; + let swapchain = instance.swapchain(width, height, &device, surface.unwrap())?; let session = Session::new(device); - let mut current_frame = 0; + let current_frame = 0; let present_semaphores = (0..NUM_FRAMES) .map(|_| session.create_semaphore()) .collect::, Error>>()?; let query_pools = (0..NUM_FRAMES) .map(|_| session.create_query_pool(8)) .collect::, Error>>()?; + let submitted = Default::default(); - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, 0); + let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?; - let mut renderer = Renderer::new(&session, width, height)?; - renderer.upload_render_ctx(&mut ctx)?; - - let submitted: Option = None; - let current_frame = 0; - let last_frame_idx = 0; Ok(GfxState { session, renderer, swapchain, current_frame, - last_frame_idx, submitted, query_pools, present_semaphores, @@ -135,28 +130,31 @@ impl GfxState { fn redraw(&mut self) { println!("redraw"); unsafe { - if let Some(submitted) = self.submitted.take() { + let frame_idx = self.current_frame % NUM_FRAMES; + let mut info_string = String::new(); + + if let Some(submitted) = self.submitted[frame_idx].take() { submitted.wait().unwrap(); - - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, self.current_frame); - if let Err(e) = self.renderer.upload_render_ctx(&mut ctx) { - println!("error in uploading: {}", e); - } - let ts = self .session - .fetch_query_pool(&self.query_pools[self.last_frame_idx]) + .fetch_query_pool(&self.query_pools[frame_idx]) .unwrap(); + info_string = format!("{:.1}ms", ts.last().unwrap() * 1e3); println!("render time: {:?}", ts); } - let frame_idx = self.current_frame % NUM_FRAMES; + let mut ctx = PietGpuRenderContext::new(); + test_scenes::render_anim_frame(&mut ctx, self.current_frame); + //test_scenes::render_tiger(&mut ctx); + render_info_string(&mut ctx, &info_string); + if let Err(e) = self.renderer.upload_render_ctx(&mut ctx, frame_idx) { + println!("error in uploading: {}", e); + } let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap(); let swap_image = self.swapchain.image(image_idx); let query_pool = &self.query_pools[frame_idx]; let mut cmd_buf = self.session.cmd_buf().unwrap(); cmd_buf.begin(); - self.renderer.record(&mut cmd_buf, &query_pool); + self.renderer.record(&mut cmd_buf, &query_pool, frame_idx); // Image -> Swapchain cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst); @@ -164,7 +162,7 @@ impl GfxState { cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.finish(); - self.submitted = Some( + self.submitted[frame_idx] = Some( self.session .run_cmd_buf( cmd_buf, @@ -173,7 +171,6 @@ impl GfxState { ) .unwrap(), ); - self.last_frame_idx = frame_idx; self.swapchain .present(image_idx, &[&self.present_semaphores[frame_idx]]) @@ -183,3 +180,13 @@ impl GfxState { } } } + +fn render_info_string(rc: &mut impl RenderContext, info: &str) { + let layout = rc + .text() + .new_text_layout(info.to_string()) + .default_attribute(TextAttribute::FontSize(60.0)) + .build() + .unwrap(); + rc.draw_text(&layout, Point::new(110.0, 120.0)); +} diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index f67afe7..837bd55 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -248,13 +248,13 @@ fn main() -> Result<(), Error> { test_scenes::render_scene(&mut ctx); } - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?; - renderer.upload_render_ctx(&mut ctx)?; + let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?; + renderer.upload_render_ctx(&mut ctx, 0)?; let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST; let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool); + renderer.record(&mut cmd_buf, &query_pool, 0); cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); cmd_buf.host_barrier(); cmd_buf.finish(); diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 19db3c2..a46ccd1 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -1,3 +1,5 @@ +use piet::kurbo::Point; +use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; @@ -37,6 +39,7 @@ fn main() -> Result<(), Error> { .build(&event_loop)?; let (instance, surface) = Instance::new(Some(&window))?; + let mut info_string = "info".to_string(); unsafe { let device = instance.device(surface.as_ref())?; let mut swapchain = @@ -50,27 +53,9 @@ fn main() -> Result<(), Error> { let query_pools = (0..NUM_FRAMES) .map(|_| session.create_query_pool(8)) .collect::, Error>>()?; + let mut submitted: [Option; NUM_FRAMES] = Default::default(); - let mut ctx = PietGpuRenderContext::new(); - if let Some(input) = matches.value_of("INPUT") { - let mut scale = matches - .value_of("scale") - .map(|scale| scale.parse().unwrap()) - .unwrap_or(8.0); - if matches.is_present("flip") { - scale = -scale; - } - test_scenes::render_svg(&mut ctx, input, scale); - } else { - test_scenes::render_scene(&mut ctx); - //test_scenes::render_anim_frame(&mut ctx, 0); - } - - let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?; - renderer.upload_render_ctx(&mut ctx)?; - - let mut submitted: Option = None; - let mut last_frame_idx = 0; + let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?; event_loop.run(move |event, _, control_flow| { *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event @@ -90,23 +75,10 @@ fn main() -> Result<(), Error> { Event::RedrawRequested(window_id) if window_id == window.id() => { let frame_idx = current_frame % NUM_FRAMES; - // Note: this logic is a little strange. We have two sets of renderer - // resources, so we could have two frames in flight (submit two, wait on - // the first), but we actually just wait on the last submitted. - // - // Getting this right will take some thought. - if let Some(submitted) = submitted.take() { + if let Some(submitted) = submitted[frame_idx].take() { submitted.wait().unwrap(); - if matches.value_of("INPUT").is_none() { - let mut ctx = PietGpuRenderContext::new(); - test_scenes::render_anim_frame(&mut ctx, current_frame); - if let Err(e) = renderer.upload_render_ctx(&mut ctx) { - println!("error in uploading: {}", e); - } - } - - let ts = session.fetch_query_pool(&query_pools[last_frame_idx]).unwrap(); - window.set_title(&format!( + let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap(); + info_string = format!( "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", ts[6] * 1e3, ts[0] * 1e3, @@ -116,7 +88,25 @@ fn main() -> Result<(), Error> { (ts[4] - ts[3]) * 1e3, (ts[5] - ts[4]) * 1e3, (ts[6] - ts[5]) * 1e3, - )); + ); + } + + let mut ctx = PietGpuRenderContext::new(); + if let Some(input) = matches.value_of("INPUT") { + let mut scale = matches + .value_of("scale") + .map(|scale| scale.parse().unwrap()) + .unwrap_or(8.0); + if matches.is_present("flip") { + scale = -scale; + } + test_scenes::render_svg(&mut ctx, input, scale); + } else { + test_scenes::render_anim_frame(&mut ctx, current_frame); + } + render_info_string(&mut ctx, &info_string); + if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { + println!("error in uploading: {}", e); } let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); @@ -124,7 +114,7 @@ fn main() -> Result<(), Error> { let query_pool = &query_pools[frame_idx]; let mut cmd_buf = session.cmd_buf().unwrap(); cmd_buf.begin(); - renderer.record(&mut cmd_buf, &query_pool); + renderer.record(&mut cmd_buf, &query_pool, frame_idx); // Image -> Swapchain cmd_buf.image_barrier( @@ -136,14 +126,13 @@ fn main() -> Result<(), Error> { cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.finish(); - submitted = Some(session + submitted[frame_idx] = Some(session .run_cmd_buf( cmd_buf, &[&acquisition_semaphore], &[&present_semaphores[frame_idx]], ) .unwrap()); - last_frame_idx = frame_idx; swapchain .present(image_idx, &[&present_semaphores[frame_idx]]) @@ -152,10 +141,12 @@ fn main() -> Result<(), Error> { current_frame += 1; } Event::LoopDestroyed => { - if let Some(submitted) = submitted.take() { + for cmd_buf in &mut submitted { // Wait for command list submission, otherwise dropping of renderer may // cause validation errors (and possibly crashes). - submitted.wait().unwrap(); + if let Some(cmd_buf) = cmd_buf.take() { + cmd_buf.wait().unwrap(); + } } } _ => (), @@ -163,3 +154,13 @@ fn main() -> Result<(), Error> { }) } } + +fn render_info_string(rc: &mut impl RenderContext, info: &str) { + let layout = rc + .text() + .new_text_layout(info.to_string()) + .default_attribute(TextAttribute::FontSize(40.0)) + .build() + .unwrap(); + rc.draw_text(&layout, Point::new(110.0, 50.0)); +} diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 6c9a82d..b3e92f9 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -6,6 +6,7 @@ mod text; use std::convert::TryInto; +use piet_gpu_types::scene; pub use render_ctx::PietGpuRenderContext; use rand::{Rng, RngCore}; @@ -60,19 +61,20 @@ pub struct Renderer { // The reference is held by the pipelines. We will be changing // this to make the scene upload dynamic. - #[allow(dead_code)] - scene_buf: Buffer, + scene_bufs: Vec, - memory_buf_host: Buffer, + memory_buf_host: Vec, memory_buf_dev: Buffer, state_buf: Buffer, - #[allow(dead_code)] + // Staging buffers + config_bufs: Vec, + // Device config buf config_buf: Buffer, el_pipeline: Pipeline, - el_ds: DescriptorSet, + el_ds: Vec, tile_pipeline: Pipeline, tile_ds: DescriptorSet, @@ -99,13 +101,18 @@ pub struct Renderer { // Keep a reference to the image so that it is not destroyed. _bg_image: Image, - gradient_buf: Buffer, + gradient_bufs: Vec, gradients: Image, } impl Renderer { /// Create a new renderer. - pub unsafe fn new(session: &Session, width: usize, height: usize) -> Result { + pub unsafe fn new( + session: &Session, + width: usize, + height: usize, + n_bufs: usize, + ) -> Result { // For now, round up to tile alignment let width = width + (width.wrapping_neg() & (TILE_W - 1)); let height = height + (height.wrapping_neg() & (TILE_W - 1)); @@ -114,29 +121,39 @@ impl Renderer { // This may be inadequate for very complex scenes (paris etc) // TODO: separate staging buffer (if needed) - let scene_buf = session.create_buffer(1 * 1024 * 1024, host_upload).unwrap(); + let scene_bufs = (0..n_bufs) + .map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap()) + .collect(); let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let image_dev = session.create_image2d(width as u32, height as u32)?; // Note: this must be updated when the config struct size changes. const CONFIG_BUFFER_SIZE: u64 = 40; + let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap(); // TODO: separate staging buffer (if needed) - let config_buf = session - .create_buffer(CONFIG_BUFFER_SIZE, host_upload) - .unwrap(); + let config_bufs = (0..n_bufs) + .map(|_| { + session + .create_buffer(CONFIG_BUFFER_SIZE, host_upload) + .unwrap() + }) + .collect(); - // Perhaps we could avoid the explicit staging buffer by having buffer creation method - // that takes both initial contents and a size. - let memory_buf_host = session.create_buffer(2 * 4, host_upload)?; + let memory_buf_host = (0..n_bufs) + .map(|_| session.create_buffer(2 * 4, host_upload).unwrap()) + .collect(); let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv")); let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?; - let el_ds = session.create_simple_descriptor_set( - &el_pipeline, - &[&memory_buf_dev, &config_buf, &scene_buf, &state_buf], - )?; + let mut el_ds = Vec::with_capacity(n_bufs); + for scene_buf in &scene_bufs { + el_ds.push(session.create_simple_descriptor_set( + &el_pipeline, + &[&memory_buf_dev, &config_buf, scene_buf, &state_buf], + )?); + } let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv")); let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?; @@ -173,7 +190,13 @@ impl Renderer { const GRADIENT_BUF_SIZE: usize = crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4; - let gradient_buf = session.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)?; + let gradient_bufs = (0..n_bufs) + .map(|_| { + session + .create_buffer(GRADIENT_BUF_SIZE as u64, host_upload) + .unwrap() + }) + .collect(); let gradients = Self::make_gradient_image(&session); let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv")); @@ -198,11 +221,12 @@ impl Renderer { Ok(Renderer { width, height, - scene_buf, + scene_bufs, memory_buf_host, memory_buf_dev, state_buf, config_buf, + config_bufs, image_dev, el_pipeline, el_ds, @@ -222,7 +246,7 @@ impl Renderer { n_paths: 0, n_pathseg: 0, _bg_image: bg_image, - gradient_buf, + gradient_bufs, gradients, }) } @@ -235,6 +259,7 @@ impl Renderer { pub fn upload_render_ctx( &mut self, render_ctx: &mut PietGpuRenderContext, + buf_ix: usize, ) -> Result<(), Error> { let n_paths = render_ctx.path_count(); let n_pathseg = render_ctx.pathseg_count(); @@ -280,28 +305,24 @@ impl Renderer { let scene = render_ctx.get_scene_buf(); self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); // TODO: reallocate scene buffer if size is inadequate - assert!(self.scene_buf.size() as usize >= scene.len()); - self.scene_buf.write(scene)?; - self.config_buf.write(config)?; - self.memory_buf_host - .write(&[alloc as u32, 0 /* Overflow flag */])?; + assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len()); + self.scene_bufs[buf_ix].write(scene)?; + self.config_bufs[buf_ix].write(config)?; + self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?; // Upload gradient data. let ramp_data = render_ctx.get_ramp_data(); if !ramp_data.is_empty() { - assert!(self.gradient_buf.size() as usize >= std::mem::size_of_val(&*ramp_data)); - self.gradient_buf.write(&ramp_data)?; + assert!(self.gradient_bufs[buf_ix].size() as usize >= std::mem::size_of_val(&*ramp_data)); + self.gradient_bufs[buf_ix].write(&ramp_data)?; } } Ok(()) } - pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool) { - //cmd_buf.clear_buffer(&self.memory_buf_dev, None); - //cmd_buf.memory_barrier(); - // Only need to copy the first few words; need to upgrade HAL to be able to - // express sub-buffer copies. - cmd_buf.copy_buffer(&self.memory_buf_host, &self.memory_buf_dev); + pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) { + cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf); + cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev); cmd_buf.clear_buffer(&self.state_buf, None); cmd_buf.memory_barrier(); cmd_buf.image_barrier( @@ -315,13 +336,13 @@ impl Renderer { ImageLayout::Undefined, ImageLayout::BlitDst, ); - cmd_buf.copy_buffer_to_image(&self.gradient_buf, &self.gradients); + cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients); cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.dispatch( &self.el_pipeline, - &self.el_ds, + &self.el_ds[buf_ix], (((self.n_elements + 127) / 128) as u32, 1, 1), (128, 1, 1), );