Double-buffer scene buffer

Pipeline the CPU and GPU work so that two frames can be in flight at
once.

This dramatically improves the performance especially on Android. Note
that I've also changed the default configuration to be 3 frames in
flight and FIFO mode.
This commit is contained in:
Raph Levien 2021-10-18 15:52:57 -07:00
parent 8b4a6c54cd
commit 7adb300671
5 changed files with 143 additions and 109 deletions

View file

@ -413,12 +413,17 @@ impl VkInstance {
.surface_fn
.get_physical_device_surface_present_modes(device.physical_device, surface.surface)?;
// Can change to MAILBOX to force high frame rates.
const PREFERRED_MODE: vk::PresentModeKHR = vk::PresentModeKHR::FIFO;
let present_mode = present_modes
.into_iter()
.find(|mode| mode == &vk::PresentModeKHR::MAILBOX)
.find(|mode| *mode == PREFERRED_MODE)
.unwrap_or(vk::PresentModeKHR::FIFO);
let image_count = capabilities.min_image_count;
// Note: can be 2 for non-Android to improve latency, but the real answer is to
// implement some kind of frame pacing.
const PREFERRED_IMAGE_COUNT: u32 = 3;
let image_count = PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, capabilities.max_image_count);
let mut extent = capabilities.current_extent;
if extent.width == u32::MAX || extent.height == u32::MAX {
// We're deciding the size.

View file

@ -16,6 +16,9 @@ use piet_gpu_hal::{
Swapchain,
};
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
#[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))]
@ -33,8 +36,7 @@ struct GfxState {
renderer: Renderer,
swapchain: Swapchain,
current_frame: usize,
last_frame_idx: usize,
submitted: Option<SubmittedCmdBuf>,
submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES],
query_pools: Vec<QueryPool>,
present_semaphores: Vec<Semaphore>,
}
@ -100,31 +102,24 @@ impl GfxState {
) -> Result<GfxState, Error> {
unsafe {
let device = instance.device(surface)?;
let mut swapchain = instance.swapchain(width, height, &device, surface.unwrap())?;
let swapchain = instance.swapchain(width, height, &device, surface.unwrap())?;
let session = Session::new(device);
let mut current_frame = 0;
let current_frame = 0;
let present_semaphores = (0..NUM_FRAMES)
.map(|_| session.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?;
let submitted = Default::default();
let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, 0);
let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?;
let mut renderer = Renderer::new(&session, width, height)?;
renderer.upload_render_ctx(&mut ctx)?;
let submitted: Option<SubmittedCmdBuf> = None;
let current_frame = 0;
let last_frame_idx = 0;
Ok(GfxState {
session,
renderer,
swapchain,
current_frame,
last_frame_idx,
submitted,
query_pools,
present_semaphores,
@ -135,28 +130,31 @@ impl GfxState {
fn redraw(&mut self) {
println!("redraw");
unsafe {
if let Some(submitted) = self.submitted.take() {
let frame_idx = self.current_frame % NUM_FRAMES;
let mut info_string = String::new();
if let Some(submitted) = self.submitted[frame_idx].take() {
submitted.wait().unwrap();
let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
if let Err(e) = self.renderer.upload_render_ctx(&mut ctx) {
println!("error in uploading: {}", e);
}
let ts = self
.session
.fetch_query_pool(&self.query_pools[self.last_frame_idx])
.fetch_query_pool(&self.query_pools[frame_idx])
.unwrap();
info_string = format!("{:.1}ms", ts.last().unwrap() * 1e3);
println!("render time: {:?}", ts);
}
let frame_idx = self.current_frame % NUM_FRAMES;
let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
//test_scenes::render_tiger(&mut ctx);
render_info_string(&mut ctx, &info_string);
if let Err(e) = self.renderer.upload_render_ctx(&mut ctx, frame_idx) {
println!("error in uploading: {}", e);
}
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
let swap_image = self.swapchain.image(image_idx);
let query_pool = &self.query_pools[frame_idx];
let mut cmd_buf = self.session.cmd_buf().unwrap();
cmd_buf.begin();
self.renderer.record(&mut cmd_buf, &query_pool);
self.renderer.record(&mut cmd_buf, &query_pool, frame_idx);
// Image -> Swapchain
cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst);
@ -164,7 +162,7 @@ impl GfxState {
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish();
self.submitted = Some(
self.submitted[frame_idx] = Some(
self.session
.run_cmd_buf(
cmd_buf,
@ -173,7 +171,6 @@ impl GfxState {
)
.unwrap(),
);
self.last_frame_idx = frame_idx;
self.swapchain
.present(image_idx, &[&self.present_semaphores[frame_idx]])
@ -183,3 +180,13 @@ impl GfxState {
}
}
}
fn render_info_string(rc: &mut impl RenderContext, info: &str) {
let layout = rc
.text()
.new_text_layout(info.to_string())
.default_attribute(TextAttribute::FontSize(60.0))
.build()
.unwrap();
rc.draw_text(&layout, Point::new(110.0, 120.0));
}

View file

@ -248,13 +248,13 @@ fn main() -> Result<(), Error> {
test_scenes::render_scene(&mut ctx);
}
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?;
renderer.upload_render_ctx(&mut ctx)?;
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
renderer.upload_render_ctx(&mut ctx, 0)?;
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool);
renderer.record(&mut cmd_buf, &query_pool, 0);
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
cmd_buf.host_barrier();
cmd_buf.finish();

View file

@ -1,3 +1,5 @@
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
@ -37,6 +39,7 @@ fn main() -> Result<(), Error> {
.build(&event_loop)?;
let (instance, surface) = Instance::new(Some(&window))?;
let mut info_string = "info".to_string();
unsafe {
let device = instance.device(surface.as_ref())?;
let mut swapchain =
@ -50,27 +53,9 @@ fn main() -> Result<(), Error> {
let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?;
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") {
let mut scale = matches
.value_of("scale")
.map(|scale| scale.parse().unwrap())
.unwrap_or(8.0);
if matches.is_present("flip") {
scale = -scale;
}
test_scenes::render_svg(&mut ctx, input, scale);
} else {
test_scenes::render_scene(&mut ctx);
//test_scenes::render_anim_frame(&mut ctx, 0);
}
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?;
renderer.upload_render_ctx(&mut ctx)?;
let mut submitted: Option<SubmittedCmdBuf> = None;
let mut last_frame_idx = 0;
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
@ -90,23 +75,10 @@ fn main() -> Result<(), Error> {
Event::RedrawRequested(window_id) if window_id == window.id() => {
let frame_idx = current_frame % NUM_FRAMES;
// Note: this logic is a little strange. We have two sets of renderer
// resources, so we could have two frames in flight (submit two, wait on
// the first), but we actually just wait on the last submitted.
//
// Getting this right will take some thought.
if let Some(submitted) = submitted.take() {
if let Some(submitted) = submitted[frame_idx].take() {
submitted.wait().unwrap();
if matches.value_of("INPUT").is_none() {
let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, current_frame);
if let Err(e) = renderer.upload_render_ctx(&mut ctx) {
println!("error in uploading: {}", e);
}
}
let ts = session.fetch_query_pool(&query_pools[last_frame_idx]).unwrap();
window.set_title(&format!(
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
info_string = format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[6] * 1e3,
ts[0] * 1e3,
@ -116,7 +88,25 @@ fn main() -> Result<(), Error> {
(ts[4] - ts[3]) * 1e3,
(ts[5] - ts[4]) * 1e3,
(ts[6] - ts[5]) * 1e3,
));
);
}
let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") {
let mut scale = matches
.value_of("scale")
.map(|scale| scale.parse().unwrap())
.unwrap_or(8.0);
if matches.is_present("flip") {
scale = -scale;
}
test_scenes::render_svg(&mut ctx, input, scale);
} else {
test_scenes::render_anim_frame(&mut ctx, current_frame);
}
render_info_string(&mut ctx, &info_string);
if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) {
println!("error in uploading: {}", e);
}
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
@ -124,7 +114,7 @@ fn main() -> Result<(), Error> {
let query_pool = &query_pools[frame_idx];
let mut cmd_buf = session.cmd_buf().unwrap();
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool);
renderer.record(&mut cmd_buf, &query_pool, frame_idx);
// Image -> Swapchain
cmd_buf.image_barrier(
@ -136,14 +126,13 @@ fn main() -> Result<(), Error> {
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish();
submitted = Some(session
submitted[frame_idx] = Some(session
.run_cmd_buf(
cmd_buf,
&[&acquisition_semaphore],
&[&present_semaphores[frame_idx]],
)
.unwrap());
last_frame_idx = frame_idx;
swapchain
.present(image_idx, &[&present_semaphores[frame_idx]])
@ -152,10 +141,12 @@ fn main() -> Result<(), Error> {
current_frame += 1;
}
Event::LoopDestroyed => {
if let Some(submitted) = submitted.take() {
for cmd_buf in &mut submitted {
// Wait for command list submission, otherwise dropping of renderer may
// cause validation errors (and possibly crashes).
submitted.wait().unwrap();
if let Some(cmd_buf) = cmd_buf.take() {
cmd_buf.wait().unwrap();
}
}
}
_ => (),
@ -163,3 +154,13 @@ fn main() -> Result<(), Error> {
})
}
}
fn render_info_string(rc: &mut impl RenderContext, info: &str) {
let layout = rc
.text()
.new_text_layout(info.to_string())
.default_attribute(TextAttribute::FontSize(40.0))
.build()
.unwrap();
rc.draw_text(&layout, Point::new(110.0, 50.0));
}

View file

@ -6,6 +6,7 @@ mod text;
use std::convert::TryInto;
use piet_gpu_types::scene;
pub use render_ctx::PietGpuRenderContext;
use rand::{Rng, RngCore};
@ -60,19 +61,20 @@ pub struct Renderer {
// The reference is held by the pipelines. We will be changing
// this to make the scene upload dynamic.
#[allow(dead_code)]
scene_buf: Buffer,
scene_bufs: Vec<Buffer>,
memory_buf_host: Buffer,
memory_buf_host: Vec<Buffer>,
memory_buf_dev: Buffer,
state_buf: Buffer,
#[allow(dead_code)]
// Staging buffers
config_bufs: Vec<Buffer>,
// Device config buf
config_buf: Buffer,
el_pipeline: Pipeline,
el_ds: DescriptorSet,
el_ds: Vec<DescriptorSet>,
tile_pipeline: Pipeline,
tile_ds: DescriptorSet,
@ -99,13 +101,18 @@ pub struct Renderer {
// Keep a reference to the image so that it is not destroyed.
_bg_image: Image,
gradient_buf: Buffer,
gradient_bufs: Vec<Buffer>,
gradients: Image,
}
impl Renderer {
/// Create a new renderer.
pub unsafe fn new(session: &Session, width: usize, height: usize) -> Result<Self, Error> {
pub unsafe fn new(
session: &Session,
width: usize,
height: usize,
n_bufs: usize,
) -> Result<Self, Error> {
// For now, round up to tile alignment
let width = width + (width.wrapping_neg() & (TILE_W - 1));
let height = height + (height.wrapping_neg() & (TILE_W - 1));
@ -114,29 +121,39 @@ impl Renderer {
// This may be inadequate for very complex scenes (paris etc)
// TODO: separate staging buffer (if needed)
let scene_buf = session.create_buffer(1 * 1024 * 1024, host_upload).unwrap();
let scene_bufs = (0..n_bufs)
.map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
.collect();
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let image_dev = session.create_image2d(width as u32, height as u32)?;
// Note: this must be updated when the config struct size changes.
const CONFIG_BUFFER_SIZE: u64 = 40;
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
// TODO: separate staging buffer (if needed)
let config_buf = session
let config_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(CONFIG_BUFFER_SIZE, host_upload)
.unwrap();
.unwrap()
})
.collect();
// Perhaps we could avoid the explicit staging buffer by having buffer creation method
// that takes both initial contents and a size.
let memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
let memory_buf_host = (0..n_bufs)
.map(|_| session.create_buffer(2 * 4, host_upload).unwrap())
.collect();
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
let el_ds = session.create_simple_descriptor_set(
let mut el_ds = Vec::with_capacity(n_bufs);
for scene_buf in &scene_bufs {
el_ds.push(session.create_simple_descriptor_set(
&el_pipeline,
&[&memory_buf_dev, &config_buf, &scene_buf, &state_buf],
)?;
&[&memory_buf_dev, &config_buf, scene_buf, &state_buf],
)?);
}
let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv"));
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
@ -173,7 +190,13 @@ impl Renderer {
const GRADIENT_BUF_SIZE: usize =
crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4;
let gradient_buf = session.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)?;
let gradient_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)
.unwrap()
})
.collect();
let gradients = Self::make_gradient_image(&session);
let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
@ -198,11 +221,12 @@ impl Renderer {
Ok(Renderer {
width,
height,
scene_buf,
scene_bufs,
memory_buf_host,
memory_buf_dev,
state_buf,
config_buf,
config_bufs,
image_dev,
el_pipeline,
el_ds,
@ -222,7 +246,7 @@ impl Renderer {
n_paths: 0,
n_pathseg: 0,
_bg_image: bg_image,
gradient_buf,
gradient_bufs,
gradients,
})
}
@ -235,6 +259,7 @@ impl Renderer {
pub fn upload_render_ctx(
&mut self,
render_ctx: &mut PietGpuRenderContext,
buf_ix: usize,
) -> Result<(), Error> {
let n_paths = render_ctx.path_count();
let n_pathseg = render_ctx.pathseg_count();
@ -280,28 +305,24 @@ impl Renderer {
let scene = render_ctx.get_scene_buf();
self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
// TODO: reallocate scene buffer if size is inadequate
assert!(self.scene_buf.size() as usize >= scene.len());
self.scene_buf.write(scene)?;
self.config_buf.write(config)?;
self.memory_buf_host
.write(&[alloc as u32, 0 /* Overflow flag */])?;
assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len());
self.scene_bufs[buf_ix].write(scene)?;
self.config_bufs[buf_ix].write(config)?;
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
// Upload gradient data.
let ramp_data = render_ctx.get_ramp_data();
if !ramp_data.is_empty() {
assert!(self.gradient_buf.size() as usize >= std::mem::size_of_val(&*ramp_data));
self.gradient_buf.write(&ramp_data)?;
assert!(self.gradient_bufs[buf_ix].size() as usize >= std::mem::size_of_val(&*ramp_data));
self.gradient_bufs[buf_ix].write(&ramp_data)?;
}
}
Ok(())
}
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool) {
//cmd_buf.clear_buffer(&self.memory_buf_dev, None);
//cmd_buf.memory_barrier();
// Only need to copy the first few words; need to upgrade HAL to be able to
// express sub-buffer copies.
cmd_buf.copy_buffer(&self.memory_buf_host, &self.memory_buf_dev);
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
cmd_buf.clear_buffer(&self.state_buf, None);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(
@ -315,13 +336,13 @@ impl Renderer {
ImageLayout::Undefined,
ImageLayout::BlitDst,
);
cmd_buf.copy_buffer_to_image(&self.gradient_buf, &self.gradients);
cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients);
cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General);
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&self.el_pipeline,
&self.el_ds,
&self.el_ds[buf_ix],
(((self.n_elements + 127) / 128) as u32, 1, 1),
(128, 1, 1),
);