Double-buffer scene buffer

Pipeline the CPU and GPU work so that two frames can be in flight at
once.

This dramatically improves performance, especially on Android. Note
that I've also changed the default swapchain configuration to prefer 3
images (rather than the minimum count) and FIFO present mode.
This commit is contained in:
Raph Levien 2021-10-18 15:52:57 -07:00
parent 8b4a6c54cd
commit 7adb300671
5 changed files with 143 additions and 109 deletions

View file

@ -413,12 +413,17 @@ impl VkInstance {
.surface_fn .surface_fn
.get_physical_device_surface_present_modes(device.physical_device, surface.surface)?; .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?;
// Can change to MAILBOX to force high frame rates.
const PREFERRED_MODE: vk::PresentModeKHR = vk::PresentModeKHR::FIFO;
let present_mode = present_modes let present_mode = present_modes
.into_iter() .into_iter()
.find(|mode| mode == &vk::PresentModeKHR::MAILBOX) .find(|mode| *mode == PREFERRED_MODE)
.unwrap_or(vk::PresentModeKHR::FIFO); .unwrap_or(vk::PresentModeKHR::FIFO);
let image_count = capabilities.min_image_count; // Note: can be 2 for non-Android to improve latency, but the real answer is to
// implement some kind of frame pacing.
const PREFERRED_IMAGE_COUNT: u32 = 3;
let image_count = PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, capabilities.max_image_count);
let mut extent = capabilities.current_extent; let mut extent = capabilities.current_extent;
if extent.width == u32::MAX || extent.height == u32::MAX { if extent.width == u32::MAX || extent.height == u32::MAX {
// We're deciding the size. // We're deciding the size.

View file

@ -16,6 +16,9 @@ use piet_gpu_hal::{
Swapchain, Swapchain,
}; };
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
#[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))] #[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))]
@ -33,8 +36,7 @@ struct GfxState {
renderer: Renderer, renderer: Renderer,
swapchain: Swapchain, swapchain: Swapchain,
current_frame: usize, current_frame: usize,
last_frame_idx: usize, submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES],
submitted: Option<SubmittedCmdBuf>,
query_pools: Vec<QueryPool>, query_pools: Vec<QueryPool>,
present_semaphores: Vec<Semaphore>, present_semaphores: Vec<Semaphore>,
} }
@ -100,31 +102,24 @@ impl GfxState {
) -> Result<GfxState, Error> { ) -> Result<GfxState, Error> {
unsafe { unsafe {
let device = instance.device(surface)?; let device = instance.device(surface)?;
let mut swapchain = instance.swapchain(width, height, &device, surface.unwrap())?; let swapchain = instance.swapchain(width, height, &device, surface.unwrap())?;
let session = Session::new(device); let session = Session::new(device);
let mut current_frame = 0; let current_frame = 0;
let present_semaphores = (0..NUM_FRAMES) let present_semaphores = (0..NUM_FRAMES)
.map(|_| session.create_semaphore()) .map(|_| session.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?; .collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES) let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(8)) .map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?; .collect::<Result<Vec<_>, Error>>()?;
let submitted = Default::default();
let mut ctx = PietGpuRenderContext::new(); let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?;
test_scenes::render_anim_frame(&mut ctx, 0);
let mut renderer = Renderer::new(&session, width, height)?;
renderer.upload_render_ctx(&mut ctx)?;
let submitted: Option<SubmittedCmdBuf> = None;
let current_frame = 0;
let last_frame_idx = 0;
Ok(GfxState { Ok(GfxState {
session, session,
renderer, renderer,
swapchain, swapchain,
current_frame, current_frame,
last_frame_idx,
submitted, submitted,
query_pools, query_pools,
present_semaphores, present_semaphores,
@ -135,28 +130,31 @@ impl GfxState {
fn redraw(&mut self) { fn redraw(&mut self) {
println!("redraw"); println!("redraw");
unsafe { unsafe {
if let Some(submitted) = self.submitted.take() { let frame_idx = self.current_frame % NUM_FRAMES;
let mut info_string = String::new();
if let Some(submitted) = self.submitted[frame_idx].take() {
submitted.wait().unwrap(); submitted.wait().unwrap();
let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
if let Err(e) = self.renderer.upload_render_ctx(&mut ctx) {
println!("error in uploading: {}", e);
}
let ts = self let ts = self
.session .session
.fetch_query_pool(&self.query_pools[self.last_frame_idx]) .fetch_query_pool(&self.query_pools[frame_idx])
.unwrap(); .unwrap();
info_string = format!("{:.1}ms", ts.last().unwrap() * 1e3);
println!("render time: {:?}", ts); println!("render time: {:?}", ts);
} }
let frame_idx = self.current_frame % NUM_FRAMES; let mut ctx = PietGpuRenderContext::new();
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
//test_scenes::render_tiger(&mut ctx);
render_info_string(&mut ctx, &info_string);
if let Err(e) = self.renderer.upload_render_ctx(&mut ctx, frame_idx) {
println!("error in uploading: {}", e);
}
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap(); let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
let swap_image = self.swapchain.image(image_idx); let swap_image = self.swapchain.image(image_idx);
let query_pool = &self.query_pools[frame_idx]; let query_pool = &self.query_pools[frame_idx];
let mut cmd_buf = self.session.cmd_buf().unwrap(); let mut cmd_buf = self.session.cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
self.renderer.record(&mut cmd_buf, &query_pool); self.renderer.record(&mut cmd_buf, &query_pool, frame_idx);
// Image -> Swapchain // Image -> Swapchain
cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst); cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst);
@ -164,7 +162,7 @@ impl GfxState {
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish(); cmd_buf.finish();
self.submitted = Some( self.submitted[frame_idx] = Some(
self.session self.session
.run_cmd_buf( .run_cmd_buf(
cmd_buf, cmd_buf,
@ -173,7 +171,6 @@ impl GfxState {
) )
.unwrap(), .unwrap(),
); );
self.last_frame_idx = frame_idx;
self.swapchain self.swapchain
.present(image_idx, &[&self.present_semaphores[frame_idx]]) .present(image_idx, &[&self.present_semaphores[frame_idx]])
@ -183,3 +180,13 @@ impl GfxState {
} }
} }
} }
/// Draws `info` as a text overlay into the given render context.
///
/// The string is laid out with a default font size of 60.0 and drawn at
/// the point (110.0, 120.0).
///
/// # Panics
///
/// Panics if the text layout fails to build.
fn render_info_string(rc: &mut impl RenderContext, info: &str) {
    let font_size = TextAttribute::FontSize(60.0);
    let origin = Point::new(110.0, 120.0);
    let builder = rc.text().new_text_layout(info.to_string());
    let text_layout = builder.default_attribute(font_size).build().unwrap();
    rc.draw_text(&text_layout, origin);
}

View file

@ -248,13 +248,13 @@ fn main() -> Result<(), Error> {
test_scenes::render_scene(&mut ctx); test_scenes::render_scene(&mut ctx);
} }
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?; let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
renderer.upload_render_ctx(&mut ctx)?; renderer.upload_render_ctx(&mut ctx, 0)?;
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST; let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
cmd_buf.begin(); cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool); renderer.record(&mut cmd_buf, &query_pool, 0);
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
cmd_buf.host_barrier(); cmd_buf.host_barrier();
cmd_buf.finish(); cmd_buf.finish();

View file

@ -1,3 +1,5 @@
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
@ -37,6 +39,7 @@ fn main() -> Result<(), Error> {
.build(&event_loop)?; .build(&event_loop)?;
let (instance, surface) = Instance::new(Some(&window))?; let (instance, surface) = Instance::new(Some(&window))?;
let mut info_string = "info".to_string();
unsafe { unsafe {
let device = instance.device(surface.as_ref())?; let device = instance.device(surface.as_ref())?;
let mut swapchain = let mut swapchain =
@ -50,27 +53,9 @@ fn main() -> Result<(), Error> {
let query_pools = (0..NUM_FRAMES) let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(8)) .map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?; .collect::<Result<Vec<_>, Error>>()?;
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
let mut ctx = PietGpuRenderContext::new(); let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
if let Some(input) = matches.value_of("INPUT") {
let mut scale = matches
.value_of("scale")
.map(|scale| scale.parse().unwrap())
.unwrap_or(8.0);
if matches.is_present("flip") {
scale = -scale;
}
test_scenes::render_svg(&mut ctx, input, scale);
} else {
test_scenes::render_scene(&mut ctx);
//test_scenes::render_anim_frame(&mut ctx, 0);
}
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT)?;
renderer.upload_render_ctx(&mut ctx)?;
let mut submitted: Option<SubmittedCmdBuf> = None;
let mut last_frame_idx = 0;
event_loop.run(move |event, _, control_flow| { event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
@ -90,23 +75,10 @@ fn main() -> Result<(), Error> {
Event::RedrawRequested(window_id) if window_id == window.id() => { Event::RedrawRequested(window_id) if window_id == window.id() => {
let frame_idx = current_frame % NUM_FRAMES; let frame_idx = current_frame % NUM_FRAMES;
// Note: this logic is a little strange. We have two sets of renderer if let Some(submitted) = submitted[frame_idx].take() {
// resources, so we could have two frames in flight (submit two, wait on
// the first), but we actually just wait on the last submitted.
//
// Getting this right will take some thought.
if let Some(submitted) = submitted.take() {
submitted.wait().unwrap(); submitted.wait().unwrap();
if matches.value_of("INPUT").is_none() { let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
let mut ctx = PietGpuRenderContext::new(); info_string = format!(
test_scenes::render_anim_frame(&mut ctx, current_frame);
if let Err(e) = renderer.upload_render_ctx(&mut ctx) {
println!("error in uploading: {}", e);
}
}
let ts = session.fetch_query_pool(&query_pools[last_frame_idx]).unwrap();
window.set_title(&format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[6] * 1e3, ts[6] * 1e3,
ts[0] * 1e3, ts[0] * 1e3,
@ -116,7 +88,25 @@ fn main() -> Result<(), Error> {
(ts[4] - ts[3]) * 1e3, (ts[4] - ts[3]) * 1e3,
(ts[5] - ts[4]) * 1e3, (ts[5] - ts[4]) * 1e3,
(ts[6] - ts[5]) * 1e3, (ts[6] - ts[5]) * 1e3,
)); );
}
let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") {
let mut scale = matches
.value_of("scale")
.map(|scale| scale.parse().unwrap())
.unwrap_or(8.0);
if matches.is_present("flip") {
scale = -scale;
}
test_scenes::render_svg(&mut ctx, input, scale);
} else {
test_scenes::render_anim_frame(&mut ctx, current_frame);
}
render_info_string(&mut ctx, &info_string);
if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) {
println!("error in uploading: {}", e);
} }
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
@ -124,7 +114,7 @@ fn main() -> Result<(), Error> {
let query_pool = &query_pools[frame_idx]; let query_pool = &query_pools[frame_idx];
let mut cmd_buf = session.cmd_buf().unwrap(); let mut cmd_buf = session.cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool); renderer.record(&mut cmd_buf, &query_pool, frame_idx);
// Image -> Swapchain // Image -> Swapchain
cmd_buf.image_barrier( cmd_buf.image_barrier(
@ -136,14 +126,13 @@ fn main() -> Result<(), Error> {
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish(); cmd_buf.finish();
submitted = Some(session submitted[frame_idx] = Some(session
.run_cmd_buf( .run_cmd_buf(
cmd_buf, cmd_buf,
&[&acquisition_semaphore], &[&acquisition_semaphore],
&[&present_semaphores[frame_idx]], &[&present_semaphores[frame_idx]],
) )
.unwrap()); .unwrap());
last_frame_idx = frame_idx;
swapchain swapchain
.present(image_idx, &[&present_semaphores[frame_idx]]) .present(image_idx, &[&present_semaphores[frame_idx]])
@ -152,10 +141,12 @@ fn main() -> Result<(), Error> {
current_frame += 1; current_frame += 1;
} }
Event::LoopDestroyed => { Event::LoopDestroyed => {
if let Some(submitted) = submitted.take() { for cmd_buf in &mut submitted {
// Wait for command list submission, otherwise dropping of renderer may // Wait for command list submission, otherwise dropping of renderer may
// cause validation errors (and possibly crashes). // cause validation errors (and possibly crashes).
submitted.wait().unwrap(); if let Some(cmd_buf) = cmd_buf.take() {
cmd_buf.wait().unwrap();
}
} }
} }
_ => (), _ => (),
@ -163,3 +154,13 @@ fn main() -> Result<(), Error> {
}) })
} }
} }
/// Draws `info` as a text overlay into the given render context.
///
/// The string is laid out with a default font size of 40.0 and drawn at
/// the point (110.0, 50.0).
///
/// # Panics
///
/// Panics if the text layout fails to build.
fn render_info_string(rc: &mut impl RenderContext, info: &str) {
    let font_size = TextAttribute::FontSize(40.0);
    let origin = Point::new(110.0, 50.0);
    let builder = rc.text().new_text_layout(info.to_string());
    let text_layout = builder.default_attribute(font_size).build().unwrap();
    rc.draw_text(&text_layout, origin);
}

View file

@ -6,6 +6,7 @@ mod text;
use std::convert::TryInto; use std::convert::TryInto;
use piet_gpu_types::scene;
pub use render_ctx::PietGpuRenderContext; pub use render_ctx::PietGpuRenderContext;
use rand::{Rng, RngCore}; use rand::{Rng, RngCore};
@ -60,19 +61,20 @@ pub struct Renderer {
// The reference is held by the pipelines. We will be changing // The reference is held by the pipelines. We will be changing
// this to make the scene upload dynamic. // this to make the scene upload dynamic.
#[allow(dead_code)] scene_bufs: Vec<Buffer>,
scene_buf: Buffer,
memory_buf_host: Buffer, memory_buf_host: Vec<Buffer>,
memory_buf_dev: Buffer, memory_buf_dev: Buffer,
state_buf: Buffer, state_buf: Buffer,
#[allow(dead_code)] // Staging buffers
config_bufs: Vec<Buffer>,
// Device config buf
config_buf: Buffer, config_buf: Buffer,
el_pipeline: Pipeline, el_pipeline: Pipeline,
el_ds: DescriptorSet, el_ds: Vec<DescriptorSet>,
tile_pipeline: Pipeline, tile_pipeline: Pipeline,
tile_ds: DescriptorSet, tile_ds: DescriptorSet,
@ -99,13 +101,18 @@ pub struct Renderer {
// Keep a reference to the image so that it is not destroyed. // Keep a reference to the image so that it is not destroyed.
_bg_image: Image, _bg_image: Image,
gradient_buf: Buffer, gradient_bufs: Vec<Buffer>,
gradients: Image, gradients: Image,
} }
impl Renderer { impl Renderer {
/// Create a new renderer. /// Create a new renderer.
pub unsafe fn new(session: &Session, width: usize, height: usize) -> Result<Self, Error> { pub unsafe fn new(
session: &Session,
width: usize,
height: usize,
n_bufs: usize,
) -> Result<Self, Error> {
// For now, round up to tile alignment // For now, round up to tile alignment
let width = width + (width.wrapping_neg() & (TILE_W - 1)); let width = width + (width.wrapping_neg() & (TILE_W - 1));
let height = height + (height.wrapping_neg() & (TILE_W - 1)); let height = height + (height.wrapping_neg() & (TILE_W - 1));
@ -114,29 +121,39 @@ impl Renderer {
// This may be inadequate for very complex scenes (paris etc) // This may be inadequate for very complex scenes (paris etc)
// TODO: separate staging buffer (if needed) // TODO: separate staging buffer (if needed)
let scene_buf = session.create_buffer(1 * 1024 * 1024, host_upload).unwrap(); let scene_bufs = (0..n_bufs)
.map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
.collect();
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let image_dev = session.create_image2d(width as u32, height as u32)?; let image_dev = session.create_image2d(width as u32, height as u32)?;
// Note: this must be updated when the config struct size changes. // Note: this must be updated when the config struct size changes.
const CONFIG_BUFFER_SIZE: u64 = 40; const CONFIG_BUFFER_SIZE: u64 = 40;
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
// TODO: separate staging buffer (if needed) // TODO: separate staging buffer (if needed)
let config_buf = session let config_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(CONFIG_BUFFER_SIZE, host_upload) .create_buffer(CONFIG_BUFFER_SIZE, host_upload)
.unwrap(); .unwrap()
})
.collect();
// Perhaps we could avoid the explicit staging buffer by having buffer creation method let memory_buf_host = (0..n_bufs)
// that takes both initial contents and a size. .map(|_| session.create_buffer(2 * 4, host_upload).unwrap())
let memory_buf_host = session.create_buffer(2 * 4, host_upload)?; .collect();
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv")); let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?; let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
let el_ds = session.create_simple_descriptor_set( let mut el_ds = Vec::with_capacity(n_bufs);
for scene_buf in &scene_bufs {
el_ds.push(session.create_simple_descriptor_set(
&el_pipeline, &el_pipeline,
&[&memory_buf_dev, &config_buf, &scene_buf, &state_buf], &[&memory_buf_dev, &config_buf, scene_buf, &state_buf],
)?; )?);
}
let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv")); let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv"));
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?; let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
@ -173,7 +190,13 @@ impl Renderer {
const GRADIENT_BUF_SIZE: usize = const GRADIENT_BUF_SIZE: usize =
crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4; crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4;
let gradient_buf = session.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)?; let gradient_bufs = (0..n_bufs)
.map(|_| {
session
.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)
.unwrap()
})
.collect();
let gradients = Self::make_gradient_image(&session); let gradients = Self::make_gradient_image(&session);
let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv")); let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
@ -198,11 +221,12 @@ impl Renderer {
Ok(Renderer { Ok(Renderer {
width, width,
height, height,
scene_buf, scene_bufs,
memory_buf_host, memory_buf_host,
memory_buf_dev, memory_buf_dev,
state_buf, state_buf,
config_buf, config_buf,
config_bufs,
image_dev, image_dev,
el_pipeline, el_pipeline,
el_ds, el_ds,
@ -222,7 +246,7 @@ impl Renderer {
n_paths: 0, n_paths: 0,
n_pathseg: 0, n_pathseg: 0,
_bg_image: bg_image, _bg_image: bg_image,
gradient_buf, gradient_bufs,
gradients, gradients,
}) })
} }
@ -235,6 +259,7 @@ impl Renderer {
pub fn upload_render_ctx( pub fn upload_render_ctx(
&mut self, &mut self,
render_ctx: &mut PietGpuRenderContext, render_ctx: &mut PietGpuRenderContext,
buf_ix: usize,
) -> Result<(), Error> { ) -> Result<(), Error> {
let n_paths = render_ctx.path_count(); let n_paths = render_ctx.path_count();
let n_pathseg = render_ctx.pathseg_count(); let n_pathseg = render_ctx.pathseg_count();
@ -280,28 +305,24 @@ impl Renderer {
let scene = render_ctx.get_scene_buf(); let scene = render_ctx.get_scene_buf();
self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
// TODO: reallocate scene buffer if size is inadequate // TODO: reallocate scene buffer if size is inadequate
assert!(self.scene_buf.size() as usize >= scene.len()); assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len());
self.scene_buf.write(scene)?; self.scene_bufs[buf_ix].write(scene)?;
self.config_buf.write(config)?; self.config_bufs[buf_ix].write(config)?;
self.memory_buf_host self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
.write(&[alloc as u32, 0 /* Overflow flag */])?;
// Upload gradient data. // Upload gradient data.
let ramp_data = render_ctx.get_ramp_data(); let ramp_data = render_ctx.get_ramp_data();
if !ramp_data.is_empty() { if !ramp_data.is_empty() {
assert!(self.gradient_buf.size() as usize >= std::mem::size_of_val(&*ramp_data)); assert!(self.gradient_bufs[buf_ix].size() as usize >= std::mem::size_of_val(&*ramp_data));
self.gradient_buf.write(&ramp_data)?; self.gradient_bufs[buf_ix].write(&ramp_data)?;
} }
} }
Ok(()) Ok(())
} }
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool) { pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
//cmd_buf.clear_buffer(&self.memory_buf_dev, None); cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
//cmd_buf.memory_barrier(); cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
// Only need to copy the first few words; need to upgrade HAL to be able to
// express sub-buffer copies.
cmd_buf.copy_buffer(&self.memory_buf_host, &self.memory_buf_dev);
cmd_buf.clear_buffer(&self.state_buf, None); cmd_buf.clear_buffer(&self.state_buf, None);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier( cmd_buf.image_barrier(
@ -315,13 +336,13 @@ impl Renderer {
ImageLayout::Undefined, ImageLayout::Undefined,
ImageLayout::BlitDst, ImageLayout::BlitDst,
); );
cmd_buf.copy_buffer_to_image(&self.gradient_buf, &self.gradients); cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients);
cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General);
cmd_buf.reset_query_pool(&query_pool); cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch( cmd_buf.dispatch(
&self.el_pipeline, &self.el_pipeline,
&self.el_ds, &self.el_ds[buf_ix],
(((self.n_elements + 127) / 128) as u32, 1, 1), (((self.n_elements + 127) / 128) as u32, 1, 1),
(128, 1, 1), (128, 1, 1),
); );