From 47d2e0a756af10fb2f8375d6e80e4a929f561d46 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 24 May 2021 11:44:56 -0700 Subject: [PATCH 1/4] Add create_buffer_init method Add a method to create a buffer with initial content, which requires staging buffers under the hood. This patch also changes the lower-level (Vulkan) interface to be closer to the raw Vulkan call. --- piet-gpu-hal/examples/collatz.rs | 7 +- piet-gpu-hal/src/hub.rs | 125 +++++++++++++++++++++++++++---- piet-gpu-hal/src/lib.rs | 48 +++++++++--- piet-gpu-hal/src/vulkan.rs | 94 +++++++++++++++++------ piet-gpu/bin/cli.rs | 3 +- piet-gpu/src/lib.rs | 66 ++++++---------- 6 files changed, 246 insertions(+), 97 deletions(-) diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs index ddf6769..bde4084 100644 --- a/piet-gpu-hal/examples/collatz.rs +++ b/piet-gpu-hal/examples/collatz.rs @@ -7,12 +7,9 @@ fn main() { unsafe { let device = instance.device(None).unwrap(); let session = hub::Session::new(device); - let usage = BufferUsage::MAP_READ | BufferUsage::MAP_WRITE | BufferUsage::STORAGE; + let usage = BufferUsage::MAP_READ | BufferUsage::STORAGE; let src = (0..256).map(|x| x + 1).collect::>(); - let mut buffer = session - .create_buffer(std::mem::size_of_val(&src[..]) as u64, usage) - .unwrap(); - buffer.write(&src).unwrap(); + let buffer = session.create_buffer_init(&src, usage).unwrap(); let code = include_bytes!("./shader/collatz.spv"); let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap(); let descriptor_set = session diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index d98108c..554a024 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -5,9 +5,11 @@ //! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only. use std::any::Any; +use std::convert::TryInto; use std::sync::{Arc, Mutex, Weak}; use crate::vulkan; +use crate::CmdBuf as CmdBufTrait; use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::PipelineBuilder as PipelineBuilderTrait; use crate::{BufferUsage, Device, Error, GpuInfo, SamplerParams}; @@ -31,6 +33,8 @@ struct SessionInner { cmd_buf_pool: Mutex>, /// Command buffers that are still pending (so resources can't be freed). pending: Mutex>, + /// A command buffer that is used for copying from staging buffers. + staging_cmd_buf: Mutex>, gpu_info: GpuInfo, } @@ -45,9 +49,12 @@ pub struct CmdBuf { pub struct SubmittedCmdBuf(Option, Weak); struct SubmittedCmdBufInner { + // It's inconsistent, cmd_buf is unpacked, staging_cmd_buf isn't. Probably + // better to chose one or the other. cmd_buf: vulkan::CmdBuf, fence: Fence, resources: Vec>, + staging_cmd_buf: Option, } #[derive(Clone)] @@ -70,6 +77,20 @@ pub struct PipelineBuilder(vulkan::PipelineBuilder); pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder); +/// Data types that can be stored in a GPU buffer. +pub unsafe trait PlainData {} + +unsafe impl PlainData for u8 {} +unsafe impl PlainData for u16 {} +unsafe impl PlainData for u32 {} +unsafe impl PlainData for u64 {} +unsafe impl PlainData for i8 {} +unsafe impl PlainData for i16 {} +unsafe impl PlainData for i32 {} +unsafe impl PlainData for i64 {} +unsafe impl PlainData for f32 {} +unsafe impl PlainData for f64 {} + impl Session { pub fn new(device: vulkan::VkDevice) -> Session { let gpu_info = device.query_gpu_info(); @@ -78,6 +99,7 @@ impl Session { gpu_info, cmd_buf_pool: Default::default(), pending: Default::default(), + staging_cmd_buf: Default::default(), })) } @@ -107,12 +129,13 @@ impl Session { let item = pending.swap_remove(i); // TODO: wait is superfluous, can just reset let _ = self.0.device.wait_and_reset(&[item.fence]); - self.0 - .cmd_buf_pool - .lock() - .unwrap() - .push((item.cmd_buf, item.fence)); + let mut pool = self.0.cmd_buf_pool.lock().unwrap(); + pool.push((item.cmd_buf, item.fence)); std::mem::drop(item.resources); + if let Some(staging_cmd_buf) = item.staging_cmd_buf { + pool.push((staging_cmd_buf.cmd_buf, staging_cmd_buf.fence)); + std::mem::drop(staging_cmd_buf.resources); + } } else { i += 1; } @@ -126,8 +149,19 @@ impl Session { wait_semaphores: &[Semaphore], signal_semaphores: &[Semaphore], ) -> Result { - self.0.device.run_cmd_buf( - &cmd_buf.cmd_buf, + // Again, SmallVec here? + let mut cmd_bufs = Vec::with_capacity(2); + let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap().take(); + if let Some(staging) = &mut staging_cmd_buf { + // With finer grained resource tracking, we might be able to avoid this in + // some cases. + staging.memory_barrier(); + staging.finish(); + cmd_bufs.push(&staging.cmd_buf); + } + cmd_bufs.push(&cmd_buf.cmd_buf); + self.0.device.run_cmd_bufs( + &cmd_bufs, wait_semaphores, signal_semaphores, Some(&cmd_buf.fence), @@ -137,6 +171,7 @@ impl Session { cmd_buf: cmd_buf.cmd_buf, fence: cmd_buf.fence, resources: cmd_buf.resources, + staging_cmd_buf, }), cmd_buf.session, )) @@ -150,11 +185,58 @@ impl Session { }))) } - pub unsafe fn create_image2d( + /// Create a buffer with initialized data. + pub fn create_buffer_init( &self, - width: u32, - height: u32, - ) -> Result { + contents: &[impl PlainData], + usage: BufferUsage, + ) -> Result { + unsafe { + self.create_buffer_init_raw( + contents.as_ptr() as *const u8, + std::mem::size_of_val(contents).try_into()?, + usage, + ) + } + } + + /// Create a buffer with initialized data, from a raw pointer memory region. + pub unsafe fn create_buffer_init_raw( + &self, + contents: *const u8, + size: u64, + usage: BufferUsage, + ) -> Result { + let use_staging_buffer = !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE) + && self.gpu_info().use_staging_buffers; + let create_usage = if use_staging_buffer { + BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC + } else { + usage | BufferUsage::MAP_WRITE + }; + let create_buf = self.create_buffer(size, create_usage)?; + self.0 + .device + .write_buffer(&create_buf.vk_buffer(), contents, 0, size)?; + if use_staging_buffer { + let buf = self.create_buffer(size, usage | BufferUsage::COPY_DST)?; + let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap(); + if staging_cmd_buf.is_none() { + let mut cmd_buf = self.cmd_buf()?; + cmd_buf.begin(); + *staging_cmd_buf = Some(cmd_buf); + } + let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap(); + // This will ensure the staging buffer is deallocated + staging_cmd_buf.add_resource(&create_buf); + staging_cmd_buf.copy_buffer(create_buf.vk_buffer(), buf.vk_buffer()); + Ok(buf) + } else { + Ok(create_buf) + } + } + + pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result { let image = self.0.device.create_image2d(width, height)?; Ok(Image(Arc::new(ImageInner { image, @@ -305,16 +387,29 @@ impl Buffer { &self.0.buffer } - pub unsafe fn write(&mut self, contents: &[T]) -> Result<(), Error> { + pub unsafe fn write(&mut self, contents: &[T]) -> Result<(), Error> { if let Some(session) = Weak::upgrade(&self.0.session) { - session.device.write_buffer(&self.0.buffer, contents)?; + session.device.write_buffer( + &self.0.buffer, + contents.as_ptr() as *const u8, + 0, + std::mem::size_of_val(contents).try_into()?, + )?; } // else session lost error? Ok(()) } - pub unsafe fn read(&self, result: &mut Vec) -> Result<(), Error> { + pub unsafe fn read(&self, result: &mut Vec) -> Result<(), Error> { + let size = self.vk_buffer().size; + let len = size as usize / std::mem::size_of::(); + if len > result.len() { + result.reserve(len - result.len()); + } if let Some(session) = Weak::upgrade(&self.0.session) { - session.device.read_buffer(&self.0.buffer, result)?; + session + .device + .read_buffer(&self.0.buffer, result.as_mut_ptr() as *mut u8, 0, size)?; + result.set_len(len); } // else session lost error? Ok(()) diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index ce7c6d7..323f5e5 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -2,7 +2,6 @@ /// /// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu. /// In time, it may go away and be replaced by either gfx-hal or wgpu. - use bitflags::bitflags; pub mod hub; @@ -66,6 +65,8 @@ pub struct GpuInfo { pub subgroup_size: Option, /// The GPU supports a real, grown-ass memory model. pub has_memory_model: bool, + /// Whether staging buffers should be used. + pub use_staging_buffers: bool, } #[derive(Clone, Debug)] @@ -103,11 +104,7 @@ pub trait Device: Sized { /// Maybe doesn't need result return? unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>; - unsafe fn create_image2d( - &self, - width: u32, - height: u32, - ) -> Result; + unsafe fn create_image2d(&self, width: u32, height: u32) -> Result; /// Destroy an image. /// @@ -175,6 +172,7 @@ pub trait Device: Sized { /// All submitted commands that refer to this query pool must have completed. unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result, Error>; + #[deprecated] unsafe fn run_cmd_buf( &self, cmd_buf: &Self::CmdBuf, @@ -183,16 +181,44 @@ pub trait Device: Sized { fence: Option<&Self::Fence>, ) -> Result<(), Error>; - unsafe fn read_buffer( + unsafe fn run_cmd_bufs( &self, - buffer: &Self::Buffer, - result: &mut Vec, + cmd_buf: &[&Self::CmdBuf], + wait_semaphores: &[Self::Semaphore], + signal_semaphores: &[Self::Semaphore], + fence: Option<&Self::Fence>, ) -> Result<(), Error>; - unsafe fn write_buffer( + /// Copy data from the buffer to memory. + /// + /// Discussion question: add offset? + /// + /// # Safety + /// + /// The buffer must be valid to access. The destination memory must be valid to + /// write to. The ranges must not overlap. The offset + size must be within + /// the buffer's allocation, and size within the destination. + unsafe fn read_buffer( &self, buffer: &Self::Buffer, - contents: &[T], + dst: *mut u8, + offset: u64, + size: u64, + ) -> Result<(), Error>; + + /// Copy data from memory to the buffer. + /// + /// # Safety + /// + /// The buffer must be valid to access. The source memory must be valid to + /// read from. The ranges must not overlap. The offset + size must be within + /// the buffer's allocation, and size within the source. + unsafe fn write_buffer( + &self, + buffer: &Self::Buffer, + contents: *const u8, + offset: u64, + size: u64, ) -> Result<(), Error>; unsafe fn create_semaphore(&self) -> Result; diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 8d2700f..959e330 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -1,6 +1,7 @@ //! Vulkan implemenation of HAL trait. use std::borrow::Cow; +use std::convert::TryInto; use std::ffi::{CStr, CString}; use std::os::raw::c_char; use std::sync::Arc; @@ -9,7 +10,9 @@ use ash::extensions::{ext::DebugUtils, khr}; use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1}; use ash::{vk, Device, Entry, Instance}; -use crate::{BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize}; +use crate::{ + BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize, +}; pub struct VkInstance { /// Retain the dynamic lib. @@ -60,7 +63,8 @@ pub struct VkSwapchain { pub struct Buffer { buffer: vk::Buffer, buffer_memory: vk::DeviceMemory, - size: u64, + // TODO: there should probably be a Buffer trait and this should be a method. + pub size: u64, } pub struct Image { @@ -339,6 +343,14 @@ impl VkInstance { None }; + // The question of when and when not to use staging buffers is complex, and this + // is only a first approximation. Basically, it *must* be false when buffers can + // be created with a memory type that is not host-visible. That is not guaranteed + // here but is likely to be the case. + // + // I'm still investigating what should be done in systems with Resizable BAR. + let use_staging_buffers = props.device_type != vk::PhysicalDeviceType::INTEGRATED_GPU; + // TODO: finer grained query of specific subgroup info. let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0); let gpu_info = GpuInfo { @@ -346,6 +358,7 @@ impl VkInstance { has_subgroups, subgroup_size, has_memory_model, + use_staging_buffers, }; Ok(VkDevice { @@ -511,11 +524,7 @@ impl crate::Device for VkDevice { Ok(()) } - unsafe fn create_image2d( - &self, - width: u32, - height: u32, - ) -> Result { + unsafe fn create_image2d(&self, width: u32, height: u32) -> Result { let device = &self.device.device; let extent = vk::Extent3D { width, @@ -726,37 +735,78 @@ impl crate::Device for VkDevice { Ok(()) } - unsafe fn read_buffer( + /// Run the command buffers. + /// + /// This submits the command buffer for execution. The provided fence + /// is signalled when the execution is complete. + unsafe fn run_cmd_bufs( &self, - buffer: &Buffer, - result: &mut Vec, + cmd_bufs: &[&CmdBuf], + wait_semaphores: &[Self::Semaphore], + signal_semaphores: &[Self::Semaphore], + fence: Option<&Self::Fence>, ) -> Result<(), Error> { let device = &self.device.device; + + let fence = match fence { + Some(fence) => *fence, + None => vk::Fence::null(), + }; + let wait_stages = wait_semaphores + .iter() + .map(|_| vk::PipelineStageFlags::ALL_COMMANDS) + .collect::>(); + // Use SmallVec or similar here to reduce allocation? + let cmd_bufs = cmd_bufs.iter().map(|c| c.cmd_buf).collect::>(); + device.queue_submit( + self.queue, + &[vk::SubmitInfo::builder() + .command_buffers(&cmd_bufs) + .wait_semaphores(wait_semaphores) + .wait_dst_stage_mask(&wait_stages) + .signal_semaphores(signal_semaphores) + .build()], + fence, + )?; + Ok(()) + } + + unsafe fn read_buffer( + &self, + buffer: &Self::Buffer, + dst: *mut u8, + offset: u64, + size: u64, + ) -> Result<(), Error> { + let copy_size = size.try_into()?; + let device = &self.device.device; let buf = device.map_memory( buffer.buffer_memory, - 0, - buffer.size, + offset, + size, vk::MemoryMapFlags::empty(), )?; - let len = buffer.size as usize / std::mem::size_of::(); - if len > result.len() { - result.reserve(len - result.len()); - } - std::ptr::copy_nonoverlapping(buf as *const T, result.as_mut_ptr(), len); - result.set_len(len); + std::ptr::copy_nonoverlapping(buf as *const u8, dst, copy_size); device.unmap_memory(buffer.buffer_memory); Ok(()) } - unsafe fn write_buffer(&self, buffer: &Buffer, contents: &[T]) -> Result<(), Error> { + unsafe fn write_buffer( + &self, + buffer: &Buffer, + contents: *const u8, + offset: u64, + size: u64, + ) -> Result<(), Error> { + let copy_size = size.try_into()?; let device = &self.device.device; let buf = device.map_memory( buffer.buffer_memory, - 0, - std::mem::size_of_val(contents) as u64, + offset, + size, vk::MemoryMapFlags::empty(), )?; - std::ptr::copy_nonoverlapping(contents.as_ptr(), buf as *mut T, contents.len()); + std::ptr::copy_nonoverlapping(contents, buf as *mut u8, copy_size); device.unmap_memory(buffer.buffer_memory); Ok(()) } diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 53201f8..69e0fc4 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -254,8 +254,7 @@ fn main() -> Result<(), Error> { let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?; let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST; - let image_buf = - session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; + let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?; cmd_buf.begin(); renderer.record(&mut cmd_buf, &query_pool); diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 26b3c44..3c87f68 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -189,16 +189,18 @@ pub fn dump_k1_data(k1_buf: &[u32]) { pub struct Renderer { pub image_dev: hub::Image, // resulting image - scene_buf_host: hub::Buffer, - scene_buf_dev: hub::Buffer, + // The reference is held by the pipelines. We will be changing + // this to make the scene upload dynamic. + #[allow(dead_code)] + scene_buf: hub::Buffer, memory_buf_host: hub::Buffer, memory_buf_dev: hub::Buffer, state_buf: hub::Buffer, - config_buf_host: hub::Buffer, - config_buf_dev: hub::Buffer, + #[allow(dead_code)] + config_buf: hub::Buffer, el_pipeline: hub::Pipeline, el_ds: hub::DescriptorSet, @@ -246,21 +248,11 @@ impl Renderer { n_elements, n_paths, n_pathseg, n_trans ); - let mut scene_buf_host = session - .create_buffer(std::mem::size_of_val(&scene[..]) as u64, host_upload) - .unwrap(); - let scene_buf_dev = session - .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) - .unwrap(); - scene_buf_host.write(&scene)?; + let scene_buf = session.create_buffer_init(&scene[..], dev).unwrap(); let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32)?; - const CONFIG_SIZE: u64 = 10 * 4; // Size of Config in setup.h. - let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host_upload)?; - let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?; - // TODO: constants const PATH_SIZE: usize = 12; const BIN_SIZE: usize = 8; @@ -280,7 +272,7 @@ impl Renderer { alloc += (n_paths * ANNO_SIZE + 3) & !3; let trans_base = alloc; alloc += (n_trans * TRANS_SIZE + 3) & !3; - config_buf_host.write(&[ + let config = &[ n_paths as u32, n_pathseg as u32, WIDTH_IN_TILES as u32, @@ -291,8 +283,11 @@ impl Renderer { pathseg_base as u32, anno_base as u32, trans_base as u32, - ])?; + ]; + let config_buf = session.create_buffer_init(&config[..], dev).unwrap(); + // Perhaps we could avoid the explicit staging buffer by having buffer creation method + // that takes both initial contents and a size. let mut memory_buf_host = session.create_buffer(2 * 4, host_upload)?; let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?; @@ -301,36 +296,34 @@ impl Renderer { let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?; let el_ds = session.create_simple_descriptor_set( &el_pipeline, - &[&memory_buf_dev, &config_buf_dev, &scene_buf_dev, &state_buf], + &[&memory_buf_dev, &config_buf, &scene_buf, &state_buf], )?; let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv"); let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?; let tile_ds = session - .create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf_dev])?; + .create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?; let path_alloc_code = include_bytes!("../shader/path_coarse.spv"); let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?; let path_ds = session - .create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf_dev])?; + .create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?; let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv"); let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?; - let backdrop_ds = session.create_simple_descriptor_set( - &backdrop_pipeline, - &[&memory_buf_dev, &config_buf_dev], - )?; + let backdrop_ds = session + .create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?; // TODO: constants let bin_code = include_bytes!("../shader/binning.spv"); let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?; - let bin_ds = session - .create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf_dev])?; + let bin_ds = + session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?; let coarse_code = include_bytes!("../shader/coarse.spv"); let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?; let coarse_ds = session - .create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf_dev])?; + .create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?; let bg_image = Self::make_test_bg_image(&session); @@ -358,19 +351,17 @@ impl Renderer { .create_compute_pipeline(&session, k4_code)?; let k4_ds = session .descriptor_set_builder() - .add_buffers(&[&memory_buf_dev, &config_buf_dev]) + .add_buffers(&[&memory_buf_dev, &config_buf]) .add_images(&[&image_dev]) .add_textures(&[&bg_image]) .build(&session, &k4_pipeline)?; Ok(Renderer { - scene_buf_host, - scene_buf_dev, + scene_buf, memory_buf_host, memory_buf_dev, state_buf, - config_buf_host, - config_buf_dev, + config_buf, image_dev, el_pipeline, el_ds, @@ -394,14 +385,6 @@ impl Renderer { } pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) { - cmd_buf.copy_buffer( - self.scene_buf_host.vk_buffer(), - self.scene_buf_dev.vk_buffer(), - ); - cmd_buf.copy_buffer( - self.config_buf_host.vk_buffer(), - self.config_buf_dev.vk_buffer(), - ); cmd_buf.copy_buffer( self.memory_buf_host.vk_buffer(), self.memory_buf_dev.vk_buffer(), @@ -488,8 +471,7 @@ impl Renderer { let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC; let mut buffer = session.create_buffer(buf.len() as u64, host_upload)?; buffer.write(buf)?; - let image = - session.create_image2d(width.try_into()?, height.try_into()?)?; + let image = session.create_image2d(width.try_into()?, height.try_into()?)?; let mut cmd_buf = session.cmd_buf()?; cmd_buf.begin(); cmd_buf.image_barrier( From 0cc72d97651d0d39459462a1b75b3f07903787ca Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 24 May 2021 13:56:24 -0700 Subject: [PATCH 2/4] Reduce allocations for retaining resources Use an enum instead of Box for resources to be retained until command buffer completion, and allow both references (which will be cloned) and owned resources (useful for staging buffers). --- piet-gpu-hal/src/hub.rs | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 554a024..116939c 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -4,7 +4,6 @@ //! It is likely that it will also take care of compile time and runtime //! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only. -use std::any::Any; use std::convert::TryInto; use std::sync::{Arc, Mutex, Weak}; @@ -41,7 +40,7 @@ struct SessionInner { pub struct CmdBuf { cmd_buf: vulkan::CmdBuf, fence: Fence, - resources: Vec>, + resources: Vec, session: Weak, } @@ -53,7 +52,7 @@ struct SubmittedCmdBufInner { // better to chose one or the other. cmd_buf: vulkan::CmdBuf, fence: Fence, - resources: Vec>, + resources: Vec, staging_cmd_buf: Option, } @@ -91,6 +90,12 @@ unsafe impl PlainData for i64 {} unsafe impl PlainData for f32 {} unsafe impl PlainData for f64 {} +/// A resource to retain during the lifetime of a command submission. +pub enum RetainResource { + Buffer(Buffer), + Image(Image), +} + impl Session { pub fn new(device: vulkan::VkDevice) -> Session { let gpu_info = device.query_gpu_info(); @@ -227,9 +232,9 @@ impl Session { *staging_cmd_buf = Some(cmd_buf); } let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap(); - // This will ensure the staging buffer is deallocated - staging_cmd_buf.add_resource(&create_buf); + // This will ensure the staging buffer is deallocated. It would be nice to staging_cmd_buf.copy_buffer(create_buf.vk_buffer(), buf.vk_buffer()); + staging_cmd_buf.add_resource(create_buf); Ok(buf) } else { Ok(create_buf) @@ -307,8 +312,8 @@ impl CmdBuf { /// There are two choices for upholding the lifetime invariant: this function, or /// the caller can manually hold the reference. The latter is appropriate when it's /// part of retained state. - pub fn add_resource(&mut self, resource: &T) { - self.resources.push(Box::new(resource.clone())); + pub fn add_resource(&mut self, resource: impl Into) { + self.resources.push(resource.into()); } } @@ -546,3 +551,21 @@ impl<'a, T> IntoRefs<'a, T> for Vec<&'a T> { self.into_iter() } } + +impl From for RetainResource { + fn from(buf: Buffer) -> Self { + RetainResource::Buffer(buf) + } +} + +impl From for RetainResource { + fn from(img: Image) -> Self { + RetainResource::Image(img) + } +} + +impl<'a, T: Into> From<&'a T> for RetainResource { + fn from(resource: &'a T) -> Self { + resource.clone().into() + } +} From 22935fccc62e4de81ab1b7f5178e7fced3b0d574 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 24 May 2021 14:25:13 -0700 Subject: [PATCH 3/4] Use const generics for IntoRefs Yay! Now we can use an array of any size, slice, or vector. --- piet-gpu-hal/src/hub.rs | 38 +------------------------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 116939c..2ad9651 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -502,43 +502,7 @@ impl<'a, T> IntoRefs<'a, T> for &'a [&'a T] { } } -// TODO: this will benefit from const generics! -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 1] { - type Iterator = std::iter::Copied>; - fn into_refs(self) -> Self::Iterator { - self.into_iter().copied() - } -} - -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 2] { - type Iterator = std::iter::Copied>; - fn into_refs(self) -> Self::Iterator { - self.into_iter().copied() - } -} - -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 3] { - type Iterator = std::iter::Copied>; - fn into_refs(self) -> Self::Iterator { - self.into_iter().copied() - } -} - -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 4] { - type Iterator = std::iter::Copied>; - fn into_refs(self) -> Self::Iterator { - self.into_iter().copied() - } -} - -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 5] { - type Iterator = std::iter::Copied>; - fn into_refs(self) -> Self::Iterator { - self.into_iter().copied() - } -} - -impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 6] { +impl<'a, T, const N: usize> IntoRefs<'a, T> for &'a [&'a T; N] { type Iterator = std::iter::Copied>; fn into_refs(self) -> Self::Iterator { self.into_iter().copied() From 174c81ec09927624ceacf1b3b432e49644ca2318 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 24 May 2021 15:42:25 -0700 Subject: [PATCH 4/4] Cleanup Fix bound on blanket RetainResource impl. Clean up run_cmd_buf. --- piet-gpu-hal/src/hub.rs | 2 +- piet-gpu-hal/src/lib.rs | 9 --------- piet-gpu-hal/src/vulkan.rs | 34 ---------------------------------- 3 files changed, 1 insertion(+), 44 deletions(-) diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 2ad9651..3238376 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -528,7 +528,7 @@ impl From for RetainResource { } } -impl<'a, T: Into> From<&'a T> for RetainResource { +impl<'a, T: Clone + Into> From<&'a T> for RetainResource { fn from(resource: &'a T) -> Self { resource.clone().into() } diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 323f5e5..ed27dd5 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -172,15 +172,6 @@ pub trait Device: Sized { /// All submitted commands that refer to this query pool must have completed. unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result, Error>; - #[deprecated] - unsafe fn run_cmd_buf( - &self, - cmd_buf: &Self::CmdBuf, - wait_semaphores: &[Self::Semaphore], - signal_semaphores: &[Self::Semaphore], - fence: Option<&Self::Fence>, - ) -> Result<(), Error>; - unsafe fn run_cmd_bufs( &self, cmd_buf: &[&Self::CmdBuf], diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 959e330..94772e5 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -701,40 +701,6 @@ impl crate::Device for VkDevice { Ok(result) } - /// Run the command buffer. - /// - /// This submits the command buffer for execution. The provided fence - /// is signalled when the execution is complete. - unsafe fn run_cmd_buf( - &self, - cmd_buf: &CmdBuf, - wait_semaphores: &[Self::Semaphore], - signal_semaphores: &[Self::Semaphore], - fence: Option<&Self::Fence>, - ) -> Result<(), Error> { - let device = &self.device.device; - - let fence = match fence { - Some(fence) => *fence, - None => vk::Fence::null(), - }; - let wait_stages = wait_semaphores - .iter() - .map(|_| vk::PipelineStageFlags::ALL_COMMANDS) - .collect::>(); - device.queue_submit( - self.queue, - &[vk::SubmitInfo::builder() - .command_buffers(&[cmd_buf.cmd_buf]) - .wait_semaphores(wait_semaphores) - .wait_dst_stage_mask(&wait_stages) - .signal_semaphores(signal_semaphores) - .build()], - fence, - )?; - Ok(()) - } - /// Run the command buffers. /// /// This submits the command buffer for execution. The provided fence