From f73da22a69fcfb96cae394d2135c0f0a6d453b1c Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 6 Sep 2021 10:17:16 -0700 Subject: [PATCH 1/3] Clean up command buffers This patch deallocates command buffers after command submission completes (the same time as other resources are released). It should be portable and robust on all back-ends, but not necessarily the most efficient. But reuse of command buffers, as well as more efficient allocation on Vulkan and DX12, are for followup work. --- piet-gpu-hal/src/backend.rs | 6 +++- piet-gpu-hal/src/dx12.rs | 8 ++++++ piet-gpu-hal/src/dx12/wrappers.rs | 4 +++ piet-gpu-hal/src/hub.rs | 47 ++++++++++++++++++------------- piet-gpu-hal/src/macros.rs | 29 +++++++++++++++++++ piet-gpu-hal/src/metal.rs | 8 ++++++ piet-gpu-hal/src/mux.rs | 16 +++++++++++ piet-gpu-hal/src/vulkan.rs | 18 ++++++++++-- 8 files changed, 113 insertions(+), 23 deletions(-) diff --git a/piet-gpu-hal/src/backend.rs b/piet-gpu-hal/src/backend.rs index 7b1d59f..926f43a 100644 --- a/piet-gpu-hal/src/backend.rs +++ b/piet-gpu-hal/src/backend.rs @@ -16,7 +16,7 @@ //! The generic trait for backends to implement. -use crate::{mux::ShaderCode, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams}; +use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams}; pub trait Device: Sized { type Buffer: 'static; @@ -105,6 +105,9 @@ pub trait Device: Sized { fn create_cmd_buf(&self) -> Result; + /// If the command buffer was submitted, it must complete before this is called. + unsafe fn destroy_cmd_buf(&self, cmd_buf: Self::CmdBuf) -> Result<(), Error>; + fn create_query_pool(&self, n_queries: u32) -> Result; /// Get results from query pool, destroying it in the process. @@ -158,6 +161,7 @@ pub trait Device: Sized { unsafe fn create_semaphore(&self) -> Result; unsafe fn create_fence(&self, signaled: bool) -> Result; + unsafe fn destroy_fence(&self, fence: Self::Fence) -> Result<(), Error>; unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error>; unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result; diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs index bcef409..3fa57b4 100644 --- a/piet-gpu-hal/src/dx12.rs +++ b/piet-gpu-hal/src/dx12.rs @@ -316,6 +316,10 @@ impl crate::backend::Device for Dx12Device { } } + unsafe fn destroy_cmd_buf(&self, _cmd_buf: Self::CmdBuf) -> Result<(), Error> { + Ok(()) + } + fn create_query_pool(&self, n_queries: u32) -> Result { unsafe { let heap = self @@ -409,6 +413,10 @@ impl crate::backend::Device for Dx12Device { Ok(Fence { fence, event, val }) } + unsafe fn destroy_fence(&self, _fence: Self::Fence) -> Result<(), Error> { + Ok(()) + } + unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error> { for fence in fences { // TODO: probably handle errors here. diff --git a/piet-gpu-hal/src/dx12/wrappers.rs b/piet-gpu-hal/src/dx12/wrappers.rs index 800959f..7acd8ac 100644 --- a/piet-gpu-hal/src/dx12/wrappers.rs +++ b/piet-gpu-hal/src/dx12/wrappers.rs @@ -661,6 +661,10 @@ impl Device { Ok(Fence(ComPtr::from_raw(fence))) } + pub unsafe fn destroy_fence(&self, fence: &Fence) -> Result<(), Error> { + Ok(()) + } + pub unsafe fn create_committed_resource( &self, heap_properties: &d3d12::D3D12_HEAP_PROPERTIES, diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 8312d4e..d53833f 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -30,8 +30,13 @@ pub struct Session(Arc); struct SessionInner { device: mux::Device, + /// A pool of command buffers that can be reused. + /// + /// Currently this is not used, as it only works well on Vulkan. At some + /// point, we will want to efficiently reuse command buffers rather than + /// allocating them each time, but that is a TODO. cmd_buf_pool: Mutex>, - /// Command buffers that are still pending (so resources can't be freed). + /// Command buffers that are still pending (so resources can't be freed yet). pending: Mutex>, /// A command buffer that is used for copying from staging buffers. staging_cmd_buf: Mutex>, @@ -169,18 +174,7 @@ impl Session { let mut item = pending.swap_remove(i); // TODO: wait is superfluous, can just reset let _ = self.0.device.wait_and_reset(vec![&mut item.fence]); - - // Reuse of command buffers works on Vulkan, but not at all on - // Metal and is problematic on DX12 (the allocator is returned) - // to the pool. Punt for now. - - let mut pool = self.0.cmd_buf_pool.lock().unwrap(); - pool.push((item.cmd_buf, item.fence)); - std::mem::drop(item.resources); - if let Some(staging_cmd_buf) = item.staging_cmd_buf { - pool.push((staging_cmd_buf.cmd_buf, staging_cmd_buf.fence)); - std::mem::drop(staging_cmd_buf.resources); - } + self.0.cleanup_submitted_cmd_buf(item); } else { i += 1; } @@ -392,6 +386,25 @@ impl Session { } } +impl SessionInner { + /// Clean up a submitted command buffer. + /// + /// This drops the resources used by the command buffer and also cleans up the command + /// buffer itself. Currently that means destroying it, but at some point we'll want to + /// be better at reuse. + unsafe fn cleanup_submitted_cmd_buf(&self, item: SubmittedCmdBufInner) { + let _should_handle_err = self.device.destroy_cmd_buf(item.cmd_buf); + let _should_handle_err = self.device.destroy_fence(item.fence); + + std::mem::drop(item.resources); + if let Some(staging_cmd_buf) = item.staging_cmd_buf { + let _should_handle_err = self.device.destroy_cmd_buf(staging_cmd_buf.cmd_buf); + let _should_handle_err = self.device.destroy_fence(staging_cmd_buf.fence); + std::mem::drop(staging_cmd_buf.resources); + } + } +} + impl CmdBuf { /// Begin recording into a command buffer. /// @@ -566,14 +579,8 @@ impl SubmittedCmdBuf { if let Some(session) = Weak::upgrade(&self.1) { unsafe { session.device.wait_and_reset(vec![&mut item.fence])?; + session.cleanup_submitted_cmd_buf(item); } - // See discussion in `poll_cleanup` - session - .cmd_buf_pool - .lock() - .unwrap() - .push((item.cmd_buf, item.fence)); - std::mem::drop(item.resources); } // else session dropped error? Ok(()) diff --git a/piet-gpu-hal/src/macros.rs b/piet-gpu-hal/src/macros.rs index 8131e50..38897a8 100644 --- a/piet-gpu-hal/src/macros.rs +++ b/piet-gpu-hal/src/macros.rs @@ -72,6 +72,16 @@ macro_rules! mux_enum { } } } + $crate::mux_cfg! { + #[cfg(vk)] + #[allow(unused)] + fn vk_owned(self) -> $vk { + match self { + $name::Vk(x) => x, + _ => panic!("downcast error") + } + } + } $crate::mux_cfg! { #[cfg(dx12)] @@ -93,6 +103,16 @@ macro_rules! mux_enum { } } } + $crate::mux_cfg! { + #[cfg(dx12)] + #[allow(unused)] + fn dx12_owned(self) -> $dx12 { + match self { + $name::Dx12(x) => x, + _ => panic!("downcast error") + } + } + } $crate::mux_cfg! { #[cfg(mtl)] @@ -112,6 +132,15 @@ macro_rules! mux_enum { } } } + $crate::mux_cfg! { + #[cfg(mtl)] + #[allow(unused)] + fn mtl_owned(self) -> $mtl { + match self { + $name::Mtl(x) => x, + } + } + } } }; } diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index b8a1bf9..dbfc8d9 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -289,6 +289,10 @@ impl crate::backend::Device for MtlDevice { Ok(CmdBuf { cmd_buf }) } + unsafe fn destroy_cmd_buf(&self, _cmd_buf: Self::CmdBuf) -> Result<(), Error> { + Ok(()) + } + fn create_query_pool(&self, n_queries: u32) -> Result { // TODO Ok(QueryPool) @@ -365,6 +369,10 @@ impl crate::backend::Device for MtlDevice { Ok(Fence::Idle) } + unsafe fn destroy_fence(&self, _fence: Self::Fence) -> Result<(), Error> { + Ok(()) + } + unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error> { for fence in fences { match fence { diff --git a/piet-gpu-hal/src/mux.rs b/piet-gpu-hal/src/mux.rs index 8f93eb6..31d2f4c 100644 --- a/piet-gpu-hal/src/mux.rs +++ b/piet-gpu-hal/src/mux.rs @@ -242,6 +242,14 @@ impl Device { } } + pub unsafe fn destroy_fence(&self, fence: Fence) -> Result<(), Error> { + mux_match! { self; + Device::Vk(d) => d.destroy_fence(fence.vk_owned()), + Device::Dx12(d) => d.destroy_fence(fence.dx12_owned()), + Device::Mtl(d) => d.destroy_fence(fence.mtl_owned()), + } + } + // Consider changing Vec to iterator (as is done in gfx-hal) pub unsafe fn wait_and_reset(&self, fences: Vec<&mut Fence>) -> Result<(), Error> { mux_match! { self; @@ -309,6 +317,14 @@ impl Device { } } + pub unsafe fn destroy_cmd_buf(&self, cmd_buf: CmdBuf) -> Result<(), Error> { + mux_match! { self; + Device::Vk(d) => d.destroy_cmd_buf(cmd_buf.vk_owned()), + Device::Dx12(d) => d.destroy_cmd_buf(cmd_buf.dx12_owned()), + Device::Mtl(d) => d.destroy_cmd_buf(cmd_buf.mtl_owned()), + } + } + pub fn create_query_pool(&self, n_queries: u32) -> Result { mux_match! { self; Device::Vk(d) => d.create_query_pool(n_queries).map(QueryPool::Vk), diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 96cbc54..5b0dfd6 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -89,6 +89,7 @@ pub struct DescriptorSet { pub struct CmdBuf { cmd_buf: vk::CommandBuffer, + cmd_pool: vk::CommandPool, device: Arc, } @@ -620,6 +621,12 @@ impl crate::backend::Device for VkDevice { Ok(device.create_fence(&vk::FenceCreateInfo::builder().flags(flags).build(), None)?) } + unsafe fn destroy_fence(&self, fence: Self::Fence) -> Result<(), Error> { + let device = &self.device.device; + device.destroy_fence(fence, None); + Ok(()) + } + unsafe fn create_semaphore(&self) -> Result { let device = &self.device.device; Ok(device.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)?) @@ -658,7 +665,7 @@ impl crate::backend::Device for VkDevice { fn create_cmd_buf(&self) -> Result { unsafe { let device = &self.device.device; - let command_pool = device.create_command_pool( + let cmd_pool = device.create_command_pool( &vk::CommandPoolCreateInfo::builder() .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER) .queue_family_index(self.qfi), @@ -666,17 +673,24 @@ impl crate::backend::Device for VkDevice { )?; let cmd_buf = device.allocate_command_buffers( &vk::CommandBufferAllocateInfo::builder() - .command_pool(command_pool) + .command_pool(cmd_pool) .level(vk::CommandBufferLevel::PRIMARY) .command_buffer_count(1), )?[0]; Ok(CmdBuf { cmd_buf, + cmd_pool, device: self.device.clone(), }) } } + unsafe fn destroy_cmd_buf(&self, cmd_buf: CmdBuf) -> Result<(), Error> { + let device = &self.device.device; + device.destroy_command_pool(cmd_buf.cmd_pool, None); + Ok(()) + } + /// Create a query pool for timestamp queries. fn create_query_pool(&self, n_queries: u32) -> Result { unsafe { From 6039916631c0767348711257bb6cd10deb6e7178 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 19 Oct 2021 17:25:08 -0700 Subject: [PATCH 2/3] Wait on in-flight command buffers on exit If there is a command buffer in flight on exit from the winit app, wait on it so that the resources get destroyed cleanly. There may be a more aggressive strategy to quick-exit, but this is probably the most reliable approach and I see it in other code bases. --- piet-gpu/bin/winit.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index c5f9bef..19db3c2 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -1,4 +1,3 @@ -use piet::RenderContext; use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; @@ -152,6 +151,13 @@ fn main() -> Result<(), Error> { current_frame += 1; } + Event::LoopDestroyed => { + if let Some(submitted) = submitted.take() { + // Wait for command list submission, otherwise dropping of renderer may + // cause validation errors (and possibly crashes). + submitted.wait().unwrap(); + } + } _ => (), } }) From a3d3f39fbd110c81bf833a08b0808eb067cf8ab3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 18 Oct 2021 12:23:06 -0700 Subject: [PATCH 3/3] Wait on query results This shouldn't be necessary, but was causing NOT_READY errors. --- piet-gpu-hal/src/vulkan.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 5b0dfd6..e1d78f2 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -708,12 +708,16 @@ impl crate::backend::Device for VkDevice { unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result, Error> { let device = &self.device.device; let mut buf = vec![0u64; pool.n_queries as usize]; + // It's unclear to me why WAIT is needed here, as the wait on the command buffer's + // fence should make the query available, but otherwise we get sporadic NOT_READY + // results (Windows 10, AMD 5700 XT). + let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT; device.get_query_pool_results( pool.pool, 0, pool.n_queries, &mut buf, - vk::QueryResultFlags::TYPE_64, + flags, )?; let ts0 = buf[0]; let tsp = self.timestamp_period as f64 * 1e-9;