diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs index 1c9fd68..a017662 100644 --- a/piet-gpu-hal/examples/collatz.rs +++ b/piet-gpu-hal/examples/collatz.rs @@ -24,6 +24,7 @@ fn main() { cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1)); cmd_buf.write_timestamp(&query_pool, 1); + cmd_buf.host_barrier(); cmd_buf.finish(); device .run_cmd_buf(&cmd_buf, &[], &[], Some(&fence)) diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 67fe18e..2924841 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -100,8 +100,21 @@ pub trait CmdBuf { size: (u32, u32, u32), ); + /// Insert an execution and memory barrier. + /// + /// Compute kernels (and other actions) after this barrier may read from buffers + /// that were written before this barrier. unsafe fn memory_barrier(&mut self); + /// Insert a barrier for host access to buffers. + /// + /// The host may read buffers written before this barrier, after the fence for + /// the command buffer is signaled. + /// + /// See <http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/> + /// ("Host memory reads") for an explanation of this barrier. + unsafe fn host_barrier(&mut self); + unsafe fn image_barrier( &mut self, image: &D::Image, @@ -120,6 +133,8 @@ pub trait CmdBuf { unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer); + unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image); + // low portability, dx12 doesn't support it natively unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image); diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 909823c..dee01f2 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -707,7 +707,8 @@ impl crate::Device for VkDevice { /// Run the command buffer. /// - /// This version simply blocks until it's complete. + /// This submits the command buffer for execution. 
The provided fence + /// is signaled when the execution is complete. unsafe fn run_cmd_buf( &self, cmd_buf: &CmdBuf, @@ -730,8 +731,8 @@ impl crate::Device for VkDevice { &[vk::SubmitInfo::builder() .command_buffers(&[cmd_buf.cmd_buf]) .wait_semaphores(wait_semaphores) - .signal_semaphores(signal_semaphores) .wait_dst_stage_mask(&wait_stages) + .signal_semaphores(signal_semaphores) .build()], fence, )?; @@ -830,6 +831,22 @@ impl crate::CmdBuf for CmdBuf { ); } + unsafe fn host_barrier(&mut self) { + let device = &self.device.device; + device.cmd_pipeline_barrier( + self.cmd_buf, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::PipelineStageFlags::HOST, + vk::DependencyFlags::empty(), + &[vk::MemoryBarrier::builder() + .src_access_mask(vk::AccessFlags::MEMORY_WRITE) + .dst_access_mask(vk::AccessFlags::HOST_READ) + .build()], + &[], + &[], + ); + } + unsafe fn image_barrier( &mut self, image: &Image, @@ -900,6 +917,29 @@ impl crate::CmdBuf for CmdBuf { ); } + unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { + let device = &self.device.device; + device.cmd_copy_buffer_to_image( + self.cmd_buf, + src.buffer, + dst.image, + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[vk::BufferImageCopy { + buffer_offset: 0, + buffer_row_length: 0, // tight packing + buffer_image_height: 0, // tight packing + image_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + image_offset: vk::Offset3D { x: 0, y: 0, z: 0 }, + image_extent: dst.extent, + }], + ); + } + unsafe fn blit_image(&self, src: &Image, dst: &Image) { let device = &self.device.device; device.cmd_blit_image( @@ -974,7 +1014,7 @@ impl VkSwapchain { let (image_idx, _suboptimal) = self.swapchain_fn.acquire_next_image( self.swapchain, !0, - self.acquisition_semaphores[self.acquisition_idx], + acquisition_semaphore, vk::Fence::null(), )?; self.acquisition_idx = (self.acquisition_idx + 1) % 
self.acquisition_semaphores.len(); diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 0b0a34f..5052dc6 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -225,6 +225,7 @@ fn main() -> Result<(), Error> { cmd_buf.begin(); renderer.record(&mut cmd_buf, &query_pool); cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); + cmd_buf.host_barrier(); cmd_buf.finish(); let start = std::time::Instant::now(); device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;