diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs index 78ad449..c5e1e04 100644 --- a/piet-gpu-hal/src/dx12.rs +++ b/piet-gpu-hal/src/dx12.rs @@ -21,7 +21,7 @@ use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; use smallvec::SmallVec; -use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat}; +use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat, ComputePassDescriptor}; use self::{ descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned}, @@ -76,6 +76,7 @@ pub struct CmdBuf { c: wrappers::GraphicsCommandList, allocator: CommandAllocator, needs_reset: bool, + end_query: Option<(wrappers::QueryHeap, u32)>, } pub struct Pipeline { @@ -360,6 +361,7 @@ impl crate::backend::Device for Dx12Device { c, allocator, needs_reset: false, + end_query: None, }) } } @@ -388,11 +390,10 @@ impl crate::backend::Device for Dx12Device { let mapped = self.map_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; std::ptr::copy_nonoverlapping(mapped, buf.as_mut_ptr() as *mut u8, size); self.unmap_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; - let ts0 = buf[0]; let tsp = (self.ts_freq as f64).recip(); - let result = buf[1..] 
+ let result = buf .iter() - .map(|ts| ts.wrapping_sub(ts0) as f64 * tsp) + .map(|ts| *ts as f64 * tsp) .collect(); Ok(result) } @@ -610,6 +611,16 @@ impl crate::backend::CmdBuf for CmdBuf { self.allocator.reset().is_ok() && self.c.reset(&self.allocator, None).is_ok() } + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + if let Some((pool, start, end)) = &desc.timer_queries { + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Dx12(pool) = pool { + self.write_timestamp(pool, *start); + self.end_query = Some((pool.heap.clone(), *end)); + } + } + } + unsafe fn dispatch( &mut self, pipeline: &Pipeline, @@ -628,6 +639,12 @@ impl crate::backend::CmdBuf for CmdBuf { .dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2); } + unsafe fn end_compute_pass(&mut self) { + if let Some((heap, end)) = self.end_query.take() { + self.c.end_timing_query(&heap, end); + } + } + unsafe fn memory_barrier(&mut self) { // See comments in CommandBuffer::pipeline_barrier in gfx-hal dx12 backend. 
// The "proper" way to do this would be to name the actual buffers participating @@ -666,7 +683,7 @@ impl crate::backend::CmdBuf for CmdBuf { self.memory_barrier(); } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option<u64>) { let cpu_ref = buffer.cpu_ref.as_ref().unwrap(); let (gpu_ref, heap) = buffer .gpu_ref @@ -684,23 +701,23 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { // TODO: consider using copy_resource here (if sizes match) let size = src.size.min(dst.size); self.c.copy_buffer(&dst.resource, 0, &src.resource, 0, size); } - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { self.c .copy_texture_to_buffer(&src.resource, &dst.resource, src.size.0, src.size.1); } - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { self.c .copy_buffer_to_texture(&src.resource, &dst.resource, dst.size.0, dst.size.1); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { self.c.copy_resource(&src.resource, &dst.resource); } diff --git a/piet-gpu-hal/src/dx12/wrappers.rs b/piet-gpu-hal/src/dx12/wrappers.rs index 4bbb86c..9a3fb90 100644 --- a/piet-gpu-hal/src/dx12/wrappers.rs +++ b/piet-gpu-hal/src/dx12/wrappers.rs @@ -79,7 +79,6 @@ pub struct Blob(pub ComPtr<d3dcommon::ID3DBlob>); #[derive(Clone)] pub struct ShaderByteCode { pub bytecode: d3d12::D3D12_SHADER_BYTECODE, - blob: Option<Blob>, } #[derive(Clone)] @@ -741,7 +740,6 @@ impl ShaderByteCode { BytecodeLength: blob.0.GetBufferSize(), pShaderBytecode: blob.0.GetBufferPointer(), }, - blob: Some(blob), } } @@ -810,7 +808,6 @@ impl ShaderByteCode { BytecodeLength: bytecode.len(), pShaderBytecode: bytecode.as_ptr()
as *const _, }, - blob: None, } } } diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 37c59df..5c7122a 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -375,8 +375,17 @@ impl Session { /// /// This should be called after waiting on the command buffer that wrote the /// timer queries. + /// + /// The returned vector is one shorter than the number of timer queries in the + /// pool; the first value is subtracted off. It would likely be better to return + /// the raw timestamps, but that change should be made consistently. pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result<Vec<f64>, Error> { - self.0.device.fetch_query_pool(pool) + let result = self.0.device.fetch_query_pool(pool)?; + // Subtract off first timestamp. + Ok(result[1..] + .iter() + .map(|ts| *ts as f64 - result[0]) + .collect()) } #[doc(hidden)] @@ -602,6 +611,10 @@ impl CmdBuf { /// Write a timestamp. /// /// The query index must be less than the size of the query pool on creation. + /// + /// Deprecation: for greater portability, set timestamp queries on compute + /// passes instead. + #[deprecated(note = "use compute pass descriptor instead")] pub unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { self.cmd_buf().write_timestamp(pool, query); } diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 241cdfd..18f6390 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -190,9 +190,15 @@ pub struct WorkgroupLimits { pub max_invocations: u32, } +/// Options for creating a compute pass. #[derive(Default)] pub struct ComputePassDescriptor<'a> { // Maybe label should go here? It does in wgpu and wgpu_hal. + /// Timer query parameters. + /// + /// To record timer queries for a compute pass, set the query pool, start + /// query index, and end query index here. The indices must be less than + /// the size of the query pool.
timer_queries: Option<(&'a QueryPool, u32, u32)>, } diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 8392899..504d947 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -15,7 +15,7 @@ use smallvec::SmallVec; use crate::backend::Device as DeviceTrait; use crate::{ BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams, SubgroupSize, - WorkgroupLimits, + WorkgroupLimits, ComputePassDescriptor, }; pub struct VkInstance { @@ -92,6 +92,7 @@ pub struct CmdBuf { cmd_buf: vk::CommandBuffer, cmd_pool: vk::CommandPool, device: Arc<RawDevice>, + end_query: Option<(vk::QueryPool, u32)>, } pub struct QueryPool { @@ -738,6 +739,7 @@ impl crate::backend::Device for VkDevice { cmd_buf, cmd_pool, device: self.device.clone(), + end_query: None, }) } } @@ -770,11 +772,10 @@ impl crate::backend::Device for VkDevice { // results (Windows 10, AMD 5700 XT). let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT; device.get_query_pool_results(pool.pool, 0, pool.n_queries, &mut buf, flags)?; - let ts0 = buf[0]; let tsp = self.timestamp_period as f64 * 1e-9; - let result = buf[1..]
+ let result = buf .iter() - .map(|ts| ts.wrapping_sub(ts0) as f64 * tsp) + .map(|ts| *ts as f64 * tsp) .collect(); Ok(result) } @@ -902,6 +903,16 @@ impl crate::backend::CmdBuf for CmdBuf { true } + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + if let Some((pool, start, end)) = &desc.timer_queries { + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Vk(pool) = pool { + self.write_timestamp_raw(pool.pool, *start); + self.end_query = Some((pool.pool, *end)); + } + } + } + unsafe fn dispatch( &mut self, pipeline: &Pipeline, @@ -931,6 +942,12 @@ impl crate::backend::CmdBuf for CmdBuf { ); } + unsafe fn end_compute_pass(&mut self) { + if let Some((pool, end)) = self.end_query.take() { + self.write_timestamp_raw(pool, end); + } + } + /// Insert a pipeline barrier for all memory accesses. unsafe fn memory_barrier(&mut self) { let device = &self.device.device; @@ -995,13 +1012,13 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option<u64>) { let device = &self.device.device; let size = size.unwrap_or(vk::WHOLE_SIZE); device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0); } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { let device = &self.device.device; let size = src.size.min(dst.size); device.cmd_copy_buffer( self.cmd_buf, @@ -1012,7 +1029,7 @@ - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { let device = &self.device.device; device.cmd_copy_image_to_buffer( self.cmd_buf, @@ -1035,7 +1052,7 @@ - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { let device =
&self.device.device; device.cmd_copy_buffer_to_image( self.cmd_buf, @@ -1058,7 +1075,7 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { let device = &self.device.device; device.cmd_blit_image( self.cmd_buf, @@ -1106,13 +1123,7 @@ impl crate::backend::CmdBuf for CmdBuf { } unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { - let device = &self.device.device; - device.cmd_write_timestamp( - self.cmd_buf, - vk::PipelineStageFlags::COMPUTE_SHADER, - pool.pool, - query, - ); + self.write_timestamp_raw(pool.pool, query); } unsafe fn begin_debug_label(&mut self, label: &str) { @@ -1130,6 +1141,18 @@ impl crate::backend::CmdBuf for CmdBuf { } } +impl CmdBuf { + unsafe fn write_timestamp_raw(&mut self, pool: vk::QueryPool, query: u32) { + let device = &self.device.device; + device.cmd_write_timestamp( + self.cmd_buf, + vk::PipelineStageFlags::COMPUTE_SHADER, + pool, + query, + ); + } +} + impl crate::backend::DescriptorSetBuilder for DescriptorSetBuilder { fn add_buffers(&mut self, buffers: &[&Buffer]) { self.buffers.extend(buffers.iter().map(|b| b.buffer));