From 487d948217eb6e4292e6586462df2dd257da6044 Mon Sep 17 00:00:00 2001
From: Raph Levien
Date: Sun, 12 Apr 2020 22:28:03 -0700
Subject: [PATCH] [hal] Add timer queries

Note: also make dispatch size programmable.
---
Review revision: reap_query_pool now destroys the pool even when the
readback fails, write_timestamp resets the query slot before writing to
it, and the example unwraps the reaped timestamps. The blob ids on the
index lines below are those of the original patch.

 piet-gpu-hal/examples/collatz.rs |  7 +++-
 piet-gpu-hal/src/lib.rs          | 24 ++++++++++-
 piet-gpu-hal/src/vulkan.rs       | 75 ++++++++++++++++++++++++++++++--
 3 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs
index 533906d..7195891 100644
--- a/piet-gpu-hal/examples/collatz.rs
+++ b/piet-gpu-hal/examples/collatz.rs
@@ -14,15 +14,20 @@ fn main() {
         let code = include_bytes!("./shader/collatz.spv");
         let pipeline = device.create_simple_compute_pipeline(code, 1).unwrap();
         let descriptor_set = device.create_descriptor_set(&pipeline, &[&buffer]).unwrap();
+        let query_pool = device.create_query_pool(2).unwrap();
         let mut cmd_buf = device.create_cmd_buf().unwrap();
         cmd_buf.begin();
-        cmd_buf.dispatch(&pipeline, &descriptor_set);
+        cmd_buf.write_timestamp(&query_pool, 0);
+        cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1));
+        cmd_buf.write_timestamp(&query_pool, 1);
         cmd_buf.finish();
         device.run_cmd_buf(&cmd_buf).unwrap();
+        let timestamps = device.reap_query_pool(query_pool).unwrap();
         let mut dst: Vec<u32> = Default::default();
         device.read_buffer(&buffer, &mut dst).unwrap();
         for (i, val) in dst.iter().enumerate().take(16) {
             println!("{}: {}", i, val);
         }
+        println!("{:?}", timestamps);
     }
 }
diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs
index e4eb3b5..07814e4 100644
--- a/piet-gpu-hal/src/lib.rs
+++ b/piet-gpu-hal/src/lib.rs
@@ -12,6 +12,7 @@ pub trait Device: Sized {
     type MemFlags: MemFlags;
     type Pipeline;
     type DescriptorSet;
+    type QueryPool;
     type CmdBuf: CmdBuf<Self>;
 
     fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
@@ -30,6 +31,18 @@ pub trait Device: Sized {
 
     fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
 
+    /// Create a pool of `n_queries` timestamp queries.
+    fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error>;
+
+    /// Get results from query pool, destroying it in the process.
+    ///
+    /// The returned vector has one fewer element than the number of queries;
+    /// the first query is used as a baseline.
+    ///
+    /// # Safety
+    /// All submitted commands that refer to this query pool must have completed.
+    unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error>;
+
     unsafe fn run_cmd_buf(&self, cmd_buf: &Self::CmdBuf) -> Result<(), Error>;
 
     unsafe fn read_buffer<T: Sized>(
@@ -50,9 +63,18 @@ pub trait CmdBuf<D: Device> {
 
     unsafe fn finish(&mut self);
 
-    unsafe fn dispatch(&mut self, pipeline: &D::Pipeline, descriptor_set: &D::DescriptorSet);
+    /// Dispatch the pipeline; `size` is the workgroup count in each dimension.
+    unsafe fn dispatch(
+        &mut self,
+        pipeline: &D::Pipeline,
+        descriptor_set: &D::DescriptorSet,
+        size: (u32, u32, u32),
+    );
 
     unsafe fn memory_barrier(&mut self);
+
+    /// Record a timestamp into slot `query` of `pool`.
+    unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
 }
 
 pub trait MemFlags: Sized {
diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs
index 192ac29..2acfec4 100644
--- a/piet-gpu-hal/src/vulkan.rs
+++ b/piet-gpu-hal/src/vulkan.rs
@@ -21,6 +21,7 @@ pub struct VkDevice {
     device_mem_props: vk::PhysicalDeviceMemoryProperties,
     queue: vk::Queue,
     qfi: u32,
+    timestamp_period: f32,
 }
 
 struct RawDevice {
@@ -52,6 +53,12 @@ pub struct CmdBuf {
     device: Arc<RawDevice>,
 }
 
+/// A pool of timestamp queries, together with the number of slots it holds.
+pub struct QueryPool {
+    pool: vk::QueryPool,
+    n_queries: u32,
+}
+
 pub struct MemFlags(vk::MemoryPropertyFlags);
 
 impl VkInstance {
@@ -108,11 +115,15 @@ impl VkInstance {
 
         let device = Arc::new(RawDevice { device });
 
+        let props = self.instance.get_physical_device_properties(pdevice);
+        let timestamp_period = props.limits.timestamp_period;
+
         Ok(VkDevice {
             device,
             device_mem_props,
             qfi,
             queue,
+            timestamp_period,
         })
     }
 }
@@ -122,6 +133,7 @@ impl crate::Device for VkDevice {
     type CmdBuf = CmdBuf;
     type DescriptorSet = DescriptorSet;
     type Pipeline = Pipeline;
+    type QueryPool = QueryPool;
     type MemFlags = MemFlags;
 
     fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
@@ -283,6 +295,45 @@ impl crate::Device for VkDevice {
         }
     }
 
+    /// Create a query pool for timestamp queries.
+    fn create_query_pool(&self, n_queries: u32) -> Result<QueryPool, Error> {
+        unsafe {
+            let device = &self.device.device;
+            let pool = device.create_query_pool(
+                &vk::QueryPoolCreateInfo::builder()
+                    .query_type(vk::QueryType::TIMESTAMP)
+                    .query_count(n_queries),
+                None,
+            )?;
+            Ok(QueryPool { pool, n_queries })
+        }
+    }
+
+    /// Read back the timestamps and destroy the pool.
+    ///
+    /// Each element is the time in seconds from the first query to a later
+    /// query, so the result is one shorter than the pool.
+    unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error> {
+        let device = &self.device.device;
+        let mut buf = vec![0u64; pool.n_queries as usize];
+        let status = device.get_query_pool_results(
+            pool.pool,
+            0,
+            pool.n_queries,
+            &mut buf,
+            vk::QueryResultFlags::TYPE_64,
+        );
+        // The pool is consumed by this call, so destroy it before propagating
+        // a readback error; an early return here would leak the handle.
+        device.destroy_query_pool(pool.pool, None);
+        status?;
+        let ts0 = buf[0];
+        // `timestamp_period` is nanoseconds per tick; scale ticks to seconds.
+        let tsp = self.timestamp_period as f64 * 1e-9;
+        let result = buf[1..].iter().map(|ts| ts.wrapping_sub(ts0) as f64 * tsp).collect();
+        Ok(result)
+    }
+
     /// Run the command buffer.
     ///
     /// This version simply blocks until it's complete.
@@ -358,7 +409,12 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
         self.device.device.end_command_buffer(self.cmd_buf).unwrap();
     }
 
-    unsafe fn dispatch(&mut self, pipeline: &Pipeline, descriptor_set: &DescriptorSet) {
+    unsafe fn dispatch(
+        &mut self,
+        pipeline: &Pipeline,
+        descriptor_set: &DescriptorSet,
+        size: (u32, u32, u32),
+    ) {
         let device = &self.device.device;
         device.cmd_bind_pipeline(
             self.cmd_buf,
@@ -373,7 +429,7 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
             &[descriptor_set.descriptor_set],
             &[],
         );
-        device.cmd_dispatch(self.cmd_buf, 256, 1, 1);
+        device.cmd_dispatch(self.cmd_buf, size.0, size.1, size.2);
     }
 
     /// Insert a pipeline barrier for all memory accesses.
@@ -392,6 +448,21 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
             &[],
         );
     }
+
+    /// Record a timestamp into slot `query` of `pool`.
+    ///
+    /// The slot is reset first because Vulkan requires a query to be
+    /// unavailable (freshly reset) when a timestamp is written to it.
+    unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
+        let device = &self.device.device;
+        device.cmd_reset_query_pool(self.cmd_buf, pool.pool, query, 1);
+        device.cmd_write_timestamp(
+            self.cmd_buf,
+            vk::PipelineStageFlags::COMPUTE_SHADER,
+            pool.pool,
+            query,
+        );
+    }
 }
 
 impl crate::MemFlags for MemFlags {