Timestamp queries

These function, but can use some work.

First, the buffer situation is worse than it should be. It should be
possible to create a single readback buffer rather then copy from
gpu-local to host-coherent.

Second, the command buffer `finish_timestamps` call doesn't correlate to
anything in Vulkan, so needs plumbing up through the hub in one form or
other when that happens. I'm inclined to make it ergonomic by doing a
bit of resource tracking that will trigger the appropriate call (and
subsequent host barrier) in the `finish` method on the command buffer.
This commit is contained in:
Raph Levien 2021-05-21 13:19:10 -07:00
parent f482921806
commit e4b16e706a
4 changed files with 69 additions and 17 deletions

View file

@ -52,6 +52,7 @@ fn toy() -> Result<(), Error> {
let buf = device.create_buffer(1024, MemFlags::host_coherent())?;
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?;
let data: Vec<u32> = (1..257).collect();
let query_pool = device.create_query_pool(2)?;
unsafe {
device.write_buffer(&buf, &data)?;
let pipeline = device.create_simple_compute_pipeline(SHADER_CODE, 1, 0)?;
@ -61,9 +62,12 @@ fn toy() -> Result<(), Error> {
cmd_buf.begin();
cmd_buf.copy_buffer(&buf, &dev_buf);
cmd_buf.memory_barrier();
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&dev_buf, &buf);
cmd_buf.finish_timestamps(&query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
@ -71,6 +75,7 @@ fn toy() -> Result<(), Error> {
let mut readback: Vec<u32> = Vec::new();
device.read_buffer(&buf, &mut readback)?;
println!("{:?}", readback);
println!("{:?}", device.fetch_query_pool(&query_pool));
}
Ok(())
}

View file

@ -22,6 +22,7 @@ pub struct Dx12Device {
device: Device,
command_allocator: CommandAllocator,
command_queue: CommandQueue,
ts_freq: u64,
}
#[derive(Clone)]
@ -55,7 +56,15 @@ pub struct Pipeline {
// gpu-descriptor crate.
pub struct DescriptorSet(wrappers::DescriptorHeap);
pub struct QueryPool;
pub struct QueryPool {
heap: wrappers::QueryHeap,
buf: Buffer,
// TODO: piet-dx12 manages to do this with one buffer. I think a
// HEAP_TYPE_READBACK will work, but we currently don't have fine
// grained usage flags to express this.
readback_buf: Buffer,
n_queries: u32,
}
pub struct Fence {
fence: wrappers::Fence,
@ -118,10 +127,12 @@ impl Dx12Instance {
0,
)?;
let command_allocator = device.create_command_allocator(list_type)?;
let ts_freq = command_queue.get_timestamp_frequency()?;
Ok(Dx12Device {
device,
command_queue,
command_allocator,
ts_freq,
})
}
}
@ -202,11 +213,33 @@ impl crate::Device for Dx12Device {
}
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> {
todo!()
unsafe {
let heap = self
.device
.create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?;
let buf =
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::device_local())?;
let readback_buf =
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::host_coherent())?;
Ok(QueryPool {
heap,
buf,
readback_buf,
n_queries,
})
}
}
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
todo!()
let mut buf = vec![0u64; pool.n_queries as usize];
self.read_buffer(&pool.readback_buf, &mut buf)?;
let ts0 = buf[0];
let tsp = (self.ts_freq as f64).recip();
let result = buf[1..]
.iter()
.map(|ts| ts.wrapping_sub(ts0) as f64 * tsp)
.collect();
Ok(result)
}
unsafe fn run_cmd_buf(
@ -248,6 +281,7 @@ impl crate::Device for Dx12Device {
contents: &[T],
) -> Result<(), Error> {
let len = buffer.size as usize / std::mem::size_of::<T>();
assert!(len >= contents.len());
buffer.resource.write_resource(len, contents.as_ptr())?;
Ok(())
}
@ -366,7 +400,13 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
}
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
todo!()
self.0.end_timing_query(&pool.heap, query);
}
unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
self.0
.resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0);
self.copy_buffer(&pool.buf, &pool.readback_buf);
}
}

View file

@ -107,7 +107,7 @@ impl Resource {
}
pub fn get(&self) -> *const d3d12::ID3D12Resource {
self.ptr.load(Ordering::Relaxed)
self.get_mut()
}
pub fn get_mut(&self) -> *mut d3d12::ID3D12Resource {
@ -491,6 +491,7 @@ impl Device {
Ok(RootSignature(ComPtr::from_raw(signature)))
}
// This is for indirect command submission and we probably won't use it.
pub unsafe fn create_command_signature(
&self,
root_signature: RootSignature,
@ -722,7 +723,7 @@ impl Device {
&self,
heap_type: d3d12::D3D12_QUERY_HEAP_TYPE,
num_expected_queries: u32,
) -> QueryHeap {
) -> Result<QueryHeap, Error> {
let query_heap_desc = d3d12::D3D12_QUERY_HEAP_DESC {
Type: heap_type,
Count: num_expected_queries,
@ -731,14 +732,16 @@ impl Device {
let mut query_heap = ptr::null_mut();
error_if_failed_else_unit(self.0.CreateQueryHeap(
explain_error(
self.0.CreateQueryHeap(
&query_heap_desc as *const _,
&d3d12::ID3D12QueryHeap::uuidof(),
&mut query_heap as *mut _ as *mut _,
))
.expect("could not create query heap");
),
"could not create query heap",
)?;
QueryHeap(ComPtr::from_raw(query_heap))
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
}
// based on: https://github.com/microsoft/DirectX-Graphics-Samples/blob/682051ddbe4be820195fffed0bfbdbbde8611a90/Libraries/D3DX12/d3dx12.h#L1875
@ -1375,9 +1378,9 @@ impl GraphicsCommandList {
);
}
pub unsafe fn end_timing_query(&self, query_heap: QueryHeap, index: u32) {
pub unsafe fn end_timing_query(&self, query_heap: &QueryHeap, index: u32) {
self.0.EndQuery(
query_heap.0.as_raw() as *mut _,
query_heap.0.as_raw(),
d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
index,
);
@ -1385,10 +1388,10 @@ impl GraphicsCommandList {
pub unsafe fn resolve_timing_query_data(
&self,
query_heap: QueryHeap,
query_heap: &QueryHeap,
start_index: u32,
num_queries: u32,
destination_buffer: Resource,
destination_buffer: &Resource,
aligned_destination_buffer_offset: u64,
) {
self.0.ResolveQueryData(

View file

@ -244,6 +244,10 @@ pub trait CmdBuf<D: Device> {
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
/// Prepare the timestamps for reading. This isn't required on Vulkan but
/// is required on (at least) DX12.
unsafe fn finish_timestamps(&mut self, pool: &D::QueryPool) {}
}
pub trait MemFlags: Sized + Clone + Copy {