mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Timestamp queries
These function, but can use some work. First, the buffer situation is worse than it should be. It should be possible to create a single readback buffer rather then copy from gpu-local to host-coherent. Second, the command buffer `finish_timestamps` call doesn't correlate to anything in Vulkan, so needs plumbing up through the hub in one form or other when that happens. I'm inclined to make it ergonomic by doing a bit of resource tracking that will trigger the appropriate call (and subsequent host barrier) in the `finish` method on the command buffer.
This commit is contained in:
parent
f482921806
commit
e4b16e706a
4 changed files with 69 additions and 17 deletions
|
@ -52,6 +52,7 @@ fn toy() -> Result<(), Error> {
|
|||
let buf = device.create_buffer(1024, MemFlags::host_coherent())?;
|
||||
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?;
|
||||
let data: Vec<u32> = (1..257).collect();
|
||||
let query_pool = device.create_query_pool(2)?;
|
||||
unsafe {
|
||||
device.write_buffer(&buf, &data)?;
|
||||
let pipeline = device.create_simple_compute_pipeline(SHADER_CODE, 1, 0)?;
|
||||
|
@ -61,9 +62,12 @@ fn toy() -> Result<(), Error> {
|
|||
cmd_buf.begin();
|
||||
cmd_buf.copy_buffer(&buf, &dev_buf);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.write_timestamp(&query_pool, 0);
|
||||
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.copy_buffer(&dev_buf, &buf);
|
||||
cmd_buf.finish_timestamps(&query_pool);
|
||||
cmd_buf.host_barrier();
|
||||
cmd_buf.finish();
|
||||
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
||||
|
@ -71,6 +75,7 @@ fn toy() -> Result<(), Error> {
|
|||
let mut readback: Vec<u32> = Vec::new();
|
||||
device.read_buffer(&buf, &mut readback)?;
|
||||
println!("{:?}", readback);
|
||||
println!("{:?}", device.fetch_query_pool(&query_pool));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ pub struct Dx12Device {
|
|||
device: Device,
|
||||
command_allocator: CommandAllocator,
|
||||
command_queue: CommandQueue,
|
||||
ts_freq: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -55,7 +56,15 @@ pub struct Pipeline {
|
|||
// gpu-descriptor crate.
|
||||
pub struct DescriptorSet(wrappers::DescriptorHeap);
|
||||
|
||||
pub struct QueryPool;
|
||||
pub struct QueryPool {
|
||||
heap: wrappers::QueryHeap,
|
||||
buf: Buffer,
|
||||
// TODO: piet-dx12 manages to do this with one buffer. I think a
|
||||
// HEAP_TYPE_READBACK will work, but we currently don't have fine
|
||||
// grained usage flags to express this.
|
||||
readback_buf: Buffer,
|
||||
n_queries: u32,
|
||||
}
|
||||
|
||||
pub struct Fence {
|
||||
fence: wrappers::Fence,
|
||||
|
@ -118,10 +127,12 @@ impl Dx12Instance {
|
|||
0,
|
||||
)?;
|
||||
let command_allocator = device.create_command_allocator(list_type)?;
|
||||
let ts_freq = command_queue.get_timestamp_frequency()?;
|
||||
Ok(Dx12Device {
|
||||
device,
|
||||
command_queue,
|
||||
command_allocator,
|
||||
ts_freq,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -202,11 +213,33 @@ impl crate::Device for Dx12Device {
|
|||
}
|
||||
|
||||
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> {
|
||||
todo!()
|
||||
unsafe {
|
||||
let heap = self
|
||||
.device
|
||||
.create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?;
|
||||
let buf =
|
||||
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::device_local())?;
|
||||
let readback_buf =
|
||||
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::host_coherent())?;
|
||||
Ok(QueryPool {
|
||||
heap,
|
||||
buf,
|
||||
readback_buf,
|
||||
n_queries,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
|
||||
todo!()
|
||||
let mut buf = vec![0u64; pool.n_queries as usize];
|
||||
self.read_buffer(&pool.readback_buf, &mut buf)?;
|
||||
let ts0 = buf[0];
|
||||
let tsp = (self.ts_freq as f64).recip();
|
||||
let result = buf[1..]
|
||||
.iter()
|
||||
.map(|ts| ts.wrapping_sub(ts0) as f64 * tsp)
|
||||
.collect();
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
unsafe fn run_cmd_buf(
|
||||
|
@ -248,6 +281,7 @@ impl crate::Device for Dx12Device {
|
|||
contents: &[T],
|
||||
) -> Result<(), Error> {
|
||||
let len = buffer.size as usize / std::mem::size_of::<T>();
|
||||
assert!(len >= contents.len());
|
||||
buffer.resource.write_resource(len, contents.as_ptr())?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -366,7 +400,13 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
|
|||
}
|
||||
|
||||
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
||||
todo!()
|
||||
self.0.end_timing_query(&pool.heap, query);
|
||||
}
|
||||
|
||||
unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
|
||||
self.0
|
||||
.resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0);
|
||||
self.copy_buffer(&pool.buf, &pool.readback_buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ impl Resource {
|
|||
}
|
||||
|
||||
pub fn get(&self) -> *const d3d12::ID3D12Resource {
|
||||
self.ptr.load(Ordering::Relaxed)
|
||||
self.get_mut()
|
||||
}
|
||||
|
||||
pub fn get_mut(&self) -> *mut d3d12::ID3D12Resource {
|
||||
|
@ -491,6 +491,7 @@ impl Device {
|
|||
Ok(RootSignature(ComPtr::from_raw(signature)))
|
||||
}
|
||||
|
||||
// This is for indirect command submission and we probably won't use it.
|
||||
pub unsafe fn create_command_signature(
|
||||
&self,
|
||||
root_signature: RootSignature,
|
||||
|
@ -722,7 +723,7 @@ impl Device {
|
|||
&self,
|
||||
heap_type: d3d12::D3D12_QUERY_HEAP_TYPE,
|
||||
num_expected_queries: u32,
|
||||
) -> QueryHeap {
|
||||
) -> Result<QueryHeap, Error> {
|
||||
let query_heap_desc = d3d12::D3D12_QUERY_HEAP_DESC {
|
||||
Type: heap_type,
|
||||
Count: num_expected_queries,
|
||||
|
@ -731,14 +732,16 @@ impl Device {
|
|||
|
||||
let mut query_heap = ptr::null_mut();
|
||||
|
||||
error_if_failed_else_unit(self.0.CreateQueryHeap(
|
||||
&query_heap_desc as *const _,
|
||||
&d3d12::ID3D12QueryHeap::uuidof(),
|
||||
&mut query_heap as *mut _ as *mut _,
|
||||
))
|
||||
.expect("could not create query heap");
|
||||
explain_error(
|
||||
self.0.CreateQueryHeap(
|
||||
&query_heap_desc as *const _,
|
||||
&d3d12::ID3D12QueryHeap::uuidof(),
|
||||
&mut query_heap as *mut _ as *mut _,
|
||||
),
|
||||
"could not create query heap",
|
||||
)?;
|
||||
|
||||
QueryHeap(ComPtr::from_raw(query_heap))
|
||||
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
|
||||
}
|
||||
|
||||
// based on: https://github.com/microsoft/DirectX-Graphics-Samples/blob/682051ddbe4be820195fffed0bfbdbbde8611a90/Libraries/D3DX12/d3dx12.h#L1875
|
||||
|
@ -1375,9 +1378,9 @@ impl GraphicsCommandList {
|
|||
);
|
||||
}
|
||||
|
||||
pub unsafe fn end_timing_query(&self, query_heap: QueryHeap, index: u32) {
|
||||
pub unsafe fn end_timing_query(&self, query_heap: &QueryHeap, index: u32) {
|
||||
self.0.EndQuery(
|
||||
query_heap.0.as_raw() as *mut _,
|
||||
query_heap.0.as_raw(),
|
||||
d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
|
||||
index,
|
||||
);
|
||||
|
@ -1385,10 +1388,10 @@ impl GraphicsCommandList {
|
|||
|
||||
pub unsafe fn resolve_timing_query_data(
|
||||
&self,
|
||||
query_heap: QueryHeap,
|
||||
query_heap: &QueryHeap,
|
||||
start_index: u32,
|
||||
num_queries: u32,
|
||||
destination_buffer: Resource,
|
||||
destination_buffer: &Resource,
|
||||
aligned_destination_buffer_offset: u64,
|
||||
) {
|
||||
self.0.ResolveQueryData(
|
||||
|
|
|
@ -244,6 +244,10 @@ pub trait CmdBuf<D: Device> {
|
|||
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
|
||||
|
||||
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
|
||||
|
||||
/// Prepare the timestamps for reading. This isn't required on Vulkan but
|
||||
/// is required on (at least) DX12.
|
||||
unsafe fn finish_timestamps(&mut self, pool: &D::QueryPool) {}
|
||||
}
|
||||
|
||||
pub trait MemFlags: Sized + Clone + Copy {
|
||||
|
|
Loading…
Add table
Reference in a new issue