mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Timestamp queries
These work, but could use some improvement. First, the buffer situation is worse than it should be: it should be possible to create a single readback buffer rather than copying from a gpu-local buffer to a host-coherent one. Second, the command buffer `finish_timestamps` call doesn't correspond to anything in Vulkan, so it needs plumbing up through the hub in one form or another when that happens. I'm inclined to make it ergonomic by doing a bit of resource tracking that will trigger the appropriate call (and subsequent host barrier) in the `finish` method on the command buffer.
This commit is contained in:
parent
f482921806
commit
e4b16e706a
4 changed files with 69 additions and 17 deletions
|
@ -52,6 +52,7 @@ fn toy() -> Result<(), Error> {
|
||||||
let buf = device.create_buffer(1024, MemFlags::host_coherent())?;
|
let buf = device.create_buffer(1024, MemFlags::host_coherent())?;
|
||||||
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?;
|
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?;
|
||||||
let data: Vec<u32> = (1..257).collect();
|
let data: Vec<u32> = (1..257).collect();
|
||||||
|
let query_pool = device.create_query_pool(2)?;
|
||||||
unsafe {
|
unsafe {
|
||||||
device.write_buffer(&buf, &data)?;
|
device.write_buffer(&buf, &data)?;
|
||||||
let pipeline = device.create_simple_compute_pipeline(SHADER_CODE, 1, 0)?;
|
let pipeline = device.create_simple_compute_pipeline(SHADER_CODE, 1, 0)?;
|
||||||
|
@ -61,9 +62,12 @@ fn toy() -> Result<(), Error> {
|
||||||
cmd_buf.begin();
|
cmd_buf.begin();
|
||||||
cmd_buf.copy_buffer(&buf, &dev_buf);
|
cmd_buf.copy_buffer(&buf, &dev_buf);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 0);
|
||||||
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
|
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 1);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.copy_buffer(&dev_buf, &buf);
|
cmd_buf.copy_buffer(&dev_buf, &buf);
|
||||||
|
cmd_buf.finish_timestamps(&query_pool);
|
||||||
cmd_buf.host_barrier();
|
cmd_buf.host_barrier();
|
||||||
cmd_buf.finish();
|
cmd_buf.finish();
|
||||||
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
||||||
|
@ -71,6 +75,7 @@ fn toy() -> Result<(), Error> {
|
||||||
let mut readback: Vec<u32> = Vec::new();
|
let mut readback: Vec<u32> = Vec::new();
|
||||||
device.read_buffer(&buf, &mut readback)?;
|
device.read_buffer(&buf, &mut readback)?;
|
||||||
println!("{:?}", readback);
|
println!("{:?}", readback);
|
||||||
|
println!("{:?}", device.fetch_query_pool(&query_pool));
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ pub struct Dx12Device {
|
||||||
device: Device,
|
device: Device,
|
||||||
command_allocator: CommandAllocator,
|
command_allocator: CommandAllocator,
|
||||||
command_queue: CommandQueue,
|
command_queue: CommandQueue,
|
||||||
|
ts_freq: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -55,7 +56,15 @@ pub struct Pipeline {
|
||||||
// gpu-descriptor crate.
|
// gpu-descriptor crate.
|
||||||
pub struct DescriptorSet(wrappers::DescriptorHeap);
|
pub struct DescriptorSet(wrappers::DescriptorHeap);
|
||||||
|
|
||||||
pub struct QueryPool;
|
pub struct QueryPool {
|
||||||
|
heap: wrappers::QueryHeap,
|
||||||
|
buf: Buffer,
|
||||||
|
// TODO: piet-dx12 manages to do this with one buffer. I think a
|
||||||
|
// HEAP_TYPE_READBACK will work, but we currently don't have fine
|
||||||
|
// grained usage flags to express this.
|
||||||
|
readback_buf: Buffer,
|
||||||
|
n_queries: u32,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Fence {
|
pub struct Fence {
|
||||||
fence: wrappers::Fence,
|
fence: wrappers::Fence,
|
||||||
|
@ -118,10 +127,12 @@ impl Dx12Instance {
|
||||||
0,
|
0,
|
||||||
)?;
|
)?;
|
||||||
let command_allocator = device.create_command_allocator(list_type)?;
|
let command_allocator = device.create_command_allocator(list_type)?;
|
||||||
|
let ts_freq = command_queue.get_timestamp_frequency()?;
|
||||||
Ok(Dx12Device {
|
Ok(Dx12Device {
|
||||||
device,
|
device,
|
||||||
command_queue,
|
command_queue,
|
||||||
command_allocator,
|
command_allocator,
|
||||||
|
ts_freq,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -202,11 +213,33 @@ impl crate::Device for Dx12Device {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> {
|
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> {
|
||||||
todo!()
|
unsafe {
|
||||||
|
let heap = self
|
||||||
|
.device
|
||||||
|
.create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?;
|
||||||
|
let buf =
|
||||||
|
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::device_local())?;
|
||||||
|
let readback_buf =
|
||||||
|
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::host_coherent())?;
|
||||||
|
Ok(QueryPool {
|
||||||
|
heap,
|
||||||
|
buf,
|
||||||
|
readback_buf,
|
||||||
|
n_queries,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
|
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
|
||||||
todo!()
|
let mut buf = vec![0u64; pool.n_queries as usize];
|
||||||
|
self.read_buffer(&pool.readback_buf, &mut buf)?;
|
||||||
|
let ts0 = buf[0];
|
||||||
|
let tsp = (self.ts_freq as f64).recip();
|
||||||
|
let result = buf[1..]
|
||||||
|
.iter()
|
||||||
|
.map(|ts| ts.wrapping_sub(ts0) as f64 * tsp)
|
||||||
|
.collect();
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn run_cmd_buf(
|
unsafe fn run_cmd_buf(
|
||||||
|
@ -248,6 +281,7 @@ impl crate::Device for Dx12Device {
|
||||||
contents: &[T],
|
contents: &[T],
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let len = buffer.size as usize / std::mem::size_of::<T>();
|
let len = buffer.size as usize / std::mem::size_of::<T>();
|
||||||
|
assert!(len >= contents.len());
|
||||||
buffer.resource.write_resource(len, contents.as_ptr())?;
|
buffer.resource.write_resource(len, contents.as_ptr())?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -366,7 +400,13 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
||||||
todo!()
|
self.0.end_timing_query(&pool.heap, query);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
|
||||||
|
self.0
|
||||||
|
.resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0);
|
||||||
|
self.copy_buffer(&pool.buf, &pool.readback_buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -107,7 +107,7 @@ impl Resource {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&self) -> *const d3d12::ID3D12Resource {
|
pub fn get(&self) -> *const d3d12::ID3D12Resource {
|
||||||
self.ptr.load(Ordering::Relaxed)
|
self.get_mut()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_mut(&self) -> *mut d3d12::ID3D12Resource {
|
pub fn get_mut(&self) -> *mut d3d12::ID3D12Resource {
|
||||||
|
@ -491,6 +491,7 @@ impl Device {
|
||||||
Ok(RootSignature(ComPtr::from_raw(signature)))
|
Ok(RootSignature(ComPtr::from_raw(signature)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is for indirect command submission and we probably won't use it.
|
||||||
pub unsafe fn create_command_signature(
|
pub unsafe fn create_command_signature(
|
||||||
&self,
|
&self,
|
||||||
root_signature: RootSignature,
|
root_signature: RootSignature,
|
||||||
|
@ -722,7 +723,7 @@ impl Device {
|
||||||
&self,
|
&self,
|
||||||
heap_type: d3d12::D3D12_QUERY_HEAP_TYPE,
|
heap_type: d3d12::D3D12_QUERY_HEAP_TYPE,
|
||||||
num_expected_queries: u32,
|
num_expected_queries: u32,
|
||||||
) -> QueryHeap {
|
) -> Result<QueryHeap, Error> {
|
||||||
let query_heap_desc = d3d12::D3D12_QUERY_HEAP_DESC {
|
let query_heap_desc = d3d12::D3D12_QUERY_HEAP_DESC {
|
||||||
Type: heap_type,
|
Type: heap_type,
|
||||||
Count: num_expected_queries,
|
Count: num_expected_queries,
|
||||||
|
@ -731,14 +732,16 @@ impl Device {
|
||||||
|
|
||||||
let mut query_heap = ptr::null_mut();
|
let mut query_heap = ptr::null_mut();
|
||||||
|
|
||||||
error_if_failed_else_unit(self.0.CreateQueryHeap(
|
explain_error(
|
||||||
&query_heap_desc as *const _,
|
self.0.CreateQueryHeap(
|
||||||
&d3d12::ID3D12QueryHeap::uuidof(),
|
&query_heap_desc as *const _,
|
||||||
&mut query_heap as *mut _ as *mut _,
|
&d3d12::ID3D12QueryHeap::uuidof(),
|
||||||
))
|
&mut query_heap as *mut _ as *mut _,
|
||||||
.expect("could not create query heap");
|
),
|
||||||
|
"could not create query heap",
|
||||||
|
)?;
|
||||||
|
|
||||||
QueryHeap(ComPtr::from_raw(query_heap))
|
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// based on: https://github.com/microsoft/DirectX-Graphics-Samples/blob/682051ddbe4be820195fffed0bfbdbbde8611a90/Libraries/D3DX12/d3dx12.h#L1875
|
// based on: https://github.com/microsoft/DirectX-Graphics-Samples/blob/682051ddbe4be820195fffed0bfbdbbde8611a90/Libraries/D3DX12/d3dx12.h#L1875
|
||||||
|
@ -1375,9 +1378,9 @@ impl GraphicsCommandList {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn end_timing_query(&self, query_heap: QueryHeap, index: u32) {
|
pub unsafe fn end_timing_query(&self, query_heap: &QueryHeap, index: u32) {
|
||||||
self.0.EndQuery(
|
self.0.EndQuery(
|
||||||
query_heap.0.as_raw() as *mut _,
|
query_heap.0.as_raw(),
|
||||||
d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
|
d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
|
||||||
index,
|
index,
|
||||||
);
|
);
|
||||||
|
@ -1385,10 +1388,10 @@ impl GraphicsCommandList {
|
||||||
|
|
||||||
pub unsafe fn resolve_timing_query_data(
|
pub unsafe fn resolve_timing_query_data(
|
||||||
&self,
|
&self,
|
||||||
query_heap: QueryHeap,
|
query_heap: &QueryHeap,
|
||||||
start_index: u32,
|
start_index: u32,
|
||||||
num_queries: u32,
|
num_queries: u32,
|
||||||
destination_buffer: Resource,
|
destination_buffer: &Resource,
|
||||||
aligned_destination_buffer_offset: u64,
|
aligned_destination_buffer_offset: u64,
|
||||||
) {
|
) {
|
||||||
self.0.ResolveQueryData(
|
self.0.ResolveQueryData(
|
||||||
|
|
|
@ -244,6 +244,10 @@ pub trait CmdBuf<D: Device> {
|
||||||
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
|
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
|
||||||
|
|
||||||
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
|
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
|
||||||
|
|
||||||
|
/// Prepare the timestamps for reading. This isn't required on Vulkan but
|
||||||
|
/// is required on (at least) DX12.
|
||||||
|
unsafe fn finish_timestamps(&mut self, pool: &D::QueryPool) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait MemFlags: Sized + Clone + Copy {
|
pub trait MemFlags: Sized + Clone + Copy {
|
||||||
|
|
Loading…
Add table
Reference in a new issue