mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Fix timer queries in Vulkan and DX12 backends
Current status: the piet-gpu-hal module (including the collatz example) have the new API (with queries set on compute pass) implemented. The other uses have not yet been updated. On Metal, only M1 is tested. The "command" counter style is partly implemented, but not fully wired up.
This commit is contained in:
parent
ba2b27cc3c
commit
9980c858b6
5 changed files with 85 additions and 29 deletions
|
@ -21,7 +21,7 @@ use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
|
|||
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat};
|
||||
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat, ComputePassDescriptor};
|
||||
|
||||
use self::{
|
||||
descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned},
|
||||
|
@ -76,6 +76,7 @@ pub struct CmdBuf {
|
|||
c: wrappers::GraphicsCommandList,
|
||||
allocator: CommandAllocator,
|
||||
needs_reset: bool,
|
||||
end_query: Option<(wrappers::QueryHeap, u32)>,
|
||||
}
|
||||
|
||||
pub struct Pipeline {
|
||||
|
@ -360,6 +361,7 @@ impl crate::backend::Device for Dx12Device {
|
|||
c,
|
||||
allocator,
|
||||
needs_reset: false,
|
||||
end_query: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -388,11 +390,10 @@ impl crate::backend::Device for Dx12Device {
|
|||
let mapped = self.map_buffer(&pool.buf, 0, size as u64, MapMode::Read)?;
|
||||
std::ptr::copy_nonoverlapping(mapped, buf.as_mut_ptr() as *mut u8, size);
|
||||
self.unmap_buffer(&pool.buf, 0, size as u64, MapMode::Read)?;
|
||||
let ts0 = buf[0];
|
||||
let tsp = (self.ts_freq as f64).recip();
|
||||
let result = buf[1..]
|
||||
let result = buf
|
||||
.iter()
|
||||
.map(|ts| ts.wrapping_sub(ts0) as f64 * tsp)
|
||||
.map(|ts| *ts as f64 * tsp)
|
||||
.collect();
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -610,6 +611,16 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
self.allocator.reset().is_ok() && self.c.reset(&self.allocator, None).is_ok()
|
||||
}
|
||||
|
||||
unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) {
|
||||
if let Some((pool, start, end)) = &desc.timer_queries {
|
||||
#[allow(irrefutable_let_patterns)]
|
||||
if let crate::hub::QueryPool::Dx12(pool) = pool {
|
||||
self.write_timestamp(pool, *start);
|
||||
self.end_query = Some((pool.heap.clone(), *end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn dispatch(
|
||||
&mut self,
|
||||
pipeline: &Pipeline,
|
||||
|
@ -628,6 +639,12 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
.dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2);
|
||||
}
|
||||
|
||||
unsafe fn end_compute_pass(&mut self) {
|
||||
if let Some((heap, end)) = self.end_query.take() {
|
||||
self.c.end_timing_query(&heap, end);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn memory_barrier(&mut self) {
|
||||
// See comments in CommandBuffer::pipeline_barrier in gfx-hal dx12 backend.
|
||||
// The "proper" way to do this would be to name the actual buffers participating
|
||||
|
@ -666,7 +683,7 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
self.memory_barrier();
|
||||
}
|
||||
|
||||
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
|
||||
unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option<u64>) {
|
||||
let cpu_ref = buffer.cpu_ref.as_ref().unwrap();
|
||||
let (gpu_ref, heap) = buffer
|
||||
.gpu_ref
|
||||
|
@ -684,23 +701,23 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||
unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) {
|
||||
// TODO: consider using copy_resource here (if sizes match)
|
||||
let size = src.size.min(dst.size);
|
||||
self.c.copy_buffer(&dst.resource, 0, &src.resource, 0, size);
|
||||
}
|
||||
|
||||
unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) {
|
||||
unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) {
|
||||
self.c
|
||||
.copy_texture_to_buffer(&src.resource, &dst.resource, src.size.0, src.size.1);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) {
|
||||
unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) {
|
||||
self.c
|
||||
.copy_buffer_to_texture(&src.resource, &dst.resource, dst.size.0, dst.size.1);
|
||||
}
|
||||
|
||||
unsafe fn blit_image(&self, src: &Image, dst: &Image) {
|
||||
unsafe fn blit_image(&mut self, src: &Image, dst: &Image) {
|
||||
self.c.copy_resource(&src.resource, &dst.resource);
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,6 @@ pub struct Blob(pub ComPtr<d3dcommon::ID3DBlob>);
|
|||
#[derive(Clone)]
|
||||
pub struct ShaderByteCode {
|
||||
pub bytecode: d3d12::D3D12_SHADER_BYTECODE,
|
||||
blob: Option<Blob>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -741,7 +740,6 @@ impl ShaderByteCode {
|
|||
BytecodeLength: blob.0.GetBufferSize(),
|
||||
pShaderBytecode: blob.0.GetBufferPointer(),
|
||||
},
|
||||
blob: Some(blob),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -810,7 +808,6 @@ impl ShaderByteCode {
|
|||
BytecodeLength: bytecode.len(),
|
||||
pShaderBytecode: bytecode.as_ptr() as *const _,
|
||||
},
|
||||
blob: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -375,8 +375,17 @@ impl Session {
|
|||
///
|
||||
/// This should be called after waiting on the command buffer that wrote the
|
||||
/// timer queries.
|
||||
///
|
||||
/// The returned vector is one shorter than the number of timer queries in the
|
||||
/// pool; the first value is subtracted off. It would likely be better to return
|
||||
/// the raw timestamps, but that change should be made consistently.
|
||||
pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result<Vec<f64>, Error> {
|
||||
self.0.device.fetch_query_pool(pool)
|
||||
let result = self.0.device.fetch_query_pool(pool)?;
|
||||
// Subtract off first timestamp.
|
||||
Ok(result[1..]
|
||||
.iter()
|
||||
.map(|ts| *ts as f64 - result[0])
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
|
@ -602,6 +611,10 @@ impl CmdBuf {
|
|||
/// Write a timestamp.
|
||||
///
|
||||
/// The query index must be less than the size of the query pool on creation.
|
||||
///
|
||||
/// Deprecation: for greater portability, set timestamp queries on compute
|
||||
/// passes instead.
|
||||
#[deprecated(note = "use compute pass descriptor instead")]
|
||||
pub unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
||||
self.cmd_buf().write_timestamp(pool, query);
|
||||
}
|
||||
|
|
|
@ -190,9 +190,15 @@ pub struct WorkgroupLimits {
|
|||
pub max_invocations: u32,
|
||||
}
|
||||
|
||||
/// Options for creating a compute pass.
|
||||
#[derive(Default)]
|
||||
pub struct ComputePassDescriptor<'a> {
|
||||
// Maybe label should go here? It does in wgpu and wgpu_hal.
|
||||
/// Timer query parameters.
|
||||
///
|
||||
/// To record timer queries for a compute pass, set the query pool, start
|
||||
/// query index, and end query index here. The indices must be less than
|
||||
/// the size of the query pool.
|
||||
timer_queries: Option<(&'a QueryPool, u32, u32)>,
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ use smallvec::SmallVec;
|
|||
use crate::backend::Device as DeviceTrait;
|
||||
use crate::{
|
||||
BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams, SubgroupSize,
|
||||
WorkgroupLimits,
|
||||
WorkgroupLimits, ComputePassDescriptor,
|
||||
};
|
||||
|
||||
pub struct VkInstance {
|
||||
|
@ -92,6 +92,7 @@ pub struct CmdBuf {
|
|||
cmd_buf: vk::CommandBuffer,
|
||||
cmd_pool: vk::CommandPool,
|
||||
device: Arc<RawDevice>,
|
||||
end_query: Option<(vk::QueryPool, u32)>,
|
||||
}
|
||||
|
||||
pub struct QueryPool {
|
||||
|
@ -738,6 +739,7 @@ impl crate::backend::Device for VkDevice {
|
|||
cmd_buf,
|
||||
cmd_pool,
|
||||
device: self.device.clone(),
|
||||
end_query: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -770,11 +772,10 @@ impl crate::backend::Device for VkDevice {
|
|||
// results (Windows 10, AMD 5700 XT).
|
||||
let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT;
|
||||
device.get_query_pool_results(pool.pool, 0, pool.n_queries, &mut buf, flags)?;
|
||||
let ts0 = buf[0];
|
||||
let tsp = self.timestamp_period as f64 * 1e-9;
|
||||
let result = buf[1..]
|
||||
let result = buf
|
||||
.iter()
|
||||
.map(|ts| ts.wrapping_sub(ts0) as f64 * tsp)
|
||||
.map(|ts| *ts as f64 * tsp)
|
||||
.collect();
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -902,6 +903,16 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
true
|
||||
}
|
||||
|
||||
unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) {
|
||||
if let Some((pool, start, end)) = &desc.timer_queries {
|
||||
#[allow(irrefutable_let_patterns)]
|
||||
if let crate::hub::QueryPool::Vk(pool) = pool {
|
||||
self.write_timestamp_raw(pool.pool, *start);
|
||||
self.end_query = Some((pool.pool, *end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn dispatch(
|
||||
&mut self,
|
||||
pipeline: &Pipeline,
|
||||
|
@ -931,6 +942,12 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn end_compute_pass(&mut self) {
|
||||
if let Some((pool, end)) = self.end_query.take() {
|
||||
self.write_timestamp_raw(pool, end);
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a pipeline barrier for all memory accesses.
|
||||
unsafe fn memory_barrier(&mut self) {
|
||||
let device = &self.device.device;
|
||||
|
@ -995,13 +1012,13 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
|
||||
unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option<u64>) {
|
||||
let device = &self.device.device;
|
||||
let size = size.unwrap_or(vk::WHOLE_SIZE);
|
||||
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||
unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) {
|
||||
let device = &self.device.device;
|
||||
let size = src.size.min(dst.size);
|
||||
device.cmd_copy_buffer(
|
||||
|
@ -1012,7 +1029,7 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) {
|
||||
unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_copy_image_to_buffer(
|
||||
self.cmd_buf,
|
||||
|
@ -1035,7 +1052,7 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) {
|
||||
unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_copy_buffer_to_image(
|
||||
self.cmd_buf,
|
||||
|
@ -1058,7 +1075,7 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn blit_image(&self, src: &Image, dst: &Image) {
|
||||
unsafe fn blit_image(&mut self, src: &Image, dst: &Image) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_blit_image(
|
||||
self.cmd_buf,
|
||||
|
@ -1106,13 +1123,7 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
}
|
||||
|
||||
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_write_timestamp(
|
||||
self.cmd_buf,
|
||||
vk::PipelineStageFlags::COMPUTE_SHADER,
|
||||
pool.pool,
|
||||
query,
|
||||
);
|
||||
self.write_timestamp_raw(pool.pool, query);
|
||||
}
|
||||
|
||||
unsafe fn begin_debug_label(&mut self, label: &str) {
|
||||
|
@ -1130,6 +1141,18 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
|||
}
|
||||
}
|
||||
|
||||
impl CmdBuf {
|
||||
unsafe fn write_timestamp_raw(&mut self, pool: vk::QueryPool, query: u32) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_write_timestamp(
|
||||
self.cmd_buf,
|
||||
vk::PipelineStageFlags::COMPUTE_SHADER,
|
||||
pool,
|
||||
query,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::backend::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
|
||||
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||
self.buffers.extend(buffers.iter().map(|b| b.buffer));
|
||||
|
|
Loading…
Add table
Reference in a new issue