mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Better DX12 descriptor management
Reduce allocation of descriptor heaps. This change also enables clearing of buffers, as the handles are needed at command dispatch time. Also updates the tests to use clear_buffers on DX12. Looking forward to being able to get rid of the compute shader workaround on Metal. This is a followup on #125, and progress toward #95
This commit is contained in:
parent
95d356c08f
commit
657f219ce8
81
piet-gpu-hal/src/bestfit.rs
Normal file
81
piet-gpu-hal/src/bestfit.rs
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Copyright © 2021 piet-gpu developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! A simple best-fit allocator.
|
||||
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
/// An allocator that tracks free ranges and returns best fit.
///
/// The allocator manages the index range `0..size` given to [`BestFit::new`]. It only
/// does bookkeeping: callers must remember the size of every allocation and pass it back
/// to [`BestFit::free`]. Freeing a range that was not allocated (or freeing twice) is not
/// detected and leaves the allocator in an inconsistent state.
pub struct BestFit {
    // Maps the offset of each free block to its size.
    free_by_ix: BTreeMap<u32, u32>,
    // The same free blocks as `(size, offset)` pairs. Ordering by size first lets a range
    // query find the smallest block that is large enough.
    free_by_size: BTreeSet<(u32, u32)>,
}

impl BestFit {
    /// Creates an allocator in which the whole range `0..size` is one free block.
    pub fn new(size: u32) -> BestFit {
        let mut allocator = BestFit {
            free_by_ix: BTreeMap::new(),
            free_by_size: BTreeSet::new(),
        };
        allocator.insert_free(0, size);
        allocator
    }

    /// Allocates `size` contiguous units from the smallest free block that can hold them.
    ///
    /// Returns the offset of the allocation, or `None` if no free block is large enough.
    /// Among equally sized candidate blocks the one at the lowest offset is used.
    pub fn alloc(&mut self, size: u32) -> Option<u32> {
        // `(size, 0)` is the smallest possible key for a block of at least `size` units.
        let (block_size, ix) = *self.free_by_size.range((size, 0)..).next()?;
        self.remove_free(ix, block_size);
        // Whatever is left over after carving out the front of the block stays free.
        let fragment_size = block_size - size;
        if fragment_size > 0 {
            self.insert_free(ix + size, fragment_size);
        }
        Some(ix)
    }

    /// Returns the range `ix..ix + size` to the allocator.
    ///
    /// The range is merged with the free block ending exactly at `ix` and/or the free
    /// block starting exactly at `ix + size`, when those exist, so that adjacent free
    /// space is always represented by a single block.
    pub fn free(&mut self, ix: u32, size: u32) {
        let mut start = ix;
        let mut len = size;

        // Closest free block that starts before `ix`; it is a neighbour only if it ends at `ix`.
        let prev = self
            .free_by_ix
            .range(..ix)
            .next_back()
            .map(|(&prev_ix, &prev_size)| (prev_ix, prev_size));
        if let Some((prev_ix, prev_size)) = prev {
            if prev_ix + prev_size == ix {
                self.remove_free(prev_ix, prev_size);
                start = prev_ix;
                len += prev_size;
            }
        }

        // A free block that starts right where the freed range ends.
        let next_ix = ix + size;
        if let Some(&next_size) = self.free_by_ix.get(&next_ix) {
            self.remove_free(next_ix, next_size);
            len += next_size;
        }

        self.insert_free(start, len);
    }

    /// Records a free block in both indexes so that they stay in sync.
    fn insert_free(&mut self, ix: u32, size: u32) {
        self.free_by_ix.insert(ix, size);
        self.free_by_size.insert((size, ix));
    }

    /// Removes a free block from both indexes so that they stay in sync.
    fn remove_free(&mut self, ix: u32, size: u32) {
        self.free_by_ix.remove(&ix);
        self.free_by_size.remove(&(size, ix));
    }
}
|
|
@ -1,14 +1,20 @@
|
|||
//! DX12 implementation of HAL trait.
|
||||
|
||||
mod descriptor;
|
||||
mod error;
|
||||
mod wrappers;
|
||||
|
||||
use std::{cell::Cell, convert::{TryFrom, TryInto}, mem, ptr};
|
||||
use std::{
|
||||
cell::Cell,
|
||||
convert::{TryFrom, TryInto},
|
||||
mem, ptr,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use winapi::shared::minwindef::TRUE;
|
||||
use winapi::shared::{dxgi, dxgi1_2, dxgitype};
|
||||
#[allow(unused)]
|
||||
use winapi::shared::dxgi1_3; // for error reporting in debug mode
|
||||
use winapi::shared::minwindef::TRUE;
|
||||
use winapi::shared::{dxgi, dxgi1_2, dxgitype};
|
||||
use winapi::um::d3d12;
|
||||
|
||||
use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
|
||||
|
@ -17,7 +23,12 @@ use smallvec::SmallVec;
|
|||
|
||||
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, WorkgroupLimits};
|
||||
|
||||
use self::wrappers::{CommandAllocator, CommandQueue, Device, Factory4, Resource, ShaderByteCode};
|
||||
use self::{
|
||||
descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned},
|
||||
wrappers::{
|
||||
CommandAllocator, CommandQueue, DescriptorHeap, Device, Factory4, Resource, ShaderByteCode,
|
||||
},
|
||||
};
|
||||
|
||||
pub struct Dx12Instance {
|
||||
factory: Factory4,
|
||||
|
@ -38,17 +49,26 @@ pub struct Dx12Device {
|
|||
ts_freq: u64,
|
||||
gpu_info: GpuInfo,
|
||||
memory_arch: MemoryArchitecture,
|
||||
descriptor_pool: Mutex<DescriptorPool>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Buffer {
|
||||
resource: Resource,
|
||||
pub size: u64,
|
||||
// Always present except for query readback buffer.
|
||||
cpu_ref: Option<Arc<CpuHeapRefOwned>>,
|
||||
// Present when created with CLEAR usage. Heap is here for
|
||||
// the same reason it's in DescriptorSet, and might be removed
|
||||
// when CmdBuf has access to the descriptor pool.
|
||||
gpu_ref: Option<(Arc<GpuHeapRefOwned>, DescriptorHeap)>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Image {
|
||||
resource: Resource,
|
||||
// Present except for swapchain images.
|
||||
cpu_ref: Option<Arc<CpuHeapRefOwned>>,
|
||||
size: (u32, u32),
|
||||
}
|
||||
|
||||
|
@ -63,13 +83,17 @@ pub struct Pipeline {
|
|||
root_signature: wrappers::RootSignature,
|
||||
}
|
||||
|
||||
// Right now, each descriptor set gets its own heap, but we'll move
|
||||
// to a more sophisticated allocation scheme, probably using the
|
||||
// gpu-descriptor crate.
|
||||
pub struct DescriptorSet(wrappers::DescriptorHeap);
|
||||
pub struct DescriptorSet {
|
||||
gpu_ref: GpuHeapRefOwned,
|
||||
// Note: the heap is only needed here so CmdBuf::dispatch can
|
||||
// use it easily. If CmdBuf had a reference to the Device (or just
|
||||
// the descriptor pool), we could get rid of this.
|
||||
heap: DescriptorHeap,
|
||||
}
|
||||
|
||||
pub struct QueryPool {
|
||||
heap: wrappers::QueryHeap,
|
||||
// Maybe this should just be a Resource, not a full Buffer.
|
||||
buf: Buffer,
|
||||
n_queries: u32,
|
||||
}
|
||||
|
@ -85,11 +109,9 @@ pub struct Fence {
|
|||
/// semaphore is needed for presentation on DX12.
|
||||
pub struct Semaphore;
|
||||
|
||||
// TODO
|
||||
#[derive(Default)]
|
||||
pub struct DescriptorSetBuilder {
|
||||
buffers: Vec<Buffer>,
|
||||
images: Vec<Image>,
|
||||
handles: SmallVec<[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE; 16]>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
|
@ -175,12 +197,14 @@ impl Dx12Instance {
|
|||
has_memory_model: false,
|
||||
use_staging_buffers,
|
||||
};
|
||||
let descriptor_pool = Default::default();
|
||||
Ok(Dx12Device {
|
||||
device,
|
||||
command_queue,
|
||||
ts_freq,
|
||||
memory_arch,
|
||||
gpu_info,
|
||||
descriptor_pool,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -251,14 +275,44 @@ impl crate::backend::Device for Dx12Device {
|
|||
//TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
|
||||
let flags = d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
let resource = self.device.create_buffer(
|
||||
size.try_into()?,
|
||||
size,
|
||||
d3d12::D3D12_HEAP_TYPE_CUSTOM,
|
||||
page_property,
|
||||
memory_pool,
|
||||
d3d12::D3D12_RESOURCE_STATE_COMMON,
|
||||
flags,
|
||||
)?;
|
||||
Ok(Buffer { resource, size })
|
||||
let mut descriptor_pool = self.descriptor_pool.lock().unwrap();
|
||||
let cpu_ref = Arc::new(descriptor_pool.alloc_cpu(&self.device)?);
|
||||
let cpu_handle = descriptor_pool.cpu_handle(&cpu_ref);
|
||||
self.device
|
||||
.create_byte_addressed_buffer_unordered_access_view(
|
||||
&resource,
|
||||
cpu_handle,
|
||||
0,
|
||||
(size / 4).try_into()?,
|
||||
);
|
||||
let gpu_ref = if usage.contains(BufferUsage::CLEAR) {
|
||||
let gpu_ref = Arc::new(descriptor_pool.alloc_gpu(&self.device, 1)?);
|
||||
let gpu_handle = descriptor_pool.cpu_handle_of_gpu(&gpu_ref, 0);
|
||||
self.device.copy_descriptors(
|
||||
&[gpu_handle],
|
||||
&[1],
|
||||
&[cpu_handle],
|
||||
&[1],
|
||||
d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
);
|
||||
let heap = descriptor_pool.gpu_heap(&gpu_ref).to_owned();
|
||||
Some((gpu_ref, heap))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Ok(Buffer {
|
||||
resource,
|
||||
size,
|
||||
cpu_ref: Some(cpu_ref),
|
||||
gpu_ref,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -272,8 +326,18 @@ impl crate::backend::Device for Dx12Device {
|
|||
let resource = self
|
||||
.device
|
||||
.create_texture2d_buffer(width.into(), height, format, true)?;
|
||||
|
||||
let mut descriptor_pool = self.descriptor_pool.lock().unwrap();
|
||||
let cpu_ref = Arc::new(descriptor_pool.alloc_cpu(&self.device)?);
|
||||
let cpu_handle = descriptor_pool.cpu_handle(&cpu_ref);
|
||||
self.device
|
||||
.create_unordered_access_view(&resource, cpu_handle);
|
||||
let size = (width, height);
|
||||
Ok(Image { resource, size })
|
||||
Ok(Image {
|
||||
resource,
|
||||
cpu_ref: Some(cpu_ref),
|
||||
size,
|
||||
})
|
||||
}
|
||||
|
||||
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error> {
|
||||
|
@ -424,7 +488,9 @@ impl crate::backend::Device for Dx12Device {
|
|||
let mut i = 0;
|
||||
fn map_range_type(bind_type: BindType) -> d3d12::D3D12_DESCRIPTOR_RANGE_TYPE {
|
||||
match bind_type {
|
||||
BindType::Buffer | BindType::Image | BindType::ImageRead => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
|
||||
BindType::Buffer | BindType::Image | BindType::ImageRead => {
|
||||
d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV
|
||||
}
|
||||
BindType::BufReadOnly => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
|
||||
}
|
||||
}
|
||||
|
@ -482,9 +548,7 @@ impl crate::backend::Device for Dx12Device {
|
|||
&root_signature_desc,
|
||||
d3d12::D3D_ROOT_SIGNATURE_VERSION_1,
|
||||
)?;
|
||||
let root_signature = self
|
||||
.device
|
||||
.create_root_signature(0, root_signature_blob)?;
|
||||
let root_signature = self.device.create_root_signature(0, root_signature_blob)?;
|
||||
let desc = d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC {
|
||||
pRootSignature: root_signature.0.as_raw(),
|
||||
CS: shader.bytecode,
|
||||
|
@ -515,14 +579,21 @@ impl Dx12Device {
|
|||
fn create_readback_buffer(&self, size: u64) -> Result<Buffer, Error> {
|
||||
unsafe {
|
||||
let resource = self.device.create_buffer(
|
||||
size.try_into()?,
|
||||
size,
|
||||
d3d12::D3D12_HEAP_TYPE_READBACK,
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
d3d12::D3D12_MEMORY_POOL_UNKNOWN,
|
||||
d3d12::D3D12_RESOURCE_STATE_COPY_DEST,
|
||||
d3d12::D3D12_RESOURCE_FLAG_NONE,
|
||||
)?;
|
||||
Ok(Buffer { resource, size })
|
||||
let cpu_ref = None;
|
||||
let gpu_ref = None;
|
||||
Ok(Buffer {
|
||||
resource,
|
||||
size,
|
||||
cpu_ref,
|
||||
gpu_ref,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -551,11 +622,10 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
self.c.set_pipeline_state(&pipeline.pipeline_state);
|
||||
self.c
|
||||
.set_compute_pipeline_root_signature(&pipeline.root_signature);
|
||||
self.c.set_descriptor_heaps(&[&descriptor_set.0]);
|
||||
self.c.set_compute_root_descriptor_table(
|
||||
0,
|
||||
descriptor_set.0.get_gpu_descriptor_handle_at_offset(0),
|
||||
);
|
||||
// TODO: persist heap ix and only set if changed.
|
||||
self.c.set_descriptor_heaps(&[&descriptor_set.heap]);
|
||||
self.c
|
||||
.set_compute_root_descriptor_table(0, descriptor_set.gpu_ref.gpu_handle());
|
||||
self.c
|
||||
.dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2);
|
||||
}
|
||||
|
@ -598,10 +668,22 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
self.memory_barrier();
|
||||
}
|
||||
|
||||
unsafe fn clear_buffer(&self, _buffer: &Buffer, _size: Option<u64>) {
|
||||
// Open question: do we call ClearUnorderedAccessViewUint or dispatch a
|
||||
// compute shader? Either way we will need descriptors here.
|
||||
todo!()
|
||||
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
|
||||
let cpu_ref = buffer.cpu_ref.as_ref().unwrap();
|
||||
let (gpu_ref, heap) = buffer
|
||||
.gpu_ref
|
||||
.as_ref()
|
||||
.expect("Need to set CLEAR usage on buffer");
|
||||
// Same TODO as dispatch: track and only set if changed.
|
||||
self.c.set_descriptor_heaps(&[heap]);
|
||||
// Discussion question: would compute shader be faster? Should measure.
|
||||
self.c.clear_uav(
|
||||
gpu_ref.gpu_handle(),
|
||||
cpu_ref.handle(),
|
||||
&buffer.resource,
|
||||
0,
|
||||
size,
|
||||
);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||
|
@ -638,14 +720,15 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
|||
|
||||
impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
|
||||
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||
// Note: we could get rid of the clone here (which is an AddRef)
|
||||
// and store a raw pointer, as it's a safety precondition that
|
||||
// the resources are kept alive til build.
|
||||
self.buffers.extend(buffers.iter().copied().cloned());
|
||||
for buf in buffers {
|
||||
self.handles.push(buf.cpu_ref.as_ref().unwrap().handle());
|
||||
}
|
||||
}
|
||||
|
||||
fn add_images(&mut self, images: &[&Image]) {
|
||||
self.images.extend(images.iter().copied().cloned());
|
||||
for img in images {
|
||||
self.handles.push(img.cpu_ref.as_ref().unwrap().handle());
|
||||
}
|
||||
}
|
||||
|
||||
fn add_textures(&mut self, _images: &[&Image]) {
|
||||
|
@ -657,34 +740,12 @@ impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
|
|||
device: &Dx12Device,
|
||||
_pipeline: &Pipeline,
|
||||
) -> Result<DescriptorSet, Error> {
|
||||
let n_descriptors = self.buffers.len() + self.images.len();
|
||||
let heap_desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
|
||||
Type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
NumDescriptors: n_descriptors.try_into()?,
|
||||
Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
||||
NodeMask: 0,
|
||||
};
|
||||
let heap = device.device.create_descriptor_heap(&heap_desc)?;
|
||||
let mut ix = 0;
|
||||
for buffer in self.buffers {
|
||||
device
|
||||
.device
|
||||
.create_byte_addressed_buffer_unordered_access_view(
|
||||
&buffer.resource,
|
||||
heap.get_cpu_descriptor_handle_at_offset(ix),
|
||||
0,
|
||||
(buffer.size / 4).try_into()?,
|
||||
);
|
||||
ix += 1;
|
||||
}
|
||||
for image in self.images {
|
||||
device.device.create_unordered_access_view(
|
||||
&image.resource,
|
||||
heap.get_cpu_descriptor_handle_at_offset(ix),
|
||||
);
|
||||
ix += 1;
|
||||
}
|
||||
Ok(DescriptorSet(heap))
|
||||
let mut descriptor_pool = device.descriptor_pool.lock().unwrap();
|
||||
let n_descriptors = self.handles.len().try_into()?;
|
||||
let gpu_ref = descriptor_pool.alloc_gpu(&device.device, n_descriptors)?;
|
||||
gpu_ref.copy_descriptors(&device.device, &self.handles);
|
||||
let heap = descriptor_pool.gpu_heap(&gpu_ref).to_owned();
|
||||
Ok(DescriptorSet { gpu_ref, heap })
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -737,6 +798,7 @@ impl Dx12Swapchain {
|
|||
let buffer = self.swapchain.get_buffer(idx as u32);
|
||||
Image {
|
||||
resource: buffer,
|
||||
cpu_ref: None,
|
||||
size: self.size,
|
||||
}
|
||||
}
|
||||
|
|
295
piet-gpu-hal/src/dx12/descriptor.rs
Normal file
295
piet-gpu-hal/src/dx12/descriptor.rs
Normal file
|
@ -0,0 +1,295 @@
|
|||
// Copyright © 2021 piet-gpu developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! Descriptor management.
|
||||
|
||||
use std::{
|
||||
convert::TryInto,
|
||||
ops::Deref,
|
||||
sync::{Arc, Mutex, Weak},
|
||||
};
|
||||
|
||||
use smallvec::SmallVec;
|
||||
use winapi::um::d3d12::{
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
|
||||
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
};
|
||||
|
||||
use crate::{bestfit::BestFit, Error};
|
||||
|
||||
use super::wrappers::{DescriptorHeap, Device};
|
||||
|
||||
const CPU_CHUNK_SIZE: u32 = 256;
|
||||
const GPU_CHUNK_SIZE: u32 = 4096;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DescriptorPool {
|
||||
cpu_visible: Vec<CpuHeap>,
|
||||
gpu_visible: Vec<GpuHeap>,
|
||||
free_list: Arc<Mutex<DescriptorFreeList>>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DescriptorFreeList {
|
||||
cpu_free: Vec<Vec<u32>>,
|
||||
gpu_free: Vec<BestFit>,
|
||||
}
|
||||
|
||||
struct CpuHeap {
|
||||
// Retained for lifetime reasons.
|
||||
#[allow(unused)]
|
||||
dx12_heap: DescriptorHeap,
|
||||
cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||
increment_size: u32,
|
||||
}
|
||||
|
||||
/// Location of a single descriptor inside one of the pool's CPU heaps.
pub struct CpuHeapRef {
    // Index of the heap within the pool.
    heap_ix: usize,
    // Descriptor index within that heap.
    offset: u32,
}
|
||||
|
||||
/// An owned reference to the CPU heap.
|
||||
///
|
||||
/// When dropped, the corresponding heap range will be freed.
|
||||
pub struct CpuHeapRefOwned {
|
||||
heap_ref: CpuHeapRef,
|
||||
handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||
free_list: Weak<Mutex<DescriptorFreeList>>,
|
||||
}
|
||||
|
||||
/// A shader-visible descriptor heap.
|
||||
struct GpuHeap {
|
||||
dx12_heap: DescriptorHeap,
|
||||
cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||
gpu_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
increment_size: u32,
|
||||
}
|
||||
|
||||
/// Location of a contiguous run of descriptors inside one of the pool's GPU heaps.
pub struct GpuHeapRef {
    // Index of the heap within the pool.
    heap_ix: usize,
    // Index of the first descriptor of the run within that heap.
    offset: u32,
    // Number of descriptors in the run.
    n: u32,
}
|
||||
|
||||
/// An owned reference to the GPU heap.
|
||||
///
|
||||
/// When dropped, the corresponding heap range will be freed.
|
||||
pub struct GpuHeapRefOwned {
|
||||
heap_ref: GpuHeapRef,
|
||||
cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||
gpu_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
free_list: Weak<Mutex<DescriptorFreeList>>,
|
||||
}
|
||||
|
||||
impl DescriptorPool {
|
||||
pub fn alloc_cpu(&mut self, device: &Device) -> Result<CpuHeapRefOwned, Error> {
|
||||
let free_list = &self.free_list;
|
||||
let mk_owned = |heap_ref, handle| CpuHeapRefOwned {
|
||||
heap_ref,
|
||||
handle,
|
||||
free_list: Arc::downgrade(free_list),
|
||||
};
|
||||
let mut free_list = free_list.lock().unwrap();
|
||||
for (heap_ix, free) in free_list.cpu_free.iter_mut().enumerate() {
|
||||
if let Some(offset) = free.pop() {
|
||||
let handle = self.cpu_visible[heap_ix].cpu_handle(offset);
|
||||
return Ok(mk_owned(CpuHeapRef { heap_ix, offset }, handle));
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
let heap_type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
let desc = D3D12_DESCRIPTOR_HEAP_DESC {
|
||||
Type: heap_type,
|
||||
NumDescriptors: CPU_CHUNK_SIZE,
|
||||
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
|
||||
NodeMask: 0,
|
||||
};
|
||||
let dx12_heap = device.create_descriptor_heap(&desc)?;
|
||||
let mut free = (0..CPU_CHUNK_SIZE).rev().collect::<Vec<_>>();
|
||||
let offset = free.pop().unwrap();
|
||||
debug_assert_eq!(offset, 0);
|
||||
let heap_ref = CpuHeapRef {
|
||||
heap_ix: self.cpu_visible.len(),
|
||||
offset,
|
||||
};
|
||||
let cpu_handle = dx12_heap.get_cpu_descriptor_handle_for_heap_start();
|
||||
let increment_size = device.get_descriptor_increment_size(heap_type);
|
||||
let heap = CpuHeap {
|
||||
dx12_heap,
|
||||
cpu_handle,
|
||||
increment_size,
|
||||
};
|
||||
self.cpu_visible.push(heap);
|
||||
free_list.cpu_free.push(free);
|
||||
Ok(mk_owned(heap_ref, cpu_handle))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cpu_handle(&self, cpu_ref: &CpuHeapRef) -> D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
self.cpu_visible[cpu_ref.heap_ix].cpu_handle(cpu_ref.offset)
|
||||
}
|
||||
|
||||
pub fn alloc_gpu(&mut self, device: &Device, n: u32) -> Result<GpuHeapRefOwned, Error> {
|
||||
let free_list = &self.free_list;
|
||||
let mk_owned = |heap_ref, cpu_handle, gpu_handle| GpuHeapRefOwned {
|
||||
heap_ref,
|
||||
cpu_handle,
|
||||
gpu_handle,
|
||||
free_list: Arc::downgrade(free_list),
|
||||
};
|
||||
let mut free_list = free_list.lock().unwrap();
|
||||
for (heap_ix, free) in free_list.gpu_free.iter_mut().enumerate() {
|
||||
if let Some(offset) = free.alloc(n) {
|
||||
let heap = &self.gpu_visible[heap_ix];
|
||||
let cpu_handle = heap.cpu_handle(offset);
|
||||
let gpu_handle = heap.gpu_handle(offset);
|
||||
return Ok(mk_owned(
|
||||
GpuHeapRef { heap_ix, offset, n },
|
||||
cpu_handle,
|
||||
gpu_handle,
|
||||
));
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
let size = n.max(GPU_CHUNK_SIZE).next_power_of_two();
|
||||
let heap_type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
let desc = D3D12_DESCRIPTOR_HEAP_DESC {
|
||||
Type: heap_type,
|
||||
NumDescriptors: size,
|
||||
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
||||
NodeMask: 0,
|
||||
};
|
||||
let dx12_heap = device.create_descriptor_heap(&desc)?;
|
||||
let heap_ix = self.gpu_visible.len();
|
||||
let mut free = BestFit::new(size);
|
||||
let offset = free.alloc(n).unwrap();
|
||||
// We assume the first allocation is at 0, to avoid recomputing offsets.
|
||||
debug_assert_eq!(offset, 0);
|
||||
let cpu_handle = dx12_heap.get_cpu_descriptor_handle_for_heap_start();
|
||||
let gpu_handle = dx12_heap.get_gpu_descriptor_handle_for_heap_start();
|
||||
let increment_size = device.get_descriptor_increment_size(heap_type);
|
||||
let heap = GpuHeap {
|
||||
dx12_heap,
|
||||
cpu_handle,
|
||||
gpu_handle,
|
||||
increment_size,
|
||||
};
|
||||
self.gpu_visible.push(heap);
|
||||
free_list.gpu_free.push(free);
|
||||
Ok(mk_owned(
|
||||
GpuHeapRef { heap_ix, offset, n },
|
||||
cpu_handle,
|
||||
gpu_handle,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cpu_handle_of_gpu(
|
||||
&self,
|
||||
gpu_ref: &GpuHeapRef,
|
||||
offset: u32,
|
||||
) -> D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
debug_assert!(offset < gpu_ref.n);
|
||||
let dx12_heap = &self.gpu_visible[gpu_ref.heap_ix];
|
||||
dx12_heap.cpu_handle(gpu_ref.offset + offset)
|
||||
}
|
||||
|
||||
pub fn gpu_heap(&self, gpu_ref: &GpuHeapRef) -> &DescriptorHeap {
|
||||
&self.gpu_visible[gpu_ref.heap_ix].dx12_heap
|
||||
}
|
||||
}
|
||||
|
||||
impl DescriptorFreeList {
|
||||
fn free_cpu(&mut self, cpu_ref: &CpuHeapRef) {
|
||||
self.cpu_free[cpu_ref.heap_ix].push(cpu_ref.offset);
|
||||
}
|
||||
|
||||
fn free_gpu(&mut self, gpu_ref: &GpuHeapRef) {
|
||||
self.gpu_free[gpu_ref.heap_ix].free(gpu_ref.offset, gpu_ref.n);
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CpuHeapRefOwned {
|
||||
fn drop(&mut self) {
|
||||
if let Some(a) = self.free_list.upgrade() {
|
||||
a.lock().unwrap().free_cpu(&self.heap_ref)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CpuHeapRefOwned {
|
||||
pub fn handle(&self) -> D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
self.handle
|
||||
}
|
||||
}
|
||||
|
||||
impl GpuHeapRefOwned {
|
||||
pub fn gpu_handle(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE {
|
||||
self.gpu_handle
|
||||
}
|
||||
|
||||
pub unsafe fn copy_descriptors(&self, device: &Device, src: &[D3D12_CPU_DESCRIPTOR_HANDLE]) {
|
||||
// TODO: optimize a bit (use simple variant where appropriate)
|
||||
let n = src.len().try_into().unwrap();
|
||||
let sizes = (0..n).map(|_| 1).collect::<SmallVec<[u32; 16]>>();
|
||||
device.copy_descriptors(
|
||||
&[self.cpu_handle],
|
||||
&[n],
|
||||
src,
|
||||
&sizes,
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for CpuHeapRefOwned {
|
||||
type Target = CpuHeapRef;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.heap_ref
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for GpuHeapRefOwned {
|
||||
fn drop(&mut self) {
|
||||
if let Some(a) = self.free_list.upgrade() {
|
||||
a.lock().unwrap().free_gpu(&self.heap_ref)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for GpuHeapRefOwned {
|
||||
type Target = GpuHeapRef;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.heap_ref
|
||||
}
|
||||
}
|
||||
|
||||
impl CpuHeap {
|
||||
fn cpu_handle(&self, offset: u32) -> D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
let mut handle = self.cpu_handle;
|
||||
handle.ptr += (offset as usize) * (self.increment_size as usize);
|
||||
handle
|
||||
}
|
||||
}
|
||||
|
||||
impl GpuHeap {
|
||||
fn cpu_handle(&self, offset: u32) -> D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||
let mut handle = self.cpu_handle;
|
||||
handle.ptr += (offset as usize) * (self.increment_size as usize);
|
||||
handle
|
||||
}
|
||||
|
||||
fn gpu_handle(&self, offset: u32) -> D3D12_GPU_DESCRIPTOR_HANDLE {
|
||||
let mut handle = self.gpu_handle;
|
||||
handle.ptr += (offset as u64) * (self.increment_size as u64);
|
||||
handle
|
||||
}
|
||||
}
|
|
@ -7,6 +7,7 @@
|
|||
// except according to those terms.
|
||||
|
||||
use crate::dx12::error::{self, error_if_failed_else_unit, explain_error, Error};
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||
use std::{ffi, mem, ptr};
|
||||
|
@ -51,11 +52,7 @@ pub type CpuDescriptor = d3d12::D3D12_CPU_DESCRIPTOR_HANDLE;
|
|||
pub type GpuDescriptor = d3d12::D3D12_GPU_DESCRIPTOR_HANDLE;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DescriptorHeap {
|
||||
pub heap_type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
|
||||
pub increment_size: u32,
|
||||
pub heap: ComPtr<d3d12::ID3D12DescriptorHeap>,
|
||||
}
|
||||
pub struct DescriptorHeap(ComPtr<d3d12::ID3D12DescriptorHeap>);
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RootSignature(pub ComPtr<d3d12::ID3D12RootSignature>);
|
||||
|
@ -381,11 +378,7 @@ impl Device {
|
|||
"device could not create descriptor heap",
|
||||
)?;
|
||||
|
||||
Ok(DescriptorHeap {
|
||||
heap_type: heap_description.Type,
|
||||
increment_size: self.get_descriptor_increment_size(heap_description.Type),
|
||||
heap: ComPtr::from_raw(heap),
|
||||
})
|
||||
Ok(DescriptorHeap(ComPtr::from_raw(heap)))
|
||||
}
|
||||
|
||||
pub unsafe fn get_descriptor_increment_size(
|
||||
|
@ -395,6 +388,31 @@ impl Device {
|
|||
self.0.GetDescriptorHandleIncrementSize(heap_type)
|
||||
}
|
||||
|
||||
pub unsafe fn copy_descriptors(
|
||||
&self,
|
||||
dst_starts: &[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE],
|
||||
dst_sizes: &[u32],
|
||||
src_starts: &[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE],
|
||||
src_sizes: &[u32],
|
||||
descriptor_heap_type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
|
||||
) {
|
||||
debug_assert_eq!(dst_starts.len(), dst_sizes.len());
|
||||
debug_assert_eq!(src_starts.len(), src_sizes.len());
|
||||
debug_assert_eq!(
|
||||
src_sizes.iter().copied().sum::<u32>(),
|
||||
dst_sizes.iter().copied().sum()
|
||||
);
|
||||
self.0.CopyDescriptors(
|
||||
dst_starts.len().try_into().unwrap(),
|
||||
dst_starts.as_ptr(),
|
||||
dst_sizes.as_ptr(),
|
||||
src_starts.len().try_into().unwrap(),
|
||||
src_starts.as_ptr(),
|
||||
src_sizes.as_ptr(),
|
||||
descriptor_heap_type,
|
||||
);
|
||||
}
|
||||
|
||||
pub unsafe fn create_compute_pipeline_state(
|
||||
&self,
|
||||
compute_pipeline_desc: &d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC,
|
||||
|
@ -564,7 +582,7 @@ impl Device {
|
|||
|
||||
pub unsafe fn create_buffer(
|
||||
&self,
|
||||
buffer_size_in_bytes: u32,
|
||||
buffer_size_in_bytes: u64,
|
||||
heap_type: d3d12::D3D12_HEAP_TYPE,
|
||||
cpu_page: d3d12::D3D12_CPU_PAGE_PROPERTY,
|
||||
memory_pool_preference: d3d12::D3D12_MEMORY_POOL,
|
||||
|
@ -581,7 +599,7 @@ impl Device {
|
|||
};
|
||||
let resource_description = d3d12::D3D12_RESOURCE_DESC {
|
||||
Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
Width: buffer_size_in_bytes as u64,
|
||||
Width: buffer_size_in_bytes,
|
||||
Height: 1,
|
||||
DepthOrArraySize: 1,
|
||||
MipLevels: 1,
|
||||
|
@ -681,26 +699,12 @@ impl Device {
|
|||
}
|
||||
|
||||
impl DescriptorHeap {
|
||||
unsafe fn get_cpu_descriptor_handle_for_heap_start(&self) -> CpuDescriptor {
|
||||
self.heap.GetCPUDescriptorHandleForHeapStart()
|
||||
pub unsafe fn get_cpu_descriptor_handle_for_heap_start(&self) -> CpuDescriptor {
|
||||
self.0.GetCPUDescriptorHandleForHeapStart()
|
||||
}
|
||||
|
||||
unsafe fn get_gpu_descriptor_handle_for_heap_start(&self) -> GpuDescriptor {
|
||||
self.heap.GetGPUDescriptorHandleForHeapStart()
|
||||
}
|
||||
|
||||
pub unsafe fn get_cpu_descriptor_handle_at_offset(&self, offset: u32) -> CpuDescriptor {
|
||||
let mut descriptor = self.get_cpu_descriptor_handle_for_heap_start();
|
||||
descriptor.ptr += (offset as usize) * (self.increment_size as usize);
|
||||
|
||||
descriptor
|
||||
}
|
||||
|
||||
pub unsafe fn get_gpu_descriptor_handle_at_offset(&self, offset: u32) -> GpuDescriptor {
|
||||
let mut descriptor = self.get_gpu_descriptor_handle_for_heap_start();
|
||||
descriptor.ptr += (offset as u64) * (self.increment_size as u64);
|
||||
|
||||
descriptor
|
||||
pub unsafe fn get_gpu_descriptor_handle_for_heap_start(&self) -> GpuDescriptor {
|
||||
self.0.GetGPUDescriptorHandleForHeapStart()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -923,8 +927,8 @@ impl GraphicsCommandList {
|
|||
}
|
||||
|
||||
pub unsafe fn set_descriptor_heaps(&self, descriptor_heaps: &[&DescriptorHeap]) {
|
||||
let mut descriptor_heap_pointers: Vec<_> =
|
||||
descriptor_heaps.iter().map(|dh| dh.heap.as_raw()).collect();
|
||||
let mut descriptor_heap_pointers: SmallVec<[_; 4]> =
|
||||
descriptor_heaps.iter().map(|dh| dh.0.as_raw()).collect();
|
||||
self.0.SetDescriptorHeaps(
|
||||
u32::try_from(descriptor_heap_pointers.len())
|
||||
.expect("could not safely convert descriptor_heap_pointers.len() into u32"),
|
||||
|
@ -958,6 +962,38 @@ impl GraphicsCommandList {
|
|||
);
|
||||
}
|
||||
|
||||
pub unsafe fn clear_uav(
|
||||
&self,
|
||||
gpu_handle: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
|
||||
cpu_handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||
resource: &Resource,
|
||||
value: u32,
|
||||
size: Option<u64>,
|
||||
) {
|
||||
// In testing, only the first value seems to be used, but just in case...
|
||||
let values = [value, value, value, value];
|
||||
let mut rect = d3d12::D3D12_RECT {
|
||||
left: 0,
|
||||
right: 0,
|
||||
top: 0,
|
||||
bottom: 1,
|
||||
};
|
||||
let (num_rects, p_rects) = if let Some(size) = size {
|
||||
rect.right = (size / 4).try_into().unwrap();
|
||||
(1, &rect as *const _)
|
||||
} else {
|
||||
(0, std::ptr::null())
|
||||
};
|
||||
self.0.ClearUnorderedAccessViewUint(
|
||||
gpu_handle,
|
||||
cpu_handle,
|
||||
resource.get_mut(),
|
||||
&values,
|
||||
num_rects,
|
||||
p_rects,
|
||||
);
|
||||
}
|
||||
|
||||
/// Copy an entire resource (buffer or image)
|
||||
pub unsafe fn copy_resource(&self, src: &Resource, dst: &Resource) {
|
||||
self.0.CopyResource(dst.get_mut(), src.get_mut());
|
||||
|
|
|
@ -369,7 +369,13 @@ impl Session {
|
|||
}
|
||||
|
||||
/// Choose shader code from the available choices.
|
||||
pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, dxil: &'a [u8], msl: &'a str) -> ShaderCode<'a> {
|
||||
pub fn choose_shader<'a>(
|
||||
&self,
|
||||
spv: &'a [u8],
|
||||
hlsl: &'a str,
|
||||
dxil: &'a [u8],
|
||||
msl: &'a str,
|
||||
) -> ShaderCode<'a> {
|
||||
self.0.device.choose_shader(spv, hlsl, dxil, msl)
|
||||
}
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
use bitflags::bitflags;
|
||||
|
||||
mod backend;
|
||||
mod bestfit;
|
||||
mod hub;
|
||||
|
||||
#[macro_use]
|
||||
|
|
|
@ -44,7 +44,7 @@ pub struct ClearBinding {
|
|||
pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||
let mut result = TestResult::new("clear buffers");
|
||||
let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
|
||||
let out_buf = runner.buf_down(n_elements * 4);
|
||||
let out_buf = runner.buf_down(n_elements * 4, BufferUsage::empty());
|
||||
let code = ClearCode::new(runner);
|
||||
let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
|
||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
use piet_gpu_hal::{include_shader, BackendType, BindType, DescriptorSet};
|
||||
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
|
||||
use piet_gpu_hal::{Buffer, Pipeline};
|
||||
|
||||
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||
|
@ -41,7 +41,7 @@ struct LinkedListBinding {
|
|||
|
||||
pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||
let mut result = TestResult::new("linked list");
|
||||
let mem_buf = runner.buf_down(1024 * N_BUCKETS);
|
||||
let mem_buf = runner.buf_down(1024 * N_BUCKETS, BufferUsage::CLEAR);
|
||||
let code = LinkedListCode::new(runner);
|
||||
let stage = LinkedListStage::new(runner, &code, N_BUCKETS);
|
||||
let binding = stage.bind(runner, &code, &mem_buf.dev_buf);
|
||||
|
@ -77,7 +77,7 @@ impl LinkedListCode {
|
|||
.session
|
||||
.create_compute_pipeline(code, &[BindType::Buffer])
|
||||
.unwrap();
|
||||
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||
let clear_code = if runner.backend_type() == BackendType::Metal {
|
||||
Some(ClearCode::new(runner))
|
||||
} else {
|
||||
None
|
||||
|
|
|
@ -54,7 +54,7 @@ pub unsafe fn run_message_passing_test(
|
|||
variant: Variant,
|
||||
) -> TestResult {
|
||||
let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
|
||||
let out_buf = runner.buf_down(4);
|
||||
let out_buf = runner.buf_down(4, BufferUsage::CLEAR);
|
||||
let code = MessagePassingCode::new(runner, variant);
|
||||
let stage = MessagePassingStage::new(runner, &code);
|
||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||
|
@ -92,9 +92,9 @@ impl MessagePassingCode {
|
|||
.session
|
||||
.create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
|
||||
.unwrap();
|
||||
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
||||
// Currently, Metal backend doesn't support buffer clearing, so use a
|
||||
// compute shader as a workaround.
|
||||
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||
let clear_code = if runner.backend_type() == BackendType::Metal {
|
||||
Some(ClearCode::new(runner))
|
||||
} else {
|
||||
None
|
||||
|
@ -111,7 +111,10 @@ impl MessagePassingStage {
|
|||
let data_buf_size = 8 * N_ELEMENTS;
|
||||
let data_buf = runner
|
||||
.session
|
||||
.create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||
.create_buffer(
|
||||
data_buf_size,
|
||||
BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::CLEAR,
|
||||
)
|
||||
.unwrap();
|
||||
let clear_stages = if let Some(clear_code) = &code.clear_code {
|
||||
let stage0 = ClearStage::new(runner, N_ELEMENTS * 2);
|
||||
|
|
|
@ -77,7 +77,7 @@ pub unsafe fn run_prefix_test(
|
|||
.session
|
||||
.create_buffer_init(&data, BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
let out_buf = runner.buf_down(data_buf.size());
|
||||
let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty());
|
||||
let code = PrefixCode::new(runner, variant);
|
||||
let stage = PrefixStage::new(runner, &code, n_elements);
|
||||
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
||||
|
@ -121,7 +121,7 @@ impl PrefixCode {
|
|||
.unwrap();
|
||||
// Currently, Metal backend doesn't support buffer clearing, so use a
|
||||
// compute shader as a workaround.
|
||||
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||
let clear_code = if runner.backend_type() == BackendType::Metal {
|
||||
Some(ClearCode::new(runner))
|
||||
} else {
|
||||
None
|
||||
|
@ -139,7 +139,10 @@ impl PrefixStage {
|
|||
let state_buf_size = 4 + 12 * n_workgroups;
|
||||
let state_buf = runner
|
||||
.session
|
||||
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||
.create_buffer(
|
||||
state_buf_size,
|
||||
BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::CLEAR,
|
||||
)
|
||||
.unwrap();
|
||||
let clear_stage = if let Some(clear_code) = &code.clear_code {
|
||||
let stage = ClearStage::new(runner, state_buf_size / 4);
|
||||
|
|
|
@ -52,7 +52,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
|||
.session
|
||||
.create_buffer_init(&data, BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
let out_buf = runner.buf_down(data_buf.size());
|
||||
let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty());
|
||||
let code = PrefixTreeCode::new(runner);
|
||||
let stage = PrefixTreeStage::new(runner, n_elements);
|
||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||
|
|
|
@ -101,19 +101,17 @@ impl Runner {
|
|||
BufUp { stage_buf, dev_buf }
|
||||
}
|
||||
|
||||
pub fn buf_down(&self, size: u64) -> BufDown {
|
||||
/// Create a buffer for download (readback).
|
||||
///
|
||||
/// The `usage` parameter need not include COPY_SRC and STORAGE.
|
||||
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown {
|
||||
let stage_buf = self
|
||||
.session
|
||||
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
|
||||
.unwrap();
|
||||
// Note: the COPY_DST isn't needed in all use cases, but I don't think
|
||||
// making this tighter would help.
|
||||
let dev_buf = self
|
||||
.session
|
||||
.create_buffer(
|
||||
size,
|
||||
BufferUsage::COPY_SRC | BufferUsage::COPY_DST | BufferUsage::STORAGE,
|
||||
)
|
||||
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
BufDown { stage_buf, dev_buf }
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue