Merge pull request #132 from linebender/dx12_desc
Better DX12 descriptor management
Commit: ecdd7fd817

piet-gpu-hal/src/bestfit.rs (new file, +81)
@@ -0,0 +1,81 @@
// Copyright © 2021 piet-gpu developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! A simple best-fit allocator.

use std::collections::{BTreeMap, BTreeSet};

/// An allocator that tracks free ranges and returns best fit.
pub struct BestFit {
    // map offset to size of free block
    free_by_ix: BTreeMap<u32, u32>,
    // size and offset
    free_by_size: BTreeSet<(u32, u32)>,
}

impl BestFit {
    pub fn new(size: u32) -> BestFit {
        let mut free_by_ix = BTreeMap::new();
        free_by_ix.insert(0, size);
        let mut free_by_size = BTreeSet::new();
        free_by_size.insert((size, 0));
        BestFit {
            free_by_ix,
            free_by_size,
        }
    }

    pub fn alloc(&mut self, size: u32) -> Option<u32> {
        let block = *self.free_by_size.range((size, 0)..).next()?;
        let ix = block.1;
        self.free_by_ix.remove(&ix);
        self.free_by_size.remove(&block);
        let fragment_size = block.0 - size;
        if fragment_size > 0 {
            let fragment_ix = ix + size;
            self.free_by_ix.insert(fragment_ix, fragment_size);
            self.free_by_size.insert((fragment_size, fragment_ix));
        }
        Some(ix)
    }

    pub fn free(&mut self, ix: u32, size: u32) {
        let next_ix = size + ix;
        if let Some((&prev_ix, &prev_size)) = self.free_by_ix.range(..ix).rev().next() {
            if prev_ix + prev_size == ix {
                self.free_by_size.remove(&(prev_size, prev_ix));
                if let Some(&next_size) = self.free_by_ix.get(&next_ix) {
                    // consolidate with prev and next
                    let new_size = prev_size + size + next_size;
                    *self.free_by_ix.get_mut(&prev_ix).unwrap() = new_size;
                    self.free_by_ix.remove(&next_ix);
                    self.free_by_size.remove(&(next_size, next_ix));
                    self.free_by_size.insert((new_size, prev_ix));
                } else {
                    // consolidate with prev
                    let new_size = prev_size + size;
                    *self.free_by_ix.get_mut(&prev_ix).unwrap() = new_size;
                    self.free_by_size.insert((new_size, prev_ix));
                }
                return;
            }
        }
        if let Some(&next_size) = self.free_by_ix.get(&next_ix) {
            // consolidate with next
            let new_size = size + next_size;
            self.free_by_ix.remove(&next_ix);
            self.free_by_ix.insert(ix, new_size);
            self.free_by_size.remove(&(next_size, next_ix));
            self.free_by_size.insert((new_size, ix));
        } else {
            // new isolated free block
            self.free_by_ix.insert(ix, size);
            self.free_by_size.insert((size, ix));
        }
    }
}
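The allocator keeps two indexes over the same free list: free_by_ix for neighbor coalescing on free, and free_by_size for best-fit lookup on alloc. A quick sketch of how it behaves (not part of the diff; the offsets are illustrative, and 4096 matches the GPU_CHUNK_SIZE used by the descriptor pool below):

fn bestfit_demo() {
    use crate::bestfit::BestFit;

    // Manage a 4096-slot range, e.g. descriptor indices in one GPU heap chunk.
    let mut alloc = BestFit::new(4096);
    let a = alloc.alloc(16).unwrap(); // 0: carved from the front of the free block
    let b = alloc.alloc(16).unwrap(); // 16
    alloc.free(a, 16); // leaves a 16-slot hole at offset 0
    // Best fit reuses the exact-size hole rather than splitting the
    // large free block after `b`.
    assert_eq!(alloc.alloc(16), Some(0));
    alloc.free(b, 16); // coalesces with the adjacent free range
}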
piet-gpu-hal/src/dx12.rs (modified)
@@ -1,14 +1,20 @@
 //! DX12 implementation of HAL trait.

+mod descriptor;
 mod error;
 mod wrappers;

-use std::{cell::Cell, convert::{TryFrom, TryInto}, mem, ptr};
+use std::{
+    cell::Cell,
+    convert::{TryFrom, TryInto},
+    mem, ptr,
+    sync::{Arc, Mutex},
+};

-use winapi::shared::minwindef::TRUE;
-use winapi::shared::{dxgi, dxgi1_2, dxgitype};
 #[allow(unused)]
 use winapi::shared::dxgi1_3; // for error reporting in debug mode
+use winapi::shared::minwindef::TRUE;
+use winapi::shared::{dxgi, dxgi1_2, dxgitype};
 use winapi::um::d3d12;

 use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
@@ -17,7 +23,12 @@ use smallvec::SmallVec;

 use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, WorkgroupLimits};

-use self::wrappers::{CommandAllocator, CommandQueue, Device, Factory4, Resource, ShaderByteCode};
+use self::{
+    descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned},
+    wrappers::{
+        CommandAllocator, CommandQueue, DescriptorHeap, Device, Factory4, Resource, ShaderByteCode,
+    },
+};

 pub struct Dx12Instance {
     factory: Factory4,
@@ -38,17 +49,26 @@ pub struct Dx12Device {
     ts_freq: u64,
     gpu_info: GpuInfo,
     memory_arch: MemoryArchitecture,
+    descriptor_pool: Mutex<DescriptorPool>,
 }

 #[derive(Clone)]
 pub struct Buffer {
     resource: Resource,
     pub size: u64,
+    // Always present except for query readback buffer.
+    cpu_ref: Option<Arc<CpuHeapRefOwned>>,
+    // Present when created with CLEAR usage. Heap is here for
+    // the same reason it's in DescriptorSet, and might be removed
+    // when CmdBuf has access to the descriptor pool.
+    gpu_ref: Option<(Arc<GpuHeapRefOwned>, DescriptorHeap)>,
 }

 #[derive(Clone)]
 pub struct Image {
     resource: Resource,
+    // Present except for swapchain images.
+    cpu_ref: Option<Arc<CpuHeapRefOwned>>,
     size: (u32, u32),
 }

@@ -63,13 +83,17 @@ pub struct Pipeline {
     root_signature: wrappers::RootSignature,
 }

-// Right now, each descriptor set gets its own heap, but we'll move
-// to a more sophisticated allocation scheme, probably using the
-// gpu-descriptor crate.
-pub struct DescriptorSet(wrappers::DescriptorHeap);
+pub struct DescriptorSet {
+    gpu_ref: GpuHeapRefOwned,
+    // Note: the heap is only needed here so CmdBuf::dispatch can
+    // use it easily. If CmdBuf had a reference to the Device (or just
+    // the descriptor pool), we could get rid of this.
+    heap: DescriptorHeap,
+}

 pub struct QueryPool {
     heap: wrappers::QueryHeap,
+    // Maybe this should just be a Resource, not a full Buffer.
     buf: Buffer,
     n_queries: u32,
 }
@@ -85,11 +109,9 @@ pub struct Fence {
 /// semaphore is needed for presentation on DX12.
 pub struct Semaphore;

-// TODO
 #[derive(Default)]
 pub struct DescriptorSetBuilder {
-    buffers: Vec<Buffer>,
-    images: Vec<Image>,
+    handles: SmallVec<[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE; 16]>,
 }

 #[derive(PartialEq, Eq)]
@@ -175,12 +197,14 @@ impl Dx12Instance {
             has_memory_model: false,
             use_staging_buffers,
         };
+        let descriptor_pool = Default::default();
         Ok(Dx12Device {
            device,
            command_queue,
            ts_freq,
            memory_arch,
            gpu_info,
+           descriptor_pool,
        })
     }
 }
@@ -251,14 +275,44 @@ impl crate::backend::Device for Dx12Device {
         //TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
         let flags = d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
         let resource = self.device.create_buffer(
-            size.try_into()?,
+            size,
             d3d12::D3D12_HEAP_TYPE_CUSTOM,
             page_property,
             memory_pool,
             d3d12::D3D12_RESOURCE_STATE_COMMON,
             flags,
         )?;
-        Ok(Buffer { resource, size })
+        let mut descriptor_pool = self.descriptor_pool.lock().unwrap();
+        let cpu_ref = Arc::new(descriptor_pool.alloc_cpu(&self.device)?);
+        let cpu_handle = descriptor_pool.cpu_handle(&cpu_ref);
+        self.device
+            .create_byte_addressed_buffer_unordered_access_view(
+                &resource,
+                cpu_handle,
+                0,
+                (size / 4).try_into()?,
+            );
+        let gpu_ref = if usage.contains(BufferUsage::CLEAR) {
+            let gpu_ref = Arc::new(descriptor_pool.alloc_gpu(&self.device, 1)?);
+            let gpu_handle = descriptor_pool.cpu_handle_of_gpu(&gpu_ref, 0);
+            self.device.copy_descriptors(
+                &[gpu_handle],
+                &[1],
+                &[cpu_handle],
+                &[1],
+                d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+            );
+            let heap = descriptor_pool.gpu_heap(&gpu_ref).to_owned();
+            Some((gpu_ref, heap))
+        } else {
+            None
+        };
+        Ok(Buffer {
+            resource,
+            size,
+            cpu_ref: Some(cpu_ref),
+            gpu_ref,
+        })
     }
 }

@@ -272,8 +326,18 @@ impl crate::backend::Device for Dx12Device {
         let resource = self
             .device
             .create_texture2d_buffer(width.into(), height, format, true)?;
+
+        let mut descriptor_pool = self.descriptor_pool.lock().unwrap();
+        let cpu_ref = Arc::new(descriptor_pool.alloc_cpu(&self.device)?);
+        let cpu_handle = descriptor_pool.cpu_handle(&cpu_ref);
+        self.device
+            .create_unordered_access_view(&resource, cpu_handle);
         let size = (width, height);
-        Ok(Image { resource, size })
+        Ok(Image {
+            resource,
+            cpu_ref: Some(cpu_ref),
+            size,
+        })
     }

     unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error> {
@@ -424,7 +488,9 @@ impl crate::backend::Device for Dx12Device {
         let mut i = 0;
         fn map_range_type(bind_type: BindType) -> d3d12::D3D12_DESCRIPTOR_RANGE_TYPE {
             match bind_type {
-                BindType::Buffer | BindType::Image | BindType::ImageRead => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
+                BindType::Buffer | BindType::Image | BindType::ImageRead => {
+                    d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV
+                }
                 BindType::BufReadOnly => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
             }
         }
@@ -482,9 +548,7 @@ impl crate::backend::Device for Dx12Device {
             &root_signature_desc,
             d3d12::D3D_ROOT_SIGNATURE_VERSION_1,
         )?;
-        let root_signature = self
-            .device
-            .create_root_signature(0, root_signature_blob)?;
+        let root_signature = self.device.create_root_signature(0, root_signature_blob)?;
         let desc = d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC {
             pRootSignature: root_signature.0.as_raw(),
             CS: shader.bytecode,
@@ -515,14 +579,21 @@ impl Dx12Device {
     fn create_readback_buffer(&self, size: u64) -> Result<Buffer, Error> {
         unsafe {
             let resource = self.device.create_buffer(
-                size.try_into()?,
+                size,
                 d3d12::D3D12_HEAP_TYPE_READBACK,
                 d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
                 d3d12::D3D12_MEMORY_POOL_UNKNOWN,
                 d3d12::D3D12_RESOURCE_STATE_COPY_DEST,
                 d3d12::D3D12_RESOURCE_FLAG_NONE,
             )?;
-            Ok(Buffer { resource, size })
+            let cpu_ref = None;
+            let gpu_ref = None;
+            Ok(Buffer {
+                resource,
+                size,
+                cpu_ref,
+                gpu_ref,
+            })
         }
     }
 }
@@ -551,11 +622,10 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
         self.c.set_pipeline_state(&pipeline.pipeline_state);
         self.c
             .set_compute_pipeline_root_signature(&pipeline.root_signature);
-        self.c.set_descriptor_heaps(&[&descriptor_set.0]);
-        self.c.set_compute_root_descriptor_table(
-            0,
-            descriptor_set.0.get_gpu_descriptor_handle_at_offset(0),
-        );
+        // TODO: persist heap ix and only set if changed.
+        self.c.set_descriptor_heaps(&[&descriptor_set.heap]);
+        self.c
+            .set_compute_root_descriptor_table(0, descriptor_set.gpu_ref.gpu_handle());
         self.c
             .dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2);
     }
@@ -598,10 +668,22 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
         self.memory_barrier();
     }

-    unsafe fn clear_buffer(&self, _buffer: &Buffer, _size: Option<u64>) {
-        // Open question: do we call ClearUnorderedAccessViewUint or dispatch a
-        // compute shader? Either way we will need descriptors here.
-        todo!()
+    unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
+        let cpu_ref = buffer.cpu_ref.as_ref().unwrap();
+        let (gpu_ref, heap) = buffer
+            .gpu_ref
+            .as_ref()
+            .expect("Need to set CLEAR usage on buffer");
+        // Same TODO as dispatch: track and only set if changed.
+        self.c.set_descriptor_heaps(&[heap]);
+        // Discussion question: would compute shader be faster? Should measure.
+        self.c.clear_uav(
+            gpu_ref.gpu_handle(),
+            cpu_ref.handle(),
+            &buffer.resource,
+            0,
+            size,
+        );
     }

     unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
@@ -638,14 +720,15 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {

 impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
     fn add_buffers(&mut self, buffers: &[&Buffer]) {
-        // Note: we could get rid of the clone here (which is an AddRef)
-        // and store a raw pointer, as it's a safety precondition that
-        // the resources are kept alive til build.
-        self.buffers.extend(buffers.iter().copied().cloned());
+        for buf in buffers {
+            self.handles.push(buf.cpu_ref.as_ref().unwrap().handle());
+        }
     }

     fn add_images(&mut self, images: &[&Image]) {
-        self.images.extend(images.iter().copied().cloned());
+        for img in images {
+            self.handles.push(img.cpu_ref.as_ref().unwrap().handle());
+        }
     }

     fn add_textures(&mut self, _images: &[&Image]) {
@@ -657,34 +740,12 @@ impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
         device: &Dx12Device,
         _pipeline: &Pipeline,
     ) -> Result<DescriptorSet, Error> {
-        let n_descriptors = self.buffers.len() + self.images.len();
-        let heap_desc = d3d12::D3D12_DESCRIPTOR_HEAP_DESC {
-            Type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
-            NumDescriptors: n_descriptors.try_into()?,
-            Flags: d3d12::D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
-            NodeMask: 0,
-        };
-        let heap = device.device.create_descriptor_heap(&heap_desc)?;
-        let mut ix = 0;
-        for buffer in self.buffers {
-            device
-                .device
-                .create_byte_addressed_buffer_unordered_access_view(
-                    &buffer.resource,
-                    heap.get_cpu_descriptor_handle_at_offset(ix),
-                    0,
-                    (buffer.size / 4).try_into()?,
-                );
-            ix += 1;
-        }
-        for image in self.images {
-            device.device.create_unordered_access_view(
-                &image.resource,
-                heap.get_cpu_descriptor_handle_at_offset(ix),
-            );
-            ix += 1;
-        }
-        Ok(DescriptorSet(heap))
+        let mut descriptor_pool = device.descriptor_pool.lock().unwrap();
+        let n_descriptors = self.handles.len().try_into()?;
+        let gpu_ref = descriptor_pool.alloc_gpu(&device.device, n_descriptors)?;
+        gpu_ref.copy_descriptors(&device.device, &self.handles);
+        let heap = descriptor_pool.gpu_heap(&gpu_ref).to_owned();
+        Ok(DescriptorSet { gpu_ref, heap })
     }
 }

@@ -737,6 +798,7 @@ impl Dx12Swapchain {
         let buffer = self.swapchain.get_buffer(idx as u32);
         Image {
             resource: buffer,
+            cpu_ref: None,
             size: self.size,
         }
     }
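Taken together, the DX12 changes mean a buffer has to opt in to clearing when it is created: the CLEAR usage bit makes create_buffer allocate the extra shader-visible descriptor, and clear_buffer then records ClearUnorderedAccessViewUint against it. A hedged usage sketch — `session` and `cmd_buf` are assumed to be in scope, standing in for the hub-level session and command buffer that the tests further down use:

// Sketch only, mirroring the updated tests below.
let buf = session
    .create_buffer(
        1024,
        // CLEAR allocates the shader-visible descriptor clear_uav needs.
        BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::CLEAR,
    )
    .unwrap();
// During command recording; None means clear the whole buffer.
cmd_buf.clear_buffer(&buf, None);
cmd_buf.memory_barrier();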
piet-gpu-hal/src/dx12/descriptor.rs (new file, +295)
@@ -0,0 +1,295 @@
// Copyright © 2021 piet-gpu developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Descriptor management.

use std::{
    convert::TryInto,
    ops::Deref,
    sync::{Arc, Mutex, Weak},
};

use smallvec::SmallVec;
use winapi::um::d3d12::{
    D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
    D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
    D3D12_GPU_DESCRIPTOR_HANDLE,
};

use crate::{bestfit::BestFit, Error};

use super::wrappers::{DescriptorHeap, Device};

const CPU_CHUNK_SIZE: u32 = 256;
const GPU_CHUNK_SIZE: u32 = 4096;

#[derive(Default)]
pub struct DescriptorPool {
    cpu_visible: Vec<CpuHeap>,
    gpu_visible: Vec<GpuHeap>,
    free_list: Arc<Mutex<DescriptorFreeList>>,
}

#[derive(Default)]
pub struct DescriptorFreeList {
    cpu_free: Vec<Vec<u32>>,
    gpu_free: Vec<BestFit>,
}

struct CpuHeap {
    // Retained for lifetime reasons.
    #[allow(unused)]
    dx12_heap: DescriptorHeap,
    cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
    increment_size: u32,
}

pub struct CpuHeapRef {
    heap_ix: usize,
    offset: u32,
}

/// An owned reference to the CPU heap.
///
/// When dropped, the corresponding heap range will be freed.
pub struct CpuHeapRefOwned {
    heap_ref: CpuHeapRef,
    handle: D3D12_CPU_DESCRIPTOR_HANDLE,
    free_list: Weak<Mutex<DescriptorFreeList>>,
}

/// A shader-visible descriptor heap.
struct GpuHeap {
    dx12_heap: DescriptorHeap,
    cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
    gpu_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
    increment_size: u32,
}

pub struct GpuHeapRef {
    heap_ix: usize,
    offset: u32,
    n: u32,
}

/// An owned reference to the GPU heap.
///
/// When dropped, the corresponding heap range will be freed.
pub struct GpuHeapRefOwned {
    heap_ref: GpuHeapRef,
    cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
    gpu_handle: D3D12_GPU_DESCRIPTOR_HANDLE,
    free_list: Weak<Mutex<DescriptorFreeList>>,
}

impl DescriptorPool {
    pub fn alloc_cpu(&mut self, device: &Device) -> Result<CpuHeapRefOwned, Error> {
        let free_list = &self.free_list;
        let mk_owned = |heap_ref, handle| CpuHeapRefOwned {
            heap_ref,
            handle,
            free_list: Arc::downgrade(free_list),
        };
        let mut free_list = free_list.lock().unwrap();
        for (heap_ix, free) in free_list.cpu_free.iter_mut().enumerate() {
            if let Some(offset) = free.pop() {
                let handle = self.cpu_visible[heap_ix].cpu_handle(offset);
                return Ok(mk_owned(CpuHeapRef { heap_ix, offset }, handle));
            }
        }
        unsafe {
            let heap_type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
            let desc = D3D12_DESCRIPTOR_HEAP_DESC {
                Type: heap_type,
                NumDescriptors: CPU_CHUNK_SIZE,
                Flags: D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
                NodeMask: 0,
            };
            let dx12_heap = device.create_descriptor_heap(&desc)?;
            let mut free = (0..CPU_CHUNK_SIZE).rev().collect::<Vec<_>>();
            let offset = free.pop().unwrap();
            debug_assert_eq!(offset, 0);
            let heap_ref = CpuHeapRef {
                heap_ix: self.cpu_visible.len(),
                offset,
            };
            let cpu_handle = dx12_heap.get_cpu_descriptor_handle_for_heap_start();
            let increment_size = device.get_descriptor_increment_size(heap_type);
            let heap = CpuHeap {
                dx12_heap,
                cpu_handle,
                increment_size,
            };
            self.cpu_visible.push(heap);
            free_list.cpu_free.push(free);
            Ok(mk_owned(heap_ref, cpu_handle))
        }
    }

    pub fn cpu_handle(&self, cpu_ref: &CpuHeapRef) -> D3D12_CPU_DESCRIPTOR_HANDLE {
        self.cpu_visible[cpu_ref.heap_ix].cpu_handle(cpu_ref.offset)
    }

    pub fn alloc_gpu(&mut self, device: &Device, n: u32) -> Result<GpuHeapRefOwned, Error> {
        let free_list = &self.free_list;
        let mk_owned = |heap_ref, cpu_handle, gpu_handle| GpuHeapRefOwned {
            heap_ref,
            cpu_handle,
            gpu_handle,
            free_list: Arc::downgrade(free_list),
        };
        let mut free_list = free_list.lock().unwrap();
        for (heap_ix, free) in free_list.gpu_free.iter_mut().enumerate() {
            if let Some(offset) = free.alloc(n) {
                let heap = &self.gpu_visible[heap_ix];
                let cpu_handle = heap.cpu_handle(offset);
                let gpu_handle = heap.gpu_handle(offset);
                return Ok(mk_owned(
                    GpuHeapRef { heap_ix, offset, n },
                    cpu_handle,
                    gpu_handle,
                ));
            }
        }
        unsafe {
            let size = n.max(GPU_CHUNK_SIZE).next_power_of_two();
            let heap_type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
            let desc = D3D12_DESCRIPTOR_HEAP_DESC {
                Type: heap_type,
                NumDescriptors: size,
                Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
                NodeMask: 0,
            };
            let dx12_heap = device.create_descriptor_heap(&desc)?;
            let heap_ix = self.gpu_visible.len();
            let mut free = BestFit::new(size);
            let offset = free.alloc(n).unwrap();
            // We assume the first allocation is at 0, to avoid recomputing offsets.
            debug_assert_eq!(offset, 0);
            let cpu_handle = dx12_heap.get_cpu_descriptor_handle_for_heap_start();
            let gpu_handle = dx12_heap.get_gpu_descriptor_handle_for_heap_start();
            let increment_size = device.get_descriptor_increment_size(heap_type);
            let heap = GpuHeap {
                dx12_heap,
                cpu_handle,
                gpu_handle,
                increment_size,
            };
            self.gpu_visible.push(heap);
            free_list.gpu_free.push(free);
            Ok(mk_owned(
                GpuHeapRef { heap_ix, offset, n },
                cpu_handle,
                gpu_handle,
            ))
        }
    }

    pub fn cpu_handle_of_gpu(
        &self,
        gpu_ref: &GpuHeapRef,
        offset: u32,
    ) -> D3D12_CPU_DESCRIPTOR_HANDLE {
        debug_assert!(offset < gpu_ref.n);
        let dx12_heap = &self.gpu_visible[gpu_ref.heap_ix];
        dx12_heap.cpu_handle(gpu_ref.offset + offset)
    }

    pub fn gpu_heap(&self, gpu_ref: &GpuHeapRef) -> &DescriptorHeap {
        &self.gpu_visible[gpu_ref.heap_ix].dx12_heap
    }
}

impl DescriptorFreeList {
    fn free_cpu(&mut self, cpu_ref: &CpuHeapRef) {
        self.cpu_free[cpu_ref.heap_ix].push(cpu_ref.offset);
    }

    fn free_gpu(&mut self, gpu_ref: &GpuHeapRef) {
        self.gpu_free[gpu_ref.heap_ix].free(gpu_ref.offset, gpu_ref.n);
    }
}

impl Drop for CpuHeapRefOwned {
    fn drop(&mut self) {
        if let Some(a) = self.free_list.upgrade() {
            a.lock().unwrap().free_cpu(&self.heap_ref)
        }
    }
}

impl CpuHeapRefOwned {
    pub fn handle(&self) -> D3D12_CPU_DESCRIPTOR_HANDLE {
        self.handle
    }
}

impl GpuHeapRefOwned {
    pub fn gpu_handle(&self) -> D3D12_GPU_DESCRIPTOR_HANDLE {
        self.gpu_handle
    }

    pub unsafe fn copy_descriptors(&self, device: &Device, src: &[D3D12_CPU_DESCRIPTOR_HANDLE]) {
        // TODO: optimize a bit (use simple variant where appropriate)
        let n = src.len().try_into().unwrap();
        let sizes = (0..n).map(|_| 1).collect::<SmallVec<[u32; 16]>>();
        device.copy_descriptors(
            &[self.cpu_handle],
            &[n],
            src,
            &sizes,
            D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
        );
    }
}

impl Deref for CpuHeapRefOwned {
    type Target = CpuHeapRef;

    fn deref(&self) -> &Self::Target {
        &self.heap_ref
    }
}

impl Drop for GpuHeapRefOwned {
    fn drop(&mut self) {
        if let Some(a) = self.free_list.upgrade() {
            a.lock().unwrap().free_gpu(&self.heap_ref)
        }
    }
}

impl Deref for GpuHeapRefOwned {
    type Target = GpuHeapRef;

    fn deref(&self) -> &Self::Target {
        &self.heap_ref
    }
}

impl CpuHeap {
    fn cpu_handle(&self, offset: u32) -> D3D12_CPU_DESCRIPTOR_HANDLE {
        let mut handle = self.cpu_handle;
        handle.ptr += (offset as usize) * (self.increment_size as usize);
        handle
    }
}

impl GpuHeap {
    fn cpu_handle(&self, offset: u32) -> D3D12_CPU_DESCRIPTOR_HANDLE {
        let mut handle = self.cpu_handle;
        handle.ptr += (offset as usize) * (self.increment_size as usize);
        handle
    }

    fn gpu_handle(&self, offset: u32) -> D3D12_GPU_DESCRIPTOR_HANDLE {
        let mut handle = self.gpu_handle;
        handle.ptr += (offset as u64) * (self.increment_size as u64);
        handle
    }
}
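Both heap types compute descriptor handles with the same pointer arithmetic: start + offset * increment_size, where the increment is only known at runtime via GetDescriptorHandleIncrementSize. A worked example with made-up numbers (the real stride comes from the driver):

fn handle_arithmetic_demo() {
    // Illustrative values only.
    let heap_start: usize = 0x1000;
    let increment_size: usize = 32; // per-descriptor stride reported by the device
    let offset: usize = 5;
    // Mirrors CpuHeap::cpu_handle: the fifth descriptor in the chunk.
    assert_eq!(heap_start + offset * increment_size, 0x10a0);
}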
piet-gpu-hal/src/dx12/wrappers.rs (modified)
@@ -7,6 +7,7 @@
 // except according to those terms.

 use crate::dx12::error::{self, error_if_failed_else_unit, explain_error, Error};
+use smallvec::SmallVec;
 use std::convert::{TryFrom, TryInto};
 use std::sync::atomic::{AtomicPtr, Ordering};
 use std::{ffi, mem, ptr};
@@ -51,11 +52,7 @@ pub type CpuDescriptor = d3d12::D3D12_CPU_DESCRIPTOR_HANDLE;
 pub type GpuDescriptor = d3d12::D3D12_GPU_DESCRIPTOR_HANDLE;

 #[derive(Clone)]
-pub struct DescriptorHeap {
-    pub heap_type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
-    pub increment_size: u32,
-    pub heap: ComPtr<d3d12::ID3D12DescriptorHeap>,
-}
+pub struct DescriptorHeap(ComPtr<d3d12::ID3D12DescriptorHeap>);

 #[derive(Clone)]
 pub struct RootSignature(pub ComPtr<d3d12::ID3D12RootSignature>);
@@ -381,11 +378,7 @@ impl Device {
             "device could not create descriptor heap",
         )?;

-        Ok(DescriptorHeap {
-            heap_type: heap_description.Type,
-            increment_size: self.get_descriptor_increment_size(heap_description.Type),
-            heap: ComPtr::from_raw(heap),
-        })
+        Ok(DescriptorHeap(ComPtr::from_raw(heap)))
     }

     pub unsafe fn get_descriptor_increment_size(
@@ -395,6 +388,31 @@ impl Device {
         self.0.GetDescriptorHandleIncrementSize(heap_type)
     }

+    pub unsafe fn copy_descriptors(
+        &self,
+        dst_starts: &[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE],
+        dst_sizes: &[u32],
+        src_starts: &[d3d12::D3D12_CPU_DESCRIPTOR_HANDLE],
+        src_sizes: &[u32],
+        descriptor_heap_type: d3d12::D3D12_DESCRIPTOR_HEAP_TYPE,
+    ) {
+        debug_assert_eq!(dst_starts.len(), dst_sizes.len());
+        debug_assert_eq!(src_starts.len(), src_sizes.len());
+        debug_assert_eq!(
+            src_sizes.iter().copied().sum::<u32>(),
+            dst_sizes.iter().copied().sum()
+        );
+        self.0.CopyDescriptors(
+            dst_starts.len().try_into().unwrap(),
+            dst_starts.as_ptr(),
+            dst_sizes.as_ptr(),
+            src_starts.len().try_into().unwrap(),
+            src_starts.as_ptr(),
+            src_sizes.as_ptr(),
+            descriptor_heap_type,
+        );
+    }
+
     pub unsafe fn create_compute_pipeline_state(
         &self,
         compute_pipeline_desc: &d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC,
@@ -564,7 +582,7 @@ impl Device {

     pub unsafe fn create_buffer(
         &self,
-        buffer_size_in_bytes: u32,
+        buffer_size_in_bytes: u64,
         heap_type: d3d12::D3D12_HEAP_TYPE,
         cpu_page: d3d12::D3D12_CPU_PAGE_PROPERTY,
         memory_pool_preference: d3d12::D3D12_MEMORY_POOL,
@@ -581,7 +599,7 @@ impl Device {
         };
         let resource_description = d3d12::D3D12_RESOURCE_DESC {
             Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
-            Width: buffer_size_in_bytes as u64,
+            Width: buffer_size_in_bytes,
             Height: 1,
             DepthOrArraySize: 1,
             MipLevels: 1,
@@ -681,26 +699,12 @@ impl Device {
 }

 impl DescriptorHeap {
-    unsafe fn get_cpu_descriptor_handle_for_heap_start(&self) -> CpuDescriptor {
-        self.heap.GetCPUDescriptorHandleForHeapStart()
+    pub unsafe fn get_cpu_descriptor_handle_for_heap_start(&self) -> CpuDescriptor {
+        self.0.GetCPUDescriptorHandleForHeapStart()
     }

-    unsafe fn get_gpu_descriptor_handle_for_heap_start(&self) -> GpuDescriptor {
-        self.heap.GetGPUDescriptorHandleForHeapStart()
-    }
-
-    pub unsafe fn get_cpu_descriptor_handle_at_offset(&self, offset: u32) -> CpuDescriptor {
-        let mut descriptor = self.get_cpu_descriptor_handle_for_heap_start();
-        descriptor.ptr += (offset as usize) * (self.increment_size as usize);
-        descriptor
-    }
-
-    pub unsafe fn get_gpu_descriptor_handle_at_offset(&self, offset: u32) -> GpuDescriptor {
-        let mut descriptor = self.get_gpu_descriptor_handle_for_heap_start();
-        descriptor.ptr += (offset as u64) * (self.increment_size as u64);
-        descriptor
+    pub unsafe fn get_gpu_descriptor_handle_for_heap_start(&self) -> GpuDescriptor {
+        self.0.GetGPUDescriptorHandleForHeapStart()
     }
 }

@@ -923,8 +927,8 @@ impl GraphicsCommandList {
     }

     pub unsafe fn set_descriptor_heaps(&self, descriptor_heaps: &[&DescriptorHeap]) {
-        let mut descriptor_heap_pointers: Vec<_> =
-            descriptor_heaps.iter().map(|dh| dh.heap.as_raw()).collect();
+        let mut descriptor_heap_pointers: SmallVec<[_; 4]> =
+            descriptor_heaps.iter().map(|dh| dh.0.as_raw()).collect();
         self.0.SetDescriptorHeaps(
             u32::try_from(descriptor_heap_pointers.len())
                 .expect("could not safely convert descriptor_heap_pointers.len() into u32"),
@@ -958,6 +962,38 @@ impl GraphicsCommandList {
         );
     }

+    pub unsafe fn clear_uav(
+        &self,
+        gpu_handle: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
+        cpu_handle: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
+        resource: &Resource,
+        value: u32,
+        size: Option<u64>,
+    ) {
+        // In testing, only the first value seems to be used, but just in case...
+        let values = [value, value, value, value];
+        let mut rect = d3d12::D3D12_RECT {
+            left: 0,
+            right: 0,
+            top: 0,
+            bottom: 1,
+        };
+        let (num_rects, p_rects) = if let Some(size) = size {
+            rect.right = (size / 4).try_into().unwrap();
+            (1, &rect as *const _)
+        } else {
+            (0, std::ptr::null())
+        };
+        self.0.ClearUnorderedAccessViewUint(
+            gpu_handle,
+            cpu_handle,
+            resource.get_mut(),
+            &values,
+            num_rects,
+            p_rects,
+        );
+    }
+
     /// Copy an entire resource (buffer or image)
     pub unsafe fn copy_resource(&self, src: &Resource, dst: &Resource) {
         self.0.CopyResource(dst.get_mut(), src.get_mut());
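One detail worth calling out from the new copy_descriptors wrapper above: CopyDescriptors pairs destination ranges with source ranges, so n scattered single descriptors can be gathered into one contiguous shader-visible range — which is exactly how GpuHeapRefOwned::copy_descriptors drives it. A hedged sketch (the handle variables are hypothetical placeholders, and the call must sit in an unsafe context):

// Gather three scattered CPU-only descriptors into a contiguous
// shader-visible range starting at `dst_start`.
device.copy_descriptors(
    &[dst_start],           // one destination range...
    &[3],                   // ...sized to hold all three sources
    &[src_a, src_b, src_c], // three source ranges...
    &[1, 1, 1],             // ...of one descriptor each
    d3d12::D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
);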
piet-gpu-hal/src/hub.rs (modified)
@@ -369,7 +369,13 @@ impl Session {
     }

     /// Choose shader code from the available choices.
-    pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, dxil: &'a [u8], msl: &'a str) -> ShaderCode<'a> {
+    pub fn choose_shader<'a>(
+        &self,
+        spv: &'a [u8],
+        hlsl: &'a str,
+        dxil: &'a [u8],
+        msl: &'a str,
+    ) -> ShaderCode<'a> {
         self.0.device.choose_shader(spv, hlsl, dxil, msl)
     }

piet-gpu-hal/src/lib.rs (modified)
@@ -6,6 +6,7 @@
 use bitflags::bitflags;

 mod backend;
+mod bestfit;
 mod hub;

 #[macro_use]
piet-gpu-hal/src/metal.rs (modified)
@@ -14,6 +14,7 @@
 //
 // Also licensed under MIT license, at your choice.

+mod clear;
 mod util;

 use std::mem;
@@ -39,6 +40,7 @@ pub struct MtlDevice {
     device: metal::Device,
     cmd_queue: Arc<Mutex<metal::CommandQueue>>,
     gpu_info: GpuInfo,
+    helpers: Arc<Helpers>,
 }

 pub struct MtlSurface {
@@ -78,6 +80,7 @@ pub struct Semaphore;

 pub struct CmdBuf {
     cmd_buf: metal::CommandBuffer,
+    helpers: Arc<Helpers>,
 }

 pub struct QueryPool;
@@ -93,6 +96,10 @@ pub struct DescriptorSet {
     images: Vec<Image>,
 }

+struct Helpers {
+    clear_pipeline: metal::ComputePipelineState,
+}
+
 impl MtlInstance {
     pub fn new(
         window_handle: Option<&dyn HasRawWindowHandle>,
@@ -172,10 +179,14 @@ impl MtlInstance {
                 has_memory_model: false,
                 use_staging_buffers,
             };
+            let helpers = Arc::new(Helpers {
+                clear_pipeline: clear::make_clear_pipeline(&device),
+            });
             Ok(MtlDevice {
                 device,
                 cmd_queue: Arc::new(Mutex::new(cmd_queue)),
                 gpu_info,
+                helpers,
             })
         } else {
             Err("can't create system default Metal device".into())
@@ -292,7 +303,8 @@ impl crate::backend::Device for MtlDevice {
         // consider new_command_buffer_with_unretained_references for performance
         let cmd_buf = cmd_queue.new_command_buffer();
         let cmd_buf = autoreleasepool(|| cmd_buf.to_owned());
-        Ok(CmdBuf { cmd_buf })
+        let helpers = self.helpers.clone();
+        Ok(CmdBuf { cmd_buf, helpers })
     }

     unsafe fn destroy_cmd_buf(&self, _cmd_buf: Self::CmdBuf) -> Result<(), Error> {
@@ -467,7 +479,10 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
     }

     unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
-        todo!()
+        let size = size.unwrap_or(buffer.size);
+        let encoder = self.cmd_buf.new_compute_command_encoder();
+        clear::encode_clear(&encoder, &self.helpers.clear_pipeline, &buffer.buffer, size);
+        encoder.end_encoding()
     }

     unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
piet-gpu-hal/src/metal/clear.rs (new file, +68)
@@ -0,0 +1,68 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.

//! The compute shader and stage for clearing buffers.

use metal::{ComputePipelineState, Device};

const CLEAR_MSL: &str = r#"
using namespace metal;

struct ConfigBuf
{
    uint size;
    uint value;
};

kernel void main0(const device ConfigBuf& config [[buffer(0)]], device uint *data [[buffer(1)]], uint3 gid [[thread_position_in_grid]])
{
    uint ix = gid.x;
    if (ix < config.size)
    {
        data[ix] = config.value;
    }
}
"#;

pub fn make_clear_pipeline(device: &Device) -> ComputePipelineState {
    let options = metal::CompileOptions::new();
    let library = device.new_library_with_source(CLEAR_MSL, &options).unwrap();
    let function = library.get_function("main0", None).unwrap();
    device
        .new_compute_pipeline_state_with_function(&function)
        .unwrap()
}

pub fn encode_clear(
    encoder: &metal::ComputeCommandEncoderRef,
    clear_pipeline: &ComputePipelineState,
    buffer: &metal::Buffer,
    size: u64,
) {
    // TODO: should be more careful with overflow
    let size_in_u32s = (size / 4) as u32;
    encoder.set_compute_pipeline_state(&clear_pipeline);
    let config = [size_in_u32s, 0];
    encoder.set_bytes(
        0,
        std::mem::size_of_val(&config) as u64,
        config.as_ptr() as *const _,
    );
    encoder.set_buffer(1, Some(buffer), 0);
    let n_wg = (size_in_u32s + 255) / 256;
    let workgroup_count = metal::MTLSize {
        width: n_wg as u64,
        height: 1,
        depth: 1,
    };
    let workgroup_size = metal::MTLSize {
        width: 256,
        height: 1,
        depth: 1,
    };
    encoder.dispatch_thread_groups(workgroup_count, workgroup_size);
}
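The dispatch sizing in encode_clear rounds up to whole 256-thread groups; the `ix < config.size` guard in the MSL kernel masks off the spare lanes of the last group. For example:

fn clear_dispatch_demo() {
    // 1000 u32s need ceil(1000 / 256) = 4 threadgroups (1024 threads);
    // the kernel's bounds check idles the extra 24 lanes.
    let size_in_u32s: u32 = 1000;
    let n_wg = (size_in_u32s + 255) / 256;
    assert_eq!(n_wg, 4);
}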
tests/src/clear.rs (modified)
@@ -44,7 +44,7 @@ pub struct ClearBinding {
 pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
     let mut result = TestResult::new("clear buffers");
     let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
-    let out_buf = runner.buf_down(n_elements * 4);
+    let out_buf = runner.buf_down(n_elements * 4, BufferUsage::empty());
     let code = ClearCode::new(runner);
     let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
     let binding = stage.bind(runner, &code, &out_buf.dev_buf);
@@ -84,10 +84,6 @@ impl ClearCode {
 }

 impl ClearStage {
-    pub unsafe fn new(runner: &mut Runner, n_elements: u64) -> ClearStage {
-        Self::new_with_value(runner, n_elements, 0)
-    }
-
     pub unsafe fn new_with_value(runner: &mut Runner, n_elements: u64, value: u32) -> ClearStage {
         let config = [n_elements as u32, value];
         let config_buf = runner
tests/src/linkedlist.rs (modified)
@@ -14,10 +14,9 @@
 //
 // Also licensed under MIT license, at your choice.

-use piet_gpu_hal::{include_shader, BackendType, BindType, DescriptorSet};
+use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
 use piet_gpu_hal::{Buffer, Pipeline};

-use crate::clear::{ClearBinding, ClearCode, ClearStage};
 use crate::runner::{Commands, Runner};
 use crate::test_result::TestResult;
 use crate::Config;
@@ -27,21 +26,17 @@ const N_BUCKETS: u64 = 65536;

 struct LinkedListCode {
     pipeline: Pipeline,
-    clear_code: Option<ClearCode>,
 }

-struct LinkedListStage {
-    clear_stage: Option<ClearStage>,
-}
+struct LinkedListStage;

 struct LinkedListBinding {
     descriptor_set: DescriptorSet,
-    clear_binding: Option<ClearBinding>,
 }

 pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestResult {
     let mut result = TestResult::new("linked list");
-    let mem_buf = runner.buf_down(1024 * N_BUCKETS);
+    let mem_buf = runner.buf_down(1024 * N_BUCKETS, BufferUsage::CLEAR);
     let code = LinkedListCode::new(runner);
     let stage = LinkedListStage::new(runner, &code, N_BUCKETS);
     let binding = stage.bind(runner, &code, &mem_buf.dev_buf);
@@ -77,26 +72,17 @@ impl LinkedListCode {
             .session
             .create_compute_pipeline(code, &[BindType::Buffer])
             .unwrap();
-        let clear_code = if runner.backend_type() != BackendType::Vulkan {
-            Some(ClearCode::new(runner))
-        } else {
-            None
-        };
-        LinkedListCode {
-            pipeline,
-            clear_code,
-        }
+        LinkedListCode { pipeline }
     }
 }

 impl LinkedListStage {
-    unsafe fn new(runner: &mut Runner, code: &LinkedListCode, n_buckets: u64) -> LinkedListStage {
-        let clear_stage = if code.clear_code.is_some() {
-            Some(ClearStage::new(runner, n_buckets))
-        } else {
-            None
-        };
-        LinkedListStage { clear_stage }
+    unsafe fn new(
+        _runner: &mut Runner,
+        _code: &LinkedListCode,
+        _n_buckets: u64,
+    ) -> LinkedListStage {
+        LinkedListStage
     }

     unsafe fn bind(
@@ -109,15 +95,7 @@ impl LinkedListStage {
             .session
             .create_simple_descriptor_set(&code.pipeline, &[mem_buf])
             .unwrap();
-        let clear_binding = if let Some(stage) = &self.clear_stage {
-            Some(stage.bind(runner, &code.clear_code.as_ref().unwrap(), mem_buf))
-        } else {
-            None
-        };
-        LinkedListBinding {
-            descriptor_set,
-            clear_binding,
-        }
+        LinkedListBinding { descriptor_set }
     }

     unsafe fn record(
@@ -127,15 +105,7 @@ impl LinkedListStage {
         bindings: &LinkedListBinding,
         out_buf: &Buffer,
     ) {
-        if let Some(stage) = &self.clear_stage {
-            stage.record(
-                commands,
-                code.clear_code.as_ref().unwrap(),
-                bindings.clear_binding.as_ref().unwrap(),
-            );
-        } else {
-            commands.cmd_buf.clear_buffer(out_buf, None);
-        }
+        commands.cmd_buf.clear_buffer(out_buf, None);
         commands.cmd_buf.memory_barrier();
         let n_workgroups = N_BUCKETS / WG_SIZE;
         commands.cmd_buf.dispatch(
@ -14,10 +14,9 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode};
|
use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet, ShaderCode};
|
||||||
use piet_gpu_hal::{Buffer, Pipeline};
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::runner::{Commands, Runner};
|
use crate::runner::{Commands, Runner};
|
||||||
use crate::test_result::TestResult;
|
use crate::test_result::TestResult;
|
||||||
|
@ -27,19 +26,16 @@ const N_ELEMENTS: u64 = 65536;
|
||||||
/// The shader code forMessagePassing sum example.
|
/// The shader code forMessagePassing sum example.
|
||||||
struct MessagePassingCode {
|
struct MessagePassingCode {
|
||||||
pipeline: Pipeline,
|
pipeline: Pipeline,
|
||||||
clear_code: Option<ClearCode>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The stage resources for the prefix sum example.
|
/// The stage resources for the prefix sum example.
|
||||||
struct MessagePassingStage {
|
struct MessagePassingStage {
|
||||||
data_buf: Buffer,
|
data_buf: Buffer,
|
||||||
clear_stages: Option<(ClearStage, ClearBinding, ClearStage)>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The binding for the prefix sum example.
|
/// The binding for the prefix sum example.
|
||||||
struct MessagePassingBinding {
|
struct MessagePassingBinding {
|
||||||
descriptor_set: DescriptorSet,
|
descriptor_set: DescriptorSet,
|
||||||
clear_binding: Option<ClearBinding>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -54,9 +50,9 @@ pub unsafe fn run_message_passing_test(
|
||||||
variant: Variant,
|
variant: Variant,
|
||||||
) -> TestResult {
|
) -> TestResult {
|
||||||
let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
|
let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
|
||||||
let out_buf = runner.buf_down(4);
|
let out_buf = runner.buf_down(4, BufferUsage::CLEAR);
|
||||||
let code = MessagePassingCode::new(runner, variant);
|
let code = MessagePassingCode::new(runner, variant);
|
||||||
let stage = MessagePassingStage::new(runner, &code);
|
let stage = MessagePassingStage::new(runner);
|
||||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||||
let n_iter = config.n_iter;
|
let n_iter = config.n_iter;
|
||||||
let mut total_elapsed = 0.0;
|
let mut total_elapsed = 0.0;
|
||||||
|
@ -92,39 +88,21 @@ impl MessagePassingCode {
|
||||||
.session
|
.session
|
||||||
.create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
|
.create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
MessagePassingCode { pipeline }
|
||||||
// compute shader as a workaround.
|
|
||||||
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
|
||||||
Some(ClearCode::new(runner))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
MessagePassingCode {
|
|
||||||
pipeline,
|
|
||||||
clear_code,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MessagePassingStage {
|
impl MessagePassingStage {
|
||||||
unsafe fn new(runner: &mut Runner, code: &MessagePassingCode) -> MessagePassingStage {
|
unsafe fn new(runner: &mut Runner) -> MessagePassingStage {
|
||||||
let data_buf_size = 8 * N_ELEMENTS;
|
let data_buf_size = 8 * N_ELEMENTS;
|
||||||
let data_buf = runner
|
let data_buf = runner
|
||||||
.session
|
.session
|
||||||
.create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
.create_buffer(
|
||||||
|
data_buf_size,
|
||||||
|
BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::CLEAR,
|
||||||
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let clear_stages = if let Some(clear_code) = &code.clear_code {
|
MessagePassingStage { data_buf }
|
||||||
let stage0 = ClearStage::new(runner, N_ELEMENTS * 2);
|
|
||||||
let binding0 = stage0.bind(runner, clear_code, &data_buf);
|
|
||||||
let stage1 = ClearStage::new(runner, 1);
|
|
||||||
Some((stage0, binding0, stage1))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
MessagePassingStage {
|
|
||||||
data_buf,
|
|
||||||
clear_stages,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn bind(
|
unsafe fn bind(
|
||||||
@@ -137,21 +115,7 @@ impl MessagePassingStage {
             .session
             .create_simple_descriptor_set(&code.pipeline, &[&self.data_buf, out_buf])
             .unwrap();
-        let clear_binding = if let Some(clear_code) = &code.clear_code {
-            Some(
-                self.clear_stages
-                    .as_ref()
-                    .unwrap()
-                    .2
-                    .bind(runner, clear_code, out_buf),
-            )
-        } else {
-            None
-        };
-        MessagePassingBinding {
-            descriptor_set,
-            clear_binding,
-        }
+        MessagePassingBinding { descriptor_set }
     }
 
     unsafe fn record(
@@ -161,14 +125,8 @@ impl MessagePassingStage {
         bindings: &MessagePassingBinding,
         out_buf: &Buffer,
     ) {
-        if let Some((stage0, binding0, stage1)) = &self.clear_stages {
-            let code = code.clear_code.as_ref().unwrap();
-            stage0.record(commands, code, binding0);
-            stage1.record(commands, code, bindings.clear_binding.as_ref().unwrap());
-        } else {
-            commands.cmd_buf.clear_buffer(&self.data_buf, None);
-            commands.cmd_buf.clear_buffer(out_buf, None);
-        }
+        commands.cmd_buf.clear_buffer(&self.data_buf, None);
+        commands.cmd_buf.clear_buffer(out_buf, None);
         commands.cmd_buf.memory_barrier();
         commands.cmd_buf.dispatch(
             &code.pipeline,
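
What was the else branch is now the only path: record() unconditionally clears both buffers before the barrier and the dispatch. Note that out_buf is clearable only because run_message_passing_test created it with the extra CLEAR usage (first hunk of this file). A sketch of how the two ends meet, with names taken from this diff; the pairing of CLEAR usage with clear_buffer is inferred from where the diff requests each:

```rust
use piet_gpu_hal::BufferUsage;

// At setup: the readback buffer requests CLEAR on top of the COPY_SRC and
// STORAGE flags that buf_down adds itself (see the Runner hunk below).
let out_buf = runner.buf_down(4, BufferUsage::CLEAR);

// At record time: the device-side half of that buffer can be cleared
// directly; the barrier orders the clear ahead of the dispatch.
commands.cmd_buf.clear_buffer(&out_buf.dev_buf, None);
commands.cmd_buf.memory_barrier();
```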
@@ -14,10 +14,9 @@
 //
 // Also licensed under MIT license, at your choice.
 
-use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode};
+use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet, ShaderCode};
 use piet_gpu_hal::{Buffer, Pipeline};
 
-use crate::clear::{ClearBinding, ClearCode, ClearStage};
 use crate::config::Config;
 use crate::runner::{Commands, Runner};
 use crate::test_result::TestResult;
@@ -31,7 +30,6 @@ const ELEMENTS_PER_WG: u64 = WG_SIZE * N_ROWS;
 /// A code struct can be created once and reused any number of times.
 struct PrefixCode {
     pipeline: Pipeline,
-    clear_code: Option<ClearCode>,
 }
 
 /// The stage resources for the prefix sum example.
@@ -43,7 +41,6 @@ struct PrefixStage {
     // treat it as a capacity.
     n_elements: u64,
     state_buf: Buffer,
-    clear_stage: Option<(ClearStage, ClearBinding)>,
 }
 
 /// The binding for the prefix sum example.
@@ -77,9 +74,9 @@ pub unsafe fn run_prefix_test(
         .session
         .create_buffer_init(&data, BufferUsage::STORAGE)
         .unwrap();
-    let out_buf = runner.buf_down(data_buf.size());
+    let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty());
     let code = PrefixCode::new(runner, variant);
-    let stage = PrefixStage::new(runner, &code, n_elements);
+    let stage = PrefixStage::new(runner, n_elements);
     let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
     let n_iter = config.n_iter;
     let mut total_elapsed = 0.0;
@@ -121,37 +118,24 @@ impl PrefixCode {
             .unwrap();
         // Currently, DX12 and Metal backends don't support buffer clearing, so use a
         // compute shader as a workaround.
-        let clear_code = if runner.backend_type() != BackendType::Vulkan {
-            Some(ClearCode::new(runner))
-        } else {
-            None
-        };
-        PrefixCode {
-            pipeline,
-            clear_code,
-        }
+        PrefixCode { pipeline }
     }
 }
 
 impl PrefixStage {
-    unsafe fn new(runner: &mut Runner, code: &PrefixCode, n_elements: u64) -> PrefixStage {
+    unsafe fn new(runner: &mut Runner, n_elements: u64) -> PrefixStage {
         let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
         let state_buf_size = 4 + 12 * n_workgroups;
         let state_buf = runner
             .session
-            .create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
+            .create_buffer(
+                state_buf_size,
+                BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::CLEAR,
+            )
             .unwrap();
-        let clear_stage = if let Some(clear_code) = &code.clear_code {
-            let stage = ClearStage::new(runner, state_buf_size / 4);
-            let binding = stage.bind(runner, clear_code, &state_buf);
-            Some((stage, binding))
-        } else {
-            None
-        };
         PrefixStage {
             n_elements,
             state_buf,
-            clear_stage,
         }
     }
 
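
As an aside on the sizing arithmetic this hunk keeps: `n_workgroups` is a ceiling division, and `state_buf_size` is 4 bytes plus 12 per workgroup. A worked check as a sketch; the concrete ELEMENTS_PER_WG value below is illustrative only, since the real one is WG_SIZE * N_ROWS from this file:

```rust
// Ceiling division, as in PrefixStage::new. Suppose, for illustration,
// ELEMENTS_PER_WG were 16384:
let elements_per_wg: u64 = 16384;
let n_elements: u64 = 1 << 20; // 1,048,576
let n_workgroups = (n_elements + elements_per_wg - 1) / elements_per_wg;
assert_eq!(n_workgroups, 64); // exact multiple, so no rounding up here
let state_buf_size = 4 + 12 * n_workgroups; // 4 bytes + 12 per workgroup
assert_eq!(state_buf_size, 772);
```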
@@ -171,11 +155,7 @@ impl PrefixStage {
 
     unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
         let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
-        if let Some((stage, binding)) = &self.clear_stage {
-            stage.record(commands, code.clear_code.as_ref().unwrap(), binding);
-        } else {
-            commands.cmd_buf.clear_buffer(&self.state_buf, None);
-        }
+        commands.cmd_buf.clear_buffer(&self.state_buf, None);
         commands.cmd_buf.memory_barrier();
         commands.cmd_buf.dispatch(
             &code.pipeline,
@@ -52,7 +52,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResult
         .session
         .create_buffer_init(&data, BufferUsage::STORAGE)
         .unwrap();
-    let out_buf = runner.buf_down(data_buf.size());
+    let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty());
     let code = PrefixTreeCode::new(runner);
     let stage = PrefixTreeStage::new(runner, n_elements);
     let binding = stage.bind(runner, &code, &out_buf.dev_buf);
@@ -101,19 +101,17 @@ impl Runner {
         BufUp { stage_buf, dev_buf }
     }
 
-    pub fn buf_down(&self, size: u64) -> BufDown {
+    /// Create a buffer for download (readback).
+    ///
+    /// The `usage` parameter need not include COPY_SRC and STORAGE.
+    pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown {
         let stage_buf = self
             .session
             .create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
             .unwrap();
-        // Note: the COPY_DST isn't needed in all use cases, but I don't think
-        // making this tighter would help.
         let dev_buf = self
             .session
-            .create_buffer(
-                size,
-                BufferUsage::COPY_SRC | BufferUsage::COPY_DST | BufferUsage::STORAGE,
-            )
+            .create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
             .unwrap();
         BufDown { stage_buf, dev_buf }
     }
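
With the new signature, a call site states only the usage it needs beyond readback plumbing; buf_down ors in COPY_SRC and STORAGE itself, and the old unconditional COPY_DST on the device buffer is gone. For the prefix tests, which never clear the output buffer directly, that means passing empty usage, as in this line taken from the hunks above:

```rust
use piet_gpu_hal::BufferUsage;

// The prefix tests' output buffer is only written by the shader and copied
// back, so no usage beyond what buf_down adds is requested.
let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty());
// dev_buf ends up with STORAGE | COPY_SRC; stage_buf with MAP_READ | COPY_DST.
```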