From ae76bf9cc1c330d5d272ae91c790f06f5895138e Mon Sep 17 00:00:00 2001 From: chyyran Date: Wed, 28 Aug 2024 01:30:02 -0400 Subject: [PATCH] rt(d3d12): pull out descriptor heap implementation into its own crate --- Cargo.lock | 13 + librashader-runtime-d3d12/Cargo.toml | 1 + .../src/descriptor_heap.rs | 304 +----------------- librashader-runtime-d3d12/src/error.rs | 6 +- librashader-runtime-d3d12/src/filter_chain.rs | 73 +++-- librashader-runtime-d3d12/src/filter_pass.rs | 16 +- librashader-runtime-d3d12/src/framebuffer.rs | 12 +- librashader-runtime-d3d12/src/luts.rs | 8 +- librashader-runtime-d3d12/src/mipmap.rs | 16 +- librashader-runtime-d3d12/src/samplers.rs | 10 +- librashader-runtime-d3d12/src/texture.rs | 8 +- .../tests/hello_triangle/descriptor_heap.rs | 176 +--------- .../tests/hello_triangle/mod.rs | 18 +- 13 files changed, 121 insertions(+), 540 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1336ee2..d140377 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -853,6 +853,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "d3d12-descriptor-heap" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "825c5ff0f3af86b775d1e842a11fb42ee5eac1acec44ff46572a756219be3ff5" +dependencies = [ + "array-init", + "bitvec", + "thiserror", + "windows 0.58.0", +] + [[package]] name = "data-encoding" version = "2.6.0" @@ -1774,6 +1786,7 @@ dependencies = [ "array-init", "bitvec", "bytemuck", + "d3d12-descriptor-heap", "gfx-maths", "gpu-allocator 0.27.0", "librashader-cache", diff --git a/librashader-runtime-d3d12/Cargo.toml b/librashader-runtime-d3d12/Cargo.toml index 6352c71..6efbd42 100644 --- a/librashader-runtime-d3d12/Cargo.toml +++ b/librashader-runtime-d3d12/Cargo.toml @@ -31,6 +31,7 @@ mach-siegbert-vogt-dxcsa = "0.1.3" rayon = "1.6.1" gpu-allocator = { version = "0.27.0", features = ["d3d12"], default-features = false} parking_lot = "0.12.3" +d3d12-descriptor-heap = "0.1.0" [target.'cfg(windows)'.dependencies.windows] workspace = true diff --git a/librashader-runtime-d3d12/src/descriptor_heap.rs b/librashader-runtime-d3d12/src/descriptor_heap.rs index a6235d9..73c229b 100644 --- a/librashader-runtime-d3d12/src/descriptor_heap.rs +++ b/librashader-runtime-d3d12/src/descriptor_heap.rs @@ -1,26 +1,11 @@ -use crate::error; -use bitvec::bitvec; -use bitvec::boxed::BitBox; -use bitvec::order::Lsb0; -use std::marker::PhantomData; -use std::ops::Deref; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; - -use crate::error::FilterChainError; use windows::Win32::Graphics::Direct3D12::{ - ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - D3D12_DESCRIPTOR_HEAP_TYPE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_GPU_DESCRIPTOR_HANDLE, }; -pub trait D3D12HeapType { - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC; -} +use d3d12_descriptor_heap::{D3D12DescriptorHeapType, D3D12ShaderVisibleDescriptorHeapType}; -pub trait D3D12ShaderVisibleHeapType: D3D12HeapType {} #[derive(Clone)] pub struct SamplerPaletteHeap; @@ -36,9 +21,9 @@ pub struct ResourceWorkHeap; #[derive(Clone)] pub struct SamplerWorkHeap; -impl D3D12HeapType for SamplerPaletteHeap { +impl D3D12DescriptorHeapType for SamplerPaletteHeap { // sampler palettes just get set directly - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, NumDescriptors: size as u32, @@ -48,9 +33,9 @@ impl D3D12HeapType for SamplerPaletteHeap { } } -impl D3D12HeapType for CpuStagingHeap { +impl D3D12DescriptorHeapType for CpuStagingHeap { // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, NumDescriptors: size as u32, @@ -60,9 +45,9 @@ impl D3D12HeapType for CpuStagingHeap { } } -impl D3D12HeapType for RenderTargetHeap { +impl D3D12DescriptorHeapType for RenderTargetHeap { // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_RTV, NumDescriptors: size as u32, @@ -72,10 +57,10 @@ impl D3D12HeapType for RenderTargetHeap { } } -impl D3D12ShaderVisibleHeapType for ResourceWorkHeap {} -impl D3D12HeapType for ResourceWorkHeap { +unsafe impl D3D12ShaderVisibleDescriptorHeapType for ResourceWorkHeap {} +impl D3D12DescriptorHeapType for ResourceWorkHeap { // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, NumDescriptors: size as u32, @@ -85,10 +70,10 @@ impl D3D12HeapType for ResourceWorkHeap { } } -impl D3D12ShaderVisibleHeapType for SamplerWorkHeap {} -impl D3D12HeapType for SamplerWorkHeap { +unsafe impl D3D12ShaderVisibleDescriptorHeapType for SamplerWorkHeap {} +impl D3D12DescriptorHeapType for SamplerWorkHeap { // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, NumDescriptors: size as u32, @@ -97,262 +82,3 @@ impl D3D12HeapType for SamplerWorkHeap { } } } - -pub type D3D12DescriptorHeapSlot = Arc>; - -pub struct D3D12DescriptorHeapSlotInner { - cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE, - gpu_handle: Option, - heap: Arc, - slot: usize, - _pd: PhantomData, -} - -impl D3D12DescriptorHeapSlotInner { - /// Get the index of the resource within the heap. - pub fn index(&self) -> usize { - self.slot - } - - /// unsafe because type must match - pub unsafe fn copy_descriptor(&self, source: D3D12_CPU_DESCRIPTOR_HANDLE) { - unsafe { - let heap = self.heap.deref(); - - heap.device - .CopyDescriptorsSimple(1, self.cpu_handle, source, heap.ty) - } - } -} - -impl AsRef for D3D12DescriptorHeapSlotInner { - fn as_ref(&self) -> &D3D12_CPU_DESCRIPTOR_HANDLE { - &self.cpu_handle - } -} - -impl AsRef - for D3D12DescriptorHeapSlotInner -{ - fn as_ref(&self) -> &D3D12_GPU_DESCRIPTOR_HANDLE { - // SAFETY: D3D12ShaderVisibleHeapType must have a GPU handle. - self.gpu_handle.as_ref().unwrap() - } -} - -impl From<&D3D12DescriptorHeap> for ID3D12DescriptorHeap { - fn from(value: &D3D12DescriptorHeap) -> Self { - value.0.heap.clone() - } -} - -#[derive(Debug)] -struct D3D12DescriptorHeapInner { - device: ID3D12Device, - heap: ID3D12DescriptorHeap, - ty: D3D12_DESCRIPTOR_HEAP_TYPE, - cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE, - gpu_start: Option, - handle_size: usize, - start: AtomicUsize, - num_descriptors: usize, - map: BitBox, -} - -pub struct D3D12DescriptorHeap(Arc, PhantomData); - -impl D3D12DescriptorHeap { - pub fn new(device: &ID3D12Device, size: usize) -> error::Result> { - let desc = T::get_desc(size); - unsafe { D3D12DescriptorHeap::new_with_desc(device, desc) } - } -} - -impl D3D12DescriptorHeap { - /// Gets a cloned handle to the inner heap - pub fn handle(&self) -> ID3D12DescriptorHeap { - let inner = &self.0; - inner.heap.clone() - } - - pub unsafe fn new_with_desc( - device: &ID3D12Device, - desc: D3D12_DESCRIPTOR_HEAP_DESC, - ) -> error::Result> { - unsafe { - let heap: ID3D12DescriptorHeap = device.CreateDescriptorHeap(&desc)?; - let cpu_start = heap.GetCPUDescriptorHandleForHeapStart(); - - let gpu_start = if (desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE).0 != 0 { - Some(heap.GetGPUDescriptorHandleForHeapStart()) - } else { - None - }; - - Ok(D3D12DescriptorHeap( - Arc::new(D3D12DescriptorHeapInner { - device: device.clone(), - heap, - ty: desc.Type, - cpu_start, - gpu_start, - handle_size: device.GetDescriptorHandleIncrementSize(desc.Type) as usize, - start: AtomicUsize::new(0), - num_descriptors: desc.NumDescriptors as usize, - map: bitvec![AtomicUsize, Lsb0; 0; desc.NumDescriptors as usize] - .into_boxed_bitslice(), - }), - PhantomData::default(), - )) - } - } - - /// suballocates this heap into equally sized chunks. - /// if there aren't enough descriptors, throws an error. - /// - /// it is UB (programmer error) to call this if the descriptor heap already has - /// descriptors allocated for it. - /// - /// size must also divide equally into the size of the heap. - pub unsafe fn suballocate( - self, - size: usize, - reserved: usize, - ) -> ( - Vec>, - Option>, - ID3D12DescriptorHeap, - ) { - // has to be called right after creation. - assert_eq!( - Arc::strong_count(&self.0), - 1, - "D3D12DescriptorHeap::suballocate can only be callled immediately after creation." - ); - - let Ok(inner) = Arc::try_unwrap(self.0) else { - panic!("[d3d12] undefined behaviour to suballocate a descriptor heap with live descriptors.") - }; - - let num_descriptors = inner.num_descriptors - reserved; - - // number of suballocated heaps - let num_heaps = num_descriptors / size; - let remainder = num_descriptors % size; - - assert_eq!( - remainder, 0, - "D3D12DescriptorHeap::suballocate \ - must be called with a size that equally divides the number of descriptors" - ); - - let mut heaps = Vec::new(); - - let mut start = 0; - let root_cpu_ptr = inner.cpu_start.ptr; - let root_gpu_ptr = inner.gpu_start.map(|p| p.ptr); - - for _ in 0..num_heaps { - let new_cpu_start = root_cpu_ptr + (start * inner.handle_size); - let new_gpu_start = root_gpu_ptr.map(|r| D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: r + (start as u64 * inner.handle_size as u64), - }); - - heaps.push(D3D12DescriptorHeapInner { - device: inner.device.clone(), - heap: inner.heap.clone(), - ty: inner.ty, - cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE { ptr: new_cpu_start }, - gpu_start: new_gpu_start, - handle_size: inner.handle_size, - start: AtomicUsize::new(0), - num_descriptors: size, - map: bitvec![AtomicUsize, Lsb0; 0; size].into_boxed_bitslice(), - }); - - start += size; - } - - let mut reserved_heap = None; - if reserved != 0 { - assert_eq!( - reserved, - inner.num_descriptors - start, - "The input heap could not fit the number of requested reserved descriptors." - ); - let new_cpu_start = root_cpu_ptr + (start * inner.handle_size); - let new_gpu_start = root_gpu_ptr.map(|r| D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: r + (start as u64 * inner.handle_size as u64), - }); - - reserved_heap = Some(D3D12DescriptorHeapInner { - device: inner.device.clone(), - heap: inner.heap.clone(), - ty: inner.ty, - cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE { ptr: new_cpu_start }, - gpu_start: new_gpu_start, - handle_size: inner.handle_size, - start: AtomicUsize::new(0), - num_descriptors: reserved, - map: bitvec![AtomicUsize, Lsb0; 0; reserved].into_boxed_bitslice(), - }); - } - - ( - heaps - .into_iter() - .map(|inner| D3D12DescriptorHeap(Arc::new(inner), PhantomData::default())) - .collect(), - reserved_heap.map(|inner| D3D12DescriptorHeap(Arc::new(inner), PhantomData::default())), - inner.heap, - ) - } - - pub fn alloc_slot(&mut self) -> error::Result> { - let mut handle = D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 }; - - let inner = &self.0; - let start = inner.start.load(Ordering::Acquire); - for i in start..inner.num_descriptors { - if !inner.map[i] { - inner.map.set_aliased(i, true); - handle.ptr = inner.cpu_start.ptr + (i * inner.handle_size); - inner.start.store(i + 1, Ordering::Release); - - let gpu_handle = inner - .gpu_start - .map(|gpu_start| D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: (handle.ptr as u64 - inner.cpu_start.ptr as u64) + gpu_start.ptr, - }); - - return Ok(Arc::new(D3D12DescriptorHeapSlotInner { - cpu_handle: handle, - slot: i, - heap: Arc::clone(&self.0), - gpu_handle, - _pd: Default::default(), - })); - } - } - - Err(FilterChainError::DescriptorHeapOverflow( - inner.num_descriptors, - )) - } - - pub fn alloc_range( - &mut self, - ) -> error::Result<[D3D12DescriptorHeapSlot; NUM_DESC]> { - let dest = array_init::try_array_init(|_| self.alloc_slot())?; - Ok(dest) - } -} - -impl Drop for D3D12DescriptorHeapSlotInner { - fn drop(&mut self) { - let inner = &self.heap; - inner.map.set_aliased(self.slot, false); - // inner.start > self.slot => inner.start = self.slot - inner.start.fetch_min(self.slot, Ordering::AcqRel); - } -} diff --git a/librashader-runtime-d3d12/src/error.rs b/librashader-runtime-d3d12/src/error.rs index fae2d09..bd4889c 100644 --- a/librashader-runtime-d3d12/src/error.rs +++ b/librashader-runtime-d3d12/src/error.rs @@ -1,5 +1,7 @@ //! Direct3D 12 shader runtime errors. //! + +use d3d12_descriptor_heap::D3D12DescriptorHeapError; use thiserror::Error; /// Cumulative error type for Direct3D12 filter chains. @@ -19,8 +21,8 @@ pub enum FilterChainError { ShaderReflectError(#[from] ShaderReflectError), #[error("lut loading error")] LutLoadError(#[from] ImageError), - #[error("heap overflow")] - DescriptorHeapOverflow(usize), + #[error("heap error")] + HeapError(#[from] D3D12DescriptorHeapError), #[error("allocation error")] AllocationError(#[from] gpu_allocator::AllocationError), } diff --git a/librashader-runtime-d3d12/src/filter_chain.rs b/librashader-runtime-d3d12/src/filter_chain.rs index 5b095a9..5530621 100644 --- a/librashader-runtime-d3d12/src/filter_chain.rs +++ b/librashader-runtime-d3d12/src/filter_chain.rs @@ -1,8 +1,5 @@ use crate::buffer::{D3D12Buffer, RawD3D12Buffer}; -use crate::descriptor_heap::{ - CpuStagingHeap, D3D12DescriptorHeap, D3D12DescriptorHeapSlot, RenderTargetHeap, - ResourceWorkHeap, -}; +use crate::descriptor_heap::{CpuStagingHeap, RenderTargetHeap, ResourceWorkHeap}; use crate::draw_quad::DrawQuad; use crate::error::FilterChainError; use crate::filter_pass::FilterPass; @@ -14,6 +11,9 @@ use crate::options::{FilterChainOptionsD3D12, FrameOptionsD3D12}; use crate::samplers::SamplerSet; use crate::texture::{D3D12InputImage, D3D12OutputView, InputTexture, OutputDescriptor}; use crate::{error, util}; +use d3d12_descriptor_heap::{ + D3D12DescriptorHeap, D3D12DescriptorHeapSlot, D3D12PartitionableHeap, D3D12PartitionedHeap, +}; use gpu_allocator::d3d12::{Allocator, AllocatorCreateDesc, ID3D12DeviceVersion}; use librashader_common::map::FastHashMap; use librashader_common::{ImageFormat, Size, Viewport}; @@ -308,18 +308,22 @@ impl FilterChainD3D12 { })?)); let draw_quad = DrawQuad::new(&allocator)?; - let mut staging_heap = D3D12DescriptorHeap::new( - device, - (MAX_BINDINGS_COUNT as usize) * shader_count - + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS - + lut_count, - )?; - let rtv_heap = D3D12DescriptorHeap::new( - device, - (MAX_BINDINGS_COUNT as usize) * shader_count - + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS - + lut_count, - )?; + let mut staging_heap = unsafe { + D3D12DescriptorHeap::new( + device, + (MAX_BINDINGS_COUNT as usize) * shader_count + + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS + + lut_count, + ) + }?; + let rtv_heap = unsafe { + D3D12DescriptorHeap::new( + device, + (MAX_BINDINGS_COUNT as usize) * shader_count + + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS + + lut_count, + ) + }?; let root_signature = D3D12RootSignature::new(device)?; @@ -473,22 +477,31 @@ impl FilterChainD3D12 { D3D12DescriptorHeap, )> { let shader_count = passes.len(); - let work_heap = D3D12DescriptorHeap::::new( - device, - (MAX_BINDINGS_COUNT as usize) * shader_count + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS, - )?; - let (work_heaps, mipmap_heap, texture_heap_handle) = unsafe { - work_heap.suballocate( + let D3D12PartitionedHeap { + partitioned: work_heaps, + reserved: mipmap_heap, + handle: texture_heap_handle, + } = unsafe { + let work_heap = D3D12PartitionableHeap::::new( + device, + (MAX_BINDINGS_COUNT as usize) * shader_count + MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS, + )?; + + work_heap.into_partitioned( MAX_BINDINGS_COUNT as usize, MIPMAP_RESERVED_WORKHEAP_DESCRIPTORS, - ) + )? }; - let sampler_work_heap = - D3D12DescriptorHeap::new(device, (MAX_BINDINGS_COUNT as usize) * shader_count)?; - - let (sampler_work_heaps, _, sampler_heap_handle) = - unsafe { sampler_work_heap.suballocate(MAX_BINDINGS_COUNT as usize, 0) }; + let D3D12PartitionedHeap { + partitioned: sampler_work_heaps, + reserved: _, + handle: sampler_heap_handle, + } = unsafe { + let sampler_heap = + D3D12PartitionableHeap::new(device, (MAX_BINDINGS_COUNT as usize) * shader_count)?; + sampler_heap.into_partitioned(MAX_BINDINGS_COUNT as usize, 0)? + }; let filters: Vec> = passes .into_par_iter() @@ -578,8 +591,8 @@ impl FilterChainD3D12 { let uniform_bindings = reflection.meta.create_binding_map(|param| param.offset()); - let texture_heap = texture_heap.alloc_range()?; - let sampler_heap = sampler_heap.alloc_range()?; + let texture_heap = texture_heap.allocate_descriptor_range()?; + let sampler_heap = sampler_heap.allocate_descriptor_range()?; Ok(FilterPass { reflection, diff --git a/librashader-runtime-d3d12/src/filter_pass.rs b/librashader-runtime-d3d12/src/filter_pass.rs index 91daaff..abdaa92 100644 --- a/librashader-runtime-d3d12/src/filter_pass.rs +++ b/librashader-runtime-d3d12/src/filter_pass.rs @@ -1,11 +1,12 @@ use crate::buffer::RawD3D12Buffer; -use crate::descriptor_heap::{D3D12DescriptorHeapSlot, ResourceWorkHeap, SamplerWorkHeap}; +use crate::descriptor_heap::{ResourceWorkHeap, SamplerWorkHeap}; use crate::error; use crate::filter_chain::FilterCommon; use crate::graphics_pipeline::D3D12GraphicsPipeline; use crate::options::FrameOptionsD3D12; use crate::samplers::SamplerSet; use crate::texture::{D3D12OutputView, InputTexture}; +use d3d12_descriptor_heap::D3D12DescriptorHeapSlot; use librashader_common::map::FastHashMap; use librashader_common::{ImageFormat, Size, Viewport}; use librashader_preprocess::ShaderSource; @@ -17,7 +18,6 @@ use librashader_runtime::filter_pass::FilterPassMeta; use librashader_runtime::quad::QuadType; use librashader_runtime::render_target::RenderTarget; use librashader_runtime::uniforms::{NoUniformBinder, UniformStorage}; -use std::ops::Deref; use windows::core::Interface; use windows::Win32::Foundation::RECT; use windows::Win32::Graphics::Direct3D12::{ @@ -66,12 +66,8 @@ impl BindSemantics, RawD3D12Buffer, RawD3D12Buffer> unsafe { texture_binding[binding.binding as usize].copy_descriptor(*texture.descriptor.as_ref()); - sampler_binding[binding.binding as usize].copy_descriptor( - *samplers - .get(texture.wrap_mode, texture.filter) - .deref() - .as_ref(), - ) + sampler_binding[binding.binding as usize] + .copy_descriptor(*samplers.get(texture.wrap_mode, texture.filter).as_ref()) } } } @@ -177,8 +173,8 @@ impl FilterPass { } unsafe { - cmd.SetGraphicsRootDescriptorTable(0, *self.texture_heap[0].deref().as_ref()); - cmd.SetGraphicsRootDescriptorTable(1, *self.sampler_heap[0].deref().as_ref()); + cmd.SetGraphicsRootDescriptorTable(0, *self.texture_heap[0].as_ref()); + cmd.SetGraphicsRootDescriptorTable(1, *self.sampler_heap[0].as_ref()); } // todo: check for non-renderpass. diff --git a/librashader-runtime-d3d12/src/framebuffer.rs b/librashader-runtime-d3d12/src/framebuffer.rs index e0f6383..2a809a5 100644 --- a/librashader-runtime-d3d12/src/framebuffer.rs +++ b/librashader-runtime-d3d12/src/framebuffer.rs @@ -1,9 +1,10 @@ -use crate::descriptor_heap::{CpuStagingHeap, D3D12DescriptorHeap, RenderTargetHeap}; +use crate::descriptor_heap::{CpuStagingHeap, RenderTargetHeap}; use crate::error::FilterChainError; use crate::filter_chain::FrameResiduals; use crate::texture::{D3D12OutputView, InputTexture}; use crate::util::d3d12_get_closest_format; use crate::{error, util}; +use d3d12_descriptor_heap::D3D12DescriptorHeap; use gpu_allocator::d3d12::{ Allocator, Resource, ResourceCategory, ResourceCreateDesc, ResourceStateOrBarrierLayout, ResourceType, @@ -14,7 +15,6 @@ use librashader_presets::Scale2D; use librashader_runtime::scaling::{MipmapSize, ScaleFramebuffer, ViewportSize}; use parking_lot::Mutex; use std::mem::ManuallyDrop; -use std::ops::Deref; use std::sync::Arc; use windows::Win32::Graphics::Direct3D12::{ ID3D12Device, ID3D12GraphicsCommandList, D3D12_BOX, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, @@ -275,7 +275,7 @@ impl OwnedImage { filter: FilterMode, wrap_mode: WrapMode, ) -> error::Result { - let descriptor = heap.alloc_slot()?; + let descriptor = heap.allocate_descriptor()?; unsafe { let srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { @@ -293,7 +293,7 @@ impl OwnedImage { self.device.CreateShaderResourceView( self.handle.resource(), Some(&srv_desc), - *descriptor.deref().as_ref(), + *descriptor.as_ref(), ); } @@ -311,7 +311,7 @@ impl OwnedImage { &self, heap: &mut D3D12DescriptorHeap, ) -> error::Result { - let descriptor = heap.alloc_slot()?; + let descriptor = heap.allocate_descriptor()?; unsafe { let rtv_desc = D3D12_RENDER_TARGET_VIEW_DESC { @@ -328,7 +328,7 @@ impl OwnedImage { self.device.CreateRenderTargetView( self.handle.resource(), Some(&rtv_desc), - *descriptor.deref().as_ref(), + *descriptor.as_ref(), ); } diff --git a/librashader-runtime-d3d12/src/luts.rs b/librashader-runtime-d3d12/src/luts.rs index c69ebd0..e9664ca 100644 --- a/librashader-runtime-d3d12/src/luts.rs +++ b/librashader-runtime-d3d12/src/luts.rs @@ -1,9 +1,10 @@ -use crate::descriptor_heap::{CpuStagingHeap, D3D12DescriptorHeap}; +use crate::descriptor_heap::CpuStagingHeap; use crate::error; use crate::filter_chain::FrameResiduals; use crate::mipmap::MipmapGenContext; use crate::texture::InputTexture; use crate::util::{d3d12_get_closest_format, d3d12_resource_transition, d3d12_update_subresources}; +use d3d12_descriptor_heap::D3D12DescriptorHeap; use gpu_allocator::d3d12::{ Allocator, Resource, ResourceCategory, ResourceCreateDesc, ResourceStateOrBarrierLayout, ResourceType, @@ -14,7 +15,6 @@ use librashader_runtime::image::Image; use librashader_runtime::scaling::MipmapSize; use parking_lot::Mutex; use std::mem::ManuallyDrop; -use std::ops::Deref; use std::sync::Arc; use windows::Win32::Graphics::Direct3D12::{ ID3D12Device, ID3D12GraphicsCommandList, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, @@ -80,7 +80,7 @@ impl LutTexture { } desc.Format = d3d12_get_closest_format(device, format_support); - let descriptor = heap.alloc_slot()?; + let descriptor = heap.allocate_descriptor()?; // create handles on GPU let resource = allocator.lock().create_resource(&ResourceCreateDesc { @@ -112,7 +112,7 @@ impl LutTexture { device.CreateShaderResourceView( resource.resource(), Some(&srv_desc), - *descriptor.deref().as_ref(), + *descriptor.as_ref(), ); } diff --git a/librashader-runtime-d3d12/src/mipmap.rs b/librashader-runtime-d3d12/src/mipmap.rs index 3a7cea7..1f463f5 100644 --- a/librashader-runtime-d3d12/src/mipmap.rs +++ b/librashader-runtime-d3d12/src/mipmap.rs @@ -1,11 +1,11 @@ -use crate::descriptor_heap::{D3D12DescriptorHeap, D3D12DescriptorHeapSlot, ResourceWorkHeap}; +use crate::descriptor_heap::ResourceWorkHeap; use crate::util::dxc_validate_shader; use crate::{error, util}; use bytemuck::{Pod, Zeroable}; +use d3d12_descriptor_heap::{D3D12DescriptorHeap, D3D12DescriptorHeapSlot}; use librashader_common::Size; use librashader_runtime::scaling::MipmapSize; use std::mem::ManuallyDrop; -use std::ops::Deref; use windows::Win32::Graphics::Direct3D::Dxc::{ CLSID_DxcLibrary, CLSID_DxcValidator, DxcCreateInstance, }; @@ -191,7 +191,7 @@ impl D3D12MipmapGen { Vec, )> { // create views for mipmap generation - let srv = work_heap.alloc_slot()?; + let srv = work_heap.allocate_descriptor()?; unsafe { let srv_desc = D3D12_SHADER_RESOURCE_VIEW_DESC { Format: format, @@ -206,14 +206,14 @@ impl D3D12MipmapGen { }; self.device - .CreateShaderResourceView(resource, Some(&srv_desc), *srv.deref().as_ref()); + .CreateShaderResourceView(resource, Some(&srv_desc), *srv.as_ref()); } let mut heap_slots = Vec::with_capacity(miplevels as usize); heap_slots.push(srv); for i in 1..miplevels { - let descriptor = work_heap.alloc_slot()?; + let descriptor = work_heap.allocate_descriptor()?; let desc = D3D12_UNORDERED_ACCESS_VIEW_DESC { Format: format, ViewDimension: D3D12_UAV_DIMENSION_TEXTURE2D, @@ -230,14 +230,14 @@ impl D3D12MipmapGen { resource, None, Some(&desc), - *descriptor.deref().as_ref(), + *descriptor.as_ref(), ); } heap_slots.push(descriptor); } unsafe { - cmd.SetComputeRootDescriptorTable(0, *heap_slots[0].deref().as_ref()); + cmd.SetComputeRootDescriptorTable(0, *heap_slots[0].as_ref()); } let mut residual_barriers = Vec::new(); @@ -269,7 +269,7 @@ impl D3D12MipmapGen { cmd.ResourceBarrier(&barriers); residual_barriers.extend(barriers); - cmd.SetComputeRootDescriptorTable(1, *heap_slots[i as usize].deref().as_ref()); + cmd.SetComputeRootDescriptorTable(1, *heap_slots[i as usize].as_ref()); cmd.SetComputeRoot32BitConstants( 2, (std::mem::size_of::() / std::mem::size_of::()) as u32, diff --git a/librashader-runtime-d3d12/src/samplers.rs b/librashader-runtime-d3d12/src/samplers.rs index 89e2613..cb30deb 100644 --- a/librashader-runtime-d3d12/src/samplers.rs +++ b/librashader-runtime-d3d12/src/samplers.rs @@ -1,8 +1,8 @@ -use crate::descriptor_heap::{D3D12DescriptorHeap, D3D12DescriptorHeapSlot, SamplerPaletteHeap}; +use crate::descriptor_heap::SamplerPaletteHeap; use crate::error; +use d3d12_descriptor_heap::{D3D12DescriptorHeap, D3D12DescriptorHeapSlot}; use librashader_common::map::FastHashMap; use librashader_common::{FilterMode, WrapMode}; -use std::ops::Deref; use windows::Win32::Graphics::Direct3D12::{ ID3D12Device, D3D12_COMPARISON_FUNC_NEVER, D3D12_FLOAT32_MAX, D3D12_SAMPLER_DESC, D3D12_TEXTURE_ADDRESS_MODE, @@ -33,12 +33,12 @@ impl SamplerSet { WrapMode::MirroredRepeat, ]; - let mut heap = D3D12DescriptorHeap::new(device, 2 * wrap_modes.len())?; + let mut heap = unsafe { D3D12DescriptorHeap::new(device, 2 * wrap_modes.len())? }; for wrap_mode in wrap_modes { for filter_mode in &[FilterMode::Linear, FilterMode::Nearest] { unsafe { - let sampler = heap.alloc_slot()?; + let sampler = heap.allocate_descriptor()?; device.CreateSampler( &D3D12_SAMPLER_DESC { Filter: (*filter_mode).into(), @@ -52,7 +52,7 @@ impl SamplerSet { MinLOD: -D3D12_FLOAT32_MAX, MaxLOD: D3D12_FLOAT32_MAX, }, - *sampler.deref().as_ref(), + *sampler.as_ref(), ); samplers.insert((*wrap_mode, *filter_mode), sampler); } diff --git a/librashader-runtime-d3d12/src/texture.rs b/librashader-runtime-d3d12/src/texture.rs index 86a012f..803d14a 100644 --- a/librashader-runtime-d3d12/src/texture.rs +++ b/librashader-runtime-d3d12/src/texture.rs @@ -1,6 +1,6 @@ -use crate::descriptor_heap::{CpuStagingHeap, D3D12DescriptorHeapSlot, RenderTargetHeap}; +use crate::descriptor_heap::{CpuStagingHeap, RenderTargetHeap}; +use d3d12_descriptor_heap::D3D12DescriptorHeapSlot; use librashader_common::{FilterMode, Size, WrapMode}; -use std::ops::Deref; use windows::Win32::Graphics::Direct3D12::{ID3D12Resource, D3D12_CPU_DESCRIPTOR_HANDLE}; use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT; @@ -25,7 +25,7 @@ pub(crate) enum OutputDescriptor { impl AsRef for InputDescriptor { fn as_ref(&self) -> &D3D12_CPU_DESCRIPTOR_HANDLE { match self { - InputDescriptor::Owned(h) => h.deref().as_ref(), + InputDescriptor::Owned(h) => h.as_ref(), InputDescriptor::Raw(h) => h, } } @@ -34,7 +34,7 @@ impl AsRef for InputDescriptor { impl AsRef for OutputDescriptor { fn as_ref(&self) -> &D3D12_CPU_DESCRIPTOR_HANDLE { match self { - OutputDescriptor::Owned(h) => h.deref().as_ref(), + OutputDescriptor::Owned(h) => h.as_ref(), OutputDescriptor::Raw(h) => h, } } diff --git a/librashader-runtime-d3d12/tests/hello_triangle/descriptor_heap.rs b/librashader-runtime-d3d12/tests/hello_triangle/descriptor_heap.rs index 60dfbac..9b3c775 100644 --- a/librashader-runtime-d3d12/tests/hello_triangle/descriptor_heap.rs +++ b/librashader-runtime-d3d12/tests/hello_triangle/descriptor_heap.rs @@ -1,30 +1,15 @@ -use bitvec::bitvec; -use bitvec::boxed::BitBox; -use bitvec::order::Lsb0; -use std::marker::PhantomData; -use std::ops::Deref; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; - +use d3d12_descriptor_heap::D3D12DescriptorHeapType; use windows::Win32::Graphics::Direct3D12::{ - ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - D3D12_DESCRIPTOR_HEAP_TYPE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - D3D12_GPU_DESCRIPTOR_HANDLE, + D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, }; -pub trait D3D12HeapType { - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC; -} - -pub trait D3D12ShaderVisibleHeapType: D3D12HeapType {} - #[derive(Clone)] pub struct CpuStagingHeap; -impl D3D12HeapType for CpuStagingHeap { +impl D3D12DescriptorHeapType for CpuStagingHeap { // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. - fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + fn create_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, NumDescriptors: size as u32, @@ -33,154 +18,3 @@ impl D3D12HeapType for CpuStagingHeap { } } } - -pub type D3D12DescriptorHeapSlot = Arc>; - -pub struct D3D12DescriptorHeapSlotInner { - cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE, - gpu_handle: Option, - heap: Arc, - slot: usize, - _pd: PhantomData, -} - -impl D3D12DescriptorHeapSlotInner { - /// Get the index of the resource within the heap. - pub fn index(&self) -> usize { - self.slot - } - - /// unsafe because type must match - pub unsafe fn copy_descriptor(&self, source: D3D12_CPU_DESCRIPTOR_HANDLE) { - unsafe { - let heap = &self.heap.deref(); - - heap.device - .CopyDescriptorsSimple(1, self.cpu_handle, source, heap.ty) - } - } -} - -impl AsRef for D3D12DescriptorHeapSlotInner { - fn as_ref(&self) -> &D3D12_CPU_DESCRIPTOR_HANDLE { - &self.cpu_handle - } -} - -impl AsRef - for D3D12DescriptorHeapSlotInner -{ - fn as_ref(&self) -> &D3D12_GPU_DESCRIPTOR_HANDLE { - // SAFETY: D3D12ShaderVisibleHeapType must have a GPU handle. - self.gpu_handle.as_ref().unwrap() - } -} - -impl From<&D3D12DescriptorHeap> for ID3D12DescriptorHeap { - fn from(value: &D3D12DescriptorHeap) -> Self { - value.0.heap.clone() - } -} - -#[derive(Debug)] -struct D3D12DescriptorHeapInner { - device: ID3D12Device, - heap: ID3D12DescriptorHeap, - ty: D3D12_DESCRIPTOR_HEAP_TYPE, - cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE, - gpu_start: Option, - handle_size: usize, - start: AtomicUsize, - num_descriptors: usize, - map: BitBox, -} - -pub struct D3D12DescriptorHeap(Arc, PhantomData); - -impl D3D12DescriptorHeap { - pub fn new( - device: &ID3D12Device, - size: usize, - ) -> Result, windows::core::Error> { - let desc = T::get_desc(size); - unsafe { D3D12DescriptorHeap::new_with_desc(device, desc) } - } -} - -impl D3D12DescriptorHeap { - /// Gets a cloned handle to the inner heap - pub fn handle(&self) -> ID3D12DescriptorHeap { - self.0.heap.clone() - } - - pub unsafe fn new_with_desc( - device: &ID3D12Device, - desc: D3D12_DESCRIPTOR_HEAP_DESC, - ) -> Result, windows::core::Error> { - unsafe { - let heap: ID3D12DescriptorHeap = device.CreateDescriptorHeap(&desc)?; - let cpu_start = heap.GetCPUDescriptorHandleForHeapStart(); - - let gpu_start = if (desc.Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE).0 != 0 { - Some(heap.GetGPUDescriptorHandleForHeapStart()) - } else { - None - }; - - Ok(D3D12DescriptorHeap( - Arc::new(D3D12DescriptorHeapInner { - device: device.clone(), - heap, - ty: desc.Type, - cpu_start, - gpu_start, - handle_size: device.GetDescriptorHandleIncrementSize(desc.Type) as usize, - start: AtomicUsize::new(0), - num_descriptors: desc.NumDescriptors as usize, - map: bitvec![AtomicUsize, Lsb0; 0; desc.NumDescriptors as usize] - .into_boxed_bitslice(), - }), - PhantomData::default(), - )) - } - } - - pub fn alloc_slot(&mut self) -> D3D12DescriptorHeapSlot { - let mut handle = D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 }; - - let inner = &self.0; - let start = inner.start.load(Ordering::Acquire); - for i in start..inner.num_descriptors { - if !inner.map[i] { - inner.map.set_aliased(i, true); - handle.ptr = inner.cpu_start.ptr + (i * inner.handle_size); - inner.start.store(i + 1, Ordering::Release); - - let gpu_handle = inner - .gpu_start - .map(|gpu_start| D3D12_GPU_DESCRIPTOR_HANDLE { - ptr: (handle.ptr as u64 - inner.cpu_start.ptr as u64) + gpu_start.ptr, - }); - - return Arc::new(D3D12DescriptorHeapSlotInner { - cpu_handle: handle, - slot: i, - heap: Arc::clone(&self.0), - gpu_handle, - _pd: Default::default(), - }); - } - } - - panic!("overflow") - } -} - -impl Drop for D3D12DescriptorHeapSlotInner { - fn drop(&mut self) { - let inner = &self.heap; - inner.map.set_aliased(self.slot, false); - // inner.start > self.slot => inner.start = self.slot - inner.start.fetch_min(self.slot, Ordering::AcqRel); - } -} diff --git a/librashader-runtime-d3d12/tests/hello_triangle/mod.rs b/librashader-runtime-d3d12/tests/hello_triangle/mod.rs index af1156b..ba76a49 100644 --- a/librashader-runtime-d3d12/tests/hello_triangle/mod.rs +++ b/librashader-runtime-d3d12/tests/hello_triangle/mod.rs @@ -229,7 +229,8 @@ unsafe extern "system" fn debug_log( pub mod d3d12_hello_triangle { use super::*; - use crate::hello_triangle::descriptor_heap::{CpuStagingHeap, D3D12DescriptorHeap}; + use crate::hello_triangle::descriptor_heap::CpuStagingHeap; + use d3d12_descriptor_heap::D3D12DescriptorHeap; use librashader_common::{Size, Viewport}; use librashader_runtime_d3d12::{D3D12InputImage, D3D12OutputView, FilterChainD3D12}; use std::mem::ManuallyDrop; @@ -480,7 +481,7 @@ pub mod d3d12_hello_triangle { fence, fence_value, fence_event, - frambuffer_heap: D3D12DescriptorHeap::new(&self.device, 1024).unwrap(), + frambuffer_heap: unsafe { D3D12DescriptorHeap::new(&self.device, 1024).unwrap() }, }); Ok(()) @@ -496,7 +497,7 @@ pub mod d3d12_hello_triangle { fn render(&mut self) { if let Some(resources) = &mut self.resources { - let srv = resources.frambuffer_heap.alloc_slot(); + let srv = resources.frambuffer_heap.allocate_descriptor().unwrap(); unsafe { self.device.CreateShaderResourceView( @@ -512,17 +513,12 @@ pub mod d3d12_hello_triangle { }, }, }), - *srv.deref().as_ref(), + *srv.as_ref(), ) } - populate_command_list( - resources, - &mut self.filter, - self.framecount, - *srv.deref().as_ref(), - ) - .unwrap(); + populate_command_list(resources, &mut self.filter, self.framecount, *srv.as_ref()) + .unwrap(); // Execute the command list. let command_list: Option = resources.command_list.cast().ok();