diff --git a/librashader-runtime-d3d12/src/filter_chain.rs b/librashader-runtime-d3d12/src/filter_chain.rs index 5ecfa9f..9aa57f5 100644 --- a/librashader-runtime-d3d12/src/filter_chain.rs +++ b/librashader-runtime-d3d12/src/filter_chain.rs @@ -1,6 +1,6 @@ use std::collections::VecDeque; use crate::{error}; -use crate::heap::{D3D12DescriptorHeap, CpuStagingHeap, ResourceWorkHeap}; +use crate::heap::{D3D12DescriptorHeap, CpuStagingHeap, ResourceWorkHeap, SamplerWorkHeap}; use crate::samplers::SamplerSet; use crate::luts::LutTexture; use librashader_presets::{ShaderPreset, TextureConfig}; @@ -61,7 +61,6 @@ pub(crate) struct FilterCommon { pub luts: FxHashMap, pub mipmap_gen: D3D12MipmapGen, pub root_signature: D3D12RootSignature, - pub work_heap: D3D12DescriptorHeap, pub draw_quad: DrawQuad, } @@ -83,6 +82,8 @@ impl FilterChainD3D12 { preset: ShaderPreset, _options: Option<&()>, ) -> error::Result { + let shader_count = preset.shaders.len(); + let lut_count = preset.textures.len(); let (passes, semantics) = HLSL::compile_preset_passes::>( preset.shaders, &preset.textures, @@ -94,7 +95,9 @@ impl FilterChainD3D12 { let draw_quad = DrawQuad::new(device)?; let mut staging_heap = D3D12DescriptorHeap::new(device, - (MAX_BINDINGS_COUNT as usize) * 64 + 2048 + preset.textures.len())?; + (MAX_BINDINGS_COUNT as usize) * + shader_count + 2048 + lut_count)?; + let luts = FilterChainD3D12::load_luts(device, &mut staging_heap, &preset.textures, &mipmap_gen).unwrap(); @@ -104,8 +107,6 @@ impl FilterChainD3D12 { let filters = FilterChainD3D12::init_passes(device, &root_signature, passes, &semantics)?; - let work_heap = - D3D12DescriptorHeap::::new(device, (MAX_BINDINGS_COUNT as usize) * 64 + 2048)?; // initialize output framebuffers @@ -159,7 +160,6 @@ impl FilterChainD3D12 { luts, mipmap_gen, root_signature, - work_heap, draw_quad, config: FilterMutable { passes_enabled: preset.shader_count as usize, @@ -332,7 +332,30 @@ impl FilterChainD3D12 { -> error::Result> { let mut filters = Vec::new(); - for (index, (config, source, mut reflect)) in passes.into_iter().enumerate() { + let shader_count = passes.len(); + let work_heap = + D3D12DescriptorHeap::::new(device, + (MAX_BINDINGS_COUNT as usize) * + shader_count)?; + let work_heaps = unsafe { + work_heap.suballocate(shader_count) + }; + + + let sampler_work_heap = + D3D12DescriptorHeap::new(device, + (MAX_BINDINGS_COUNT as usize) * shader_count)?; + + let sampler_work_heaps = unsafe { + sampler_work_heap.suballocate(shader_count) + }; + + for (index, (((config, source, mut reflect), + texture_heap), sampler_heap)) + in passes.into_iter() + .zip(work_heaps) + .zip(sampler_work_heaps) + .enumerate() { let reflection = reflect.reflect(index, semantics)?; let hlsl = reflect.compile(None)?; @@ -396,6 +419,8 @@ impl FilterChainD3D12 { ubo_cbuffer, pipeline: graphics_pipeline, config: config.clone(), + texture_heap, + sampler_heap }) } diff --git a/librashader-runtime-d3d12/src/filter_pass.rs b/librashader-runtime-d3d12/src/filter_pass.rs index 6713ba7..6b40637 100644 --- a/librashader-runtime-d3d12/src/filter_pass.rs +++ b/librashader-runtime-d3d12/src/filter_pass.rs @@ -1,10 +1,14 @@ use rustc_hash::FxHashMap; +use librashader_common::Size; use librashader_presets::ShaderPassConfig; use librashader_reflect::reflect::semantics::{MemberOffset, UniformBinding}; use librashader_reflect::reflect::ShaderReflection; +use librashader_runtime::binding::TextureInput; use librashader_runtime::uniforms::UniformStorage; use crate::buffer::D3D12ConstantBuffer; use crate::graphics_pipeline::D3D12GraphicsPipeline; +use crate::heap::{D3D12DescriptorHeap, ResourceWorkHeap, SamplerWorkHeap}; +use crate::texture::InputTexture; pub(crate) struct FilterPass { pub(crate) pipeline: D3D12GraphicsPipeline, @@ -14,5 +18,12 @@ pub(crate) struct FilterPass { pub uniform_storage: UniformStorage, pub(crate) push_cbuffer: Option, pub(crate) ubo_cbuffer: Option, + pub(crate) texture_heap: D3D12DescriptorHeap, + pub(crate) sampler_heap: D3D12DescriptorHeap, } +impl TextureInput for InputTexture { + fn size(&self) -> Size { + self.size + } +} diff --git a/librashader-runtime-d3d12/src/heap.rs b/librashader-runtime-d3d12/src/heap.rs index 568210b..16c3c30 100644 --- a/librashader-runtime-d3d12/src/heap.rs +++ b/librashader-runtime-d3d12/src/heap.rs @@ -2,12 +2,7 @@ use crate::error; use std::cell::RefCell; use std::marker::PhantomData; use std::sync::Arc; -use windows::Win32::Graphics::Direct3D12::{ - ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, - D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - D3D12_GPU_DESCRIPTOR_HANDLE, -}; +use windows::Win32::Graphics::Direct3D12::{ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_GPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_TYPE}; #[const_trait] pub trait D3D12HeapType { @@ -24,7 +19,9 @@ pub struct CpuStagingHeap; #[derive(Clone)] pub struct ResourceWorkHeap; -impl D3D12ShaderVisibleHeapType for SamplerPaletteHeap {} +#[derive(Clone)] +pub struct SamplerWorkHeap; + impl const D3D12HeapType for SamplerPaletteHeap { // sampler palettes just get set directly @@ -32,7 +29,7 @@ impl const D3D12HeapType for SamplerPaletteHeap { D3D12_DESCRIPTOR_HEAP_DESC { Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, NumDescriptors: size as u32, - Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + Flags: D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, } } @@ -63,6 +60,19 @@ impl const D3D12HeapType for ResourceWorkHeap { } } +impl D3D12ShaderVisibleHeapType for SamplerWorkHeap {} +impl const D3D12HeapType for SamplerWorkHeap { + // Lut texture heaps are CPU only and get bound to the descriptor heap of the shader. + fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC { + D3D12_DESCRIPTOR_HEAP_DESC { + Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + NumDescriptors: size as u32, + Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + NodeMask: 0, + } + } +} + #[derive(Clone)] pub struct D3D12DescriptorHeapSlot { cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE, @@ -99,14 +109,16 @@ impl From<&D3D12DescriptorHeap> for ID3D12Desc } } +#[derive(Debug)] struct D3D12DescriptorHeapInner { device: ID3D12Device, heap: ID3D12DescriptorHeap, - desc: D3D12_DESCRIPTOR_HEAP_DESC, + ty: D3D12_DESCRIPTOR_HEAP_TYPE, cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE, gpu_start: Option, handle_size: usize, start: usize, + num_descriptors: usize, // Bit flag representation of available handles in the heap. // // 0 - Occupied @@ -144,11 +156,12 @@ impl D3D12DescriptorHeap { Arc::new(RefCell::new(D3D12DescriptorHeapInner { device: device.clone(), heap, - desc, + ty: desc.Type, cpu_start, gpu_start, handle_size: device.GetDescriptorHandleIncrementSize(desc.Type) as usize, start: 0, + num_descriptors: desc.NumDescriptors as usize, map: vec![false; desc.NumDescriptors as usize].into_boxed_slice(), })), PhantomData::default(), @@ -156,11 +169,71 @@ impl D3D12DescriptorHeap { } } + /// suballocates this heap into equally sized chunks. + /// if there aren't enough descriptors, throws an error. + /// + /// it is UB (programmer error) to call this if the descriptor heap already has + /// descriptors allocated for it. + /// + /// size must also divide equally into the size of the heap. + pub unsafe fn suballocate(self, size: usize) -> Vec> { + // has to be called right after creation. + assert_eq!(Arc::strong_count(&self.0), 1, + "D3D12DescriptorHeap::suballocate can only be callled immediately after creation."); + + let inner = Arc::try_unwrap(self.0) + .expect("[d3d12] undefined behaviour to suballocate a descriptor heap with live descriptors.") + .into_inner(); + + // number of suballocated heaps + let num_heaps = inner.num_descriptors / size; + let remainder = inner.num_descriptors % size; + + assert_eq!(remainder, 0, "D3D12DescriptorHeap::suballocate \ + must be called with a size that equally divides the number of descriptors"); + + let mut heaps = Vec::new(); + + let mut start = 0; + let root_cpu_ptr = inner.cpu_start.ptr; + let root_gpu_ptr = inner.gpu_start.map(|p| p.ptr); + + for _ in 0..num_heaps { + let new_cpu_start = root_cpu_ptr + (start * inner.handle_size); + let new_gpu_start = root_gpu_ptr + .map(|r| D3D12_GPU_DESCRIPTOR_HANDLE { + ptr: r + (start as u64 * inner.handle_size as u64) + }); + + heaps.push(D3D12DescriptorHeapInner { + device: inner.device.clone(), + heap: inner.heap.clone(), + ty: inner.ty, + cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE { + ptr: new_cpu_start + }, + gpu_start: new_gpu_start, + handle_size: inner.handle_size, + start: 0, + num_descriptors: size, + map: vec![false; size].into_boxed_slice(), + }); + + start += size; + } + + heaps.into_iter() + .map(|inner| D3D12DescriptorHeap( + Arc::new(RefCell::new(inner)), + PhantomData::default())) + .collect() + } + pub fn alloc_slot(&mut self) -> error::Result> { let mut handle = D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 }; let mut inner = self.0.borrow_mut(); - for i in inner.start..inner.desc.NumDescriptors as usize { + for i in inner.start..inner.num_descriptors { if !inner.map[i] { inner.map[i] = true; handle.ptr = inner.cpu_start.ptr + (i * inner.handle_size); @@ -198,7 +271,7 @@ impl D3D12DescriptorHeap { 1, *dest[i].as_ref(), *source[i], - inner.desc.Type + inner.ty ); } } diff --git a/librashader-runtime-d3d12/src/mipmap.rs b/librashader-runtime-d3d12/src/mipmap.rs index 352c49b..4e304fa 100644 --- a/librashader-runtime-d3d12/src/mipmap.rs +++ b/librashader-runtime-d3d12/src/mipmap.rs @@ -202,7 +202,7 @@ impl D3D12MipmapGen { for i in 1..miplevels as u32 { let scaled = size.scale_mipmap(i); - let mipmap_params =MipConstants { + let mipmap_params = MipConstants { inv_out_texel_size: [ 1.0 / scaled.width as f32, 1.0 / scaled.height as f32 diff --git a/librashader-runtime-d3d12/src/texture.rs b/librashader-runtime-d3d12/src/texture.rs index 55f9a2c..308dcb4 100644 --- a/librashader-runtime-d3d12/src/texture.rs +++ b/librashader-runtime-d3d12/src/texture.rs @@ -60,7 +60,7 @@ impl OutputTexture { pub struct InputTexture { descriptor: InputDescriptor, - size: Size, + pub(crate) size: Size, format: ImageFormat, wrap_mode: WrapMode, filter: FilterMode