d3d12: allocate work heaps for each pass
This commit is contained in:
parent
624a749098
commit
3506e1a3da
|
@ -1,6 +1,6 @@
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use crate::{error};
|
use crate::{error};
|
||||||
use crate::heap::{D3D12DescriptorHeap, CpuStagingHeap, ResourceWorkHeap};
|
use crate::heap::{D3D12DescriptorHeap, CpuStagingHeap, ResourceWorkHeap, SamplerWorkHeap};
|
||||||
use crate::samplers::SamplerSet;
|
use crate::samplers::SamplerSet;
|
||||||
use crate::luts::LutTexture;
|
use crate::luts::LutTexture;
|
||||||
use librashader_presets::{ShaderPreset, TextureConfig};
|
use librashader_presets::{ShaderPreset, TextureConfig};
|
||||||
|
@ -61,7 +61,6 @@ pub(crate) struct FilterCommon {
|
||||||
pub luts: FxHashMap<usize, LutTexture>,
|
pub luts: FxHashMap<usize, LutTexture>,
|
||||||
pub mipmap_gen: D3D12MipmapGen,
|
pub mipmap_gen: D3D12MipmapGen,
|
||||||
pub root_signature: D3D12RootSignature,
|
pub root_signature: D3D12RootSignature,
|
||||||
pub work_heap: D3D12DescriptorHeap<ResourceWorkHeap>,
|
|
||||||
pub draw_quad: DrawQuad,
|
pub draw_quad: DrawQuad,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,6 +82,8 @@ impl FilterChainD3D12 {
|
||||||
preset: ShaderPreset,
|
preset: ShaderPreset,
|
||||||
_options: Option<&()>,
|
_options: Option<&()>,
|
||||||
) -> error::Result<FilterChainD3D12> {
|
) -> error::Result<FilterChainD3D12> {
|
||||||
|
let shader_count = preset.shaders.len();
|
||||||
|
let lut_count = preset.textures.len();
|
||||||
let (passes, semantics) = HLSL::compile_preset_passes::<GlslangCompilation, Box<dyn Error>>(
|
let (passes, semantics) = HLSL::compile_preset_passes::<GlslangCompilation, Box<dyn Error>>(
|
||||||
preset.shaders,
|
preset.shaders,
|
||||||
&preset.textures,
|
&preset.textures,
|
||||||
|
@ -94,7 +95,9 @@ impl FilterChainD3D12 {
|
||||||
let draw_quad = DrawQuad::new(device)?;
|
let draw_quad = DrawQuad::new(device)?;
|
||||||
let mut staging_heap =
|
let mut staging_heap =
|
||||||
D3D12DescriptorHeap::new(device,
|
D3D12DescriptorHeap::new(device,
|
||||||
(MAX_BINDINGS_COUNT as usize) * 64 + 2048 + preset.textures.len())?;
|
(MAX_BINDINGS_COUNT as usize) *
|
||||||
|
shader_count + 2048 + lut_count)?;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
let luts = FilterChainD3D12::load_luts(device, &mut staging_heap, &preset.textures, &mipmap_gen).unwrap();
|
let luts = FilterChainD3D12::load_luts(device, &mut staging_heap, &preset.textures, &mipmap_gen).unwrap();
|
||||||
|
@ -104,8 +107,6 @@ impl FilterChainD3D12 {
|
||||||
let filters = FilterChainD3D12::init_passes(device, &root_signature, passes, &semantics)?;
|
let filters = FilterChainD3D12::init_passes(device, &root_signature, passes, &semantics)?;
|
||||||
|
|
||||||
|
|
||||||
let work_heap =
|
|
||||||
D3D12DescriptorHeap::<ResourceWorkHeap>::new(device, (MAX_BINDINGS_COUNT as usize) * 64 + 2048)?;
|
|
||||||
|
|
||||||
|
|
||||||
// initialize output framebuffers
|
// initialize output framebuffers
|
||||||
|
@ -159,7 +160,6 @@ impl FilterChainD3D12 {
|
||||||
luts,
|
luts,
|
||||||
mipmap_gen,
|
mipmap_gen,
|
||||||
root_signature,
|
root_signature,
|
||||||
work_heap,
|
|
||||||
draw_quad,
|
draw_quad,
|
||||||
config: FilterMutable {
|
config: FilterMutable {
|
||||||
passes_enabled: preset.shader_count as usize,
|
passes_enabled: preset.shader_count as usize,
|
||||||
|
@ -332,7 +332,30 @@ impl FilterChainD3D12 {
|
||||||
-> error::Result<Vec<FilterPass>> {
|
-> error::Result<Vec<FilterPass>> {
|
||||||
|
|
||||||
let mut filters = Vec::new();
|
let mut filters = Vec::new();
|
||||||
for (index, (config, source, mut reflect)) in passes.into_iter().enumerate() {
|
let shader_count = passes.len();
|
||||||
|
let work_heap =
|
||||||
|
D3D12DescriptorHeap::<ResourceWorkHeap>::new(device,
|
||||||
|
(MAX_BINDINGS_COUNT as usize) *
|
||||||
|
shader_count)?;
|
||||||
|
let work_heaps = unsafe {
|
||||||
|
work_heap.suballocate(shader_count)
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
let sampler_work_heap =
|
||||||
|
D3D12DescriptorHeap::new(device,
|
||||||
|
(MAX_BINDINGS_COUNT as usize) * shader_count)?;
|
||||||
|
|
||||||
|
let sampler_work_heaps = unsafe {
|
||||||
|
sampler_work_heap.suballocate(shader_count)
|
||||||
|
};
|
||||||
|
|
||||||
|
for (index, (((config, source, mut reflect),
|
||||||
|
texture_heap), sampler_heap))
|
||||||
|
in passes.into_iter()
|
||||||
|
.zip(work_heaps)
|
||||||
|
.zip(sampler_work_heaps)
|
||||||
|
.enumerate() {
|
||||||
let reflection = reflect.reflect(index, semantics)?;
|
let reflection = reflect.reflect(index, semantics)?;
|
||||||
let hlsl = reflect.compile(None)?;
|
let hlsl = reflect.compile(None)?;
|
||||||
|
|
||||||
|
@ -396,6 +419,8 @@ impl FilterChainD3D12 {
|
||||||
ubo_cbuffer,
|
ubo_cbuffer,
|
||||||
pipeline: graphics_pipeline,
|
pipeline: graphics_pipeline,
|
||||||
config: config.clone(),
|
config: config.clone(),
|
||||||
|
texture_heap,
|
||||||
|
sampler_heap
|
||||||
})
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
use rustc_hash::FxHashMap;
|
use rustc_hash::FxHashMap;
|
||||||
|
use librashader_common::Size;
|
||||||
use librashader_presets::ShaderPassConfig;
|
use librashader_presets::ShaderPassConfig;
|
||||||
use librashader_reflect::reflect::semantics::{MemberOffset, UniformBinding};
|
use librashader_reflect::reflect::semantics::{MemberOffset, UniformBinding};
|
||||||
use librashader_reflect::reflect::ShaderReflection;
|
use librashader_reflect::reflect::ShaderReflection;
|
||||||
|
use librashader_runtime::binding::TextureInput;
|
||||||
use librashader_runtime::uniforms::UniformStorage;
|
use librashader_runtime::uniforms::UniformStorage;
|
||||||
use crate::buffer::D3D12ConstantBuffer;
|
use crate::buffer::D3D12ConstantBuffer;
|
||||||
use crate::graphics_pipeline::D3D12GraphicsPipeline;
|
use crate::graphics_pipeline::D3D12GraphicsPipeline;
|
||||||
|
use crate::heap::{D3D12DescriptorHeap, ResourceWorkHeap, SamplerWorkHeap};
|
||||||
|
use crate::texture::InputTexture;
|
||||||
|
|
||||||
pub(crate) struct FilterPass {
|
pub(crate) struct FilterPass {
|
||||||
pub(crate) pipeline: D3D12GraphicsPipeline,
|
pub(crate) pipeline: D3D12GraphicsPipeline,
|
||||||
|
@ -14,5 +18,12 @@ pub(crate) struct FilterPass {
|
||||||
pub uniform_storage: UniformStorage,
|
pub uniform_storage: UniformStorage,
|
||||||
pub(crate) push_cbuffer: Option<D3D12ConstantBuffer>,
|
pub(crate) push_cbuffer: Option<D3D12ConstantBuffer>,
|
||||||
pub(crate) ubo_cbuffer: Option<D3D12ConstantBuffer>,
|
pub(crate) ubo_cbuffer: Option<D3D12ConstantBuffer>,
|
||||||
|
pub(crate) texture_heap: D3D12DescriptorHeap<ResourceWorkHeap>,
|
||||||
|
pub(crate) sampler_heap: D3D12DescriptorHeap<SamplerWorkHeap>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TextureInput for InputTexture {
|
||||||
|
fn size(&self) -> Size<u32> {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -2,12 +2,7 @@ use crate::error;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use windows::Win32::Graphics::Direct3D12::{
|
use windows::Win32::Graphics::Direct3D12::{ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_GPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_TYPE};
|
||||||
ID3D12DescriptorHeap, ID3D12Device, D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_DESCRIPTOR_HEAP_DESC,
|
|
||||||
D3D12_DESCRIPTOR_HEAP_FLAG_NONE, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
|
||||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
|
||||||
D3D12_GPU_DESCRIPTOR_HANDLE,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[const_trait]
|
#[const_trait]
|
||||||
pub trait D3D12HeapType {
|
pub trait D3D12HeapType {
|
||||||
|
@ -24,7 +19,9 @@ pub struct CpuStagingHeap;
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ResourceWorkHeap;
|
pub struct ResourceWorkHeap;
|
||||||
|
|
||||||
impl D3D12ShaderVisibleHeapType for SamplerPaletteHeap {}
|
#[derive(Clone)]
|
||||||
|
pub struct SamplerWorkHeap;
|
||||||
|
|
||||||
|
|
||||||
impl const D3D12HeapType for SamplerPaletteHeap {
|
impl const D3D12HeapType for SamplerPaletteHeap {
|
||||||
// sampler palettes just get set directly
|
// sampler palettes just get set directly
|
||||||
|
@ -32,7 +29,7 @@ impl const D3D12HeapType for SamplerPaletteHeap {
|
||||||
D3D12_DESCRIPTOR_HEAP_DESC {
|
D3D12_DESCRIPTOR_HEAP_DESC {
|
||||||
Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||||
NumDescriptors: size as u32,
|
NumDescriptors: size as u32,
|
||||||
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
|
||||||
NodeMask: 0,
|
NodeMask: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,6 +60,19 @@ impl const D3D12HeapType for ResourceWorkHeap {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl D3D12ShaderVisibleHeapType for SamplerWorkHeap {}
|
||||||
|
impl const D3D12HeapType for SamplerWorkHeap {
|
||||||
|
// Sampler work heaps hold sampler descriptors and are created shader-visible.
|
||||||
|
fn get_desc(size: usize) -> D3D12_DESCRIPTOR_HEAP_DESC {
|
||||||
|
D3D12_DESCRIPTOR_HEAP_DESC {
|
||||||
|
Type: D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||||
|
NumDescriptors: size as u32,
|
||||||
|
Flags: D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
||||||
|
NodeMask: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct D3D12DescriptorHeapSlot<T> {
|
pub struct D3D12DescriptorHeapSlot<T> {
|
||||||
cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
cpu_handle: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||||
|
@ -99,14 +109,16 @@ impl<T: D3D12ShaderVisibleHeapType> From<&D3D12DescriptorHeap<T>> for ID3D12Desc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
struct D3D12DescriptorHeapInner {
|
struct D3D12DescriptorHeapInner {
|
||||||
device: ID3D12Device,
|
device: ID3D12Device,
|
||||||
heap: ID3D12DescriptorHeap,
|
heap: ID3D12DescriptorHeap,
|
||||||
desc: D3D12_DESCRIPTOR_HEAP_DESC,
|
ty: D3D12_DESCRIPTOR_HEAP_TYPE,
|
||||||
cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE,
|
cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE,
|
||||||
gpu_start: Option<D3D12_GPU_DESCRIPTOR_HANDLE>,
|
gpu_start: Option<D3D12_GPU_DESCRIPTOR_HANDLE>,
|
||||||
handle_size: usize,
|
handle_size: usize,
|
||||||
start: usize,
|
start: usize,
|
||||||
|
num_descriptors: usize,
|
||||||
// Bit flag representation of available handles in the heap.
|
// Bit flag representation of available handles in the heap.
|
||||||
//
|
//
|
||||||
// 0 - Occupied
|
// 0 - Occupied
|
||||||
|
@ -144,11 +156,12 @@ impl<T> D3D12DescriptorHeap<T> {
|
||||||
Arc::new(RefCell::new(D3D12DescriptorHeapInner {
|
Arc::new(RefCell::new(D3D12DescriptorHeapInner {
|
||||||
device: device.clone(),
|
device: device.clone(),
|
||||||
heap,
|
heap,
|
||||||
desc,
|
ty: desc.Type,
|
||||||
cpu_start,
|
cpu_start,
|
||||||
gpu_start,
|
gpu_start,
|
||||||
handle_size: device.GetDescriptorHandleIncrementSize(desc.Type) as usize,
|
handle_size: device.GetDescriptorHandleIncrementSize(desc.Type) as usize,
|
||||||
start: 0,
|
start: 0,
|
||||||
|
num_descriptors: desc.NumDescriptors as usize,
|
||||||
map: vec![false; desc.NumDescriptors as usize].into_boxed_slice(),
|
map: vec![false; desc.NumDescriptors as usize].into_boxed_slice(),
|
||||||
})),
|
})),
|
||||||
PhantomData::default(),
|
PhantomData::default(),
|
||||||
|
@ -156,11 +169,71 @@ impl<T> D3D12DescriptorHeap<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Suballocates this heap into equally sized chunks of `size` descriptors each.
|
||||||
|
/// Panics if the heap cannot be split evenly into chunks of `size` descriptors.
|
||||||
|
///
|
||||||
|
/// it is UB (programmer error) to call this if the descriptor heap already has
|
||||||
|
/// descriptors allocated for it.
|
||||||
|
///
|
||||||
|
/// size must also divide equally into the size of the heap.
|
||||||
|
pub unsafe fn suballocate(self, size: usize) -> Vec<D3D12DescriptorHeap<T>> {
|
||||||
|
// has to be called right after creation.
|
||||||
|
assert_eq!(Arc::strong_count(&self.0), 1,
|
||||||
|
"D3D12DescriptorHeap::suballocate can only be callled immediately after creation.");
|
||||||
|
|
||||||
|
let inner = Arc::try_unwrap(self.0)
|
||||||
|
.expect("[d3d12] undefined behaviour to suballocate a descriptor heap with live descriptors.")
|
||||||
|
.into_inner();
|
||||||
|
|
||||||
|
// number of suballocated heaps
|
||||||
|
let num_heaps = inner.num_descriptors / size;
|
||||||
|
let remainder = inner.num_descriptors % size;
|
||||||
|
|
||||||
|
assert_eq!(remainder, 0, "D3D12DescriptorHeap::suballocate \
|
||||||
|
must be called with a size that equally divides the number of descriptors");
|
||||||
|
|
||||||
|
let mut heaps = Vec::new();
|
||||||
|
|
||||||
|
let mut start = 0;
|
||||||
|
let root_cpu_ptr = inner.cpu_start.ptr;
|
||||||
|
let root_gpu_ptr = inner.gpu_start.map(|p| p.ptr);
|
||||||
|
|
||||||
|
for _ in 0..num_heaps {
|
||||||
|
let new_cpu_start = root_cpu_ptr + (start * inner.handle_size);
|
||||||
|
let new_gpu_start = root_gpu_ptr
|
||||||
|
.map(|r| D3D12_GPU_DESCRIPTOR_HANDLE {
|
||||||
|
ptr: r + (start as u64 * inner.handle_size as u64)
|
||||||
|
});
|
||||||
|
|
||||||
|
heaps.push(D3D12DescriptorHeapInner {
|
||||||
|
device: inner.device.clone(),
|
||||||
|
heap: inner.heap.clone(),
|
||||||
|
ty: inner.ty,
|
||||||
|
cpu_start: D3D12_CPU_DESCRIPTOR_HANDLE {
|
||||||
|
ptr: new_cpu_start
|
||||||
|
},
|
||||||
|
gpu_start: new_gpu_start,
|
||||||
|
handle_size: inner.handle_size,
|
||||||
|
start: 0,
|
||||||
|
num_descriptors: size,
|
||||||
|
map: vec![false; size].into_boxed_slice(),
|
||||||
|
});
|
||||||
|
|
||||||
|
start += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
heaps.into_iter()
|
||||||
|
.map(|inner| D3D12DescriptorHeap(
|
||||||
|
Arc::new(RefCell::new(inner)),
|
||||||
|
PhantomData::default()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn alloc_slot(&mut self) -> error::Result<D3D12DescriptorHeapSlot<T>> {
|
pub fn alloc_slot(&mut self) -> error::Result<D3D12DescriptorHeapSlot<T>> {
|
||||||
let mut handle = D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 };
|
let mut handle = D3D12_CPU_DESCRIPTOR_HANDLE { ptr: 0 };
|
||||||
|
|
||||||
let mut inner = self.0.borrow_mut();
|
let mut inner = self.0.borrow_mut();
|
||||||
for i in inner.start..inner.desc.NumDescriptors as usize {
|
for i in inner.start..inner.num_descriptors {
|
||||||
if !inner.map[i] {
|
if !inner.map[i] {
|
||||||
inner.map[i] = true;
|
inner.map[i] = true;
|
||||||
handle.ptr = inner.cpu_start.ptr + (i * inner.handle_size);
|
handle.ptr = inner.cpu_start.ptr + (i * inner.handle_size);
|
||||||
|
@ -198,7 +271,7 @@ impl<T> D3D12DescriptorHeap<T> {
|
||||||
1,
|
1,
|
||||||
*dest[i].as_ref(),
|
*dest[i].as_ref(),
|
||||||
*source[i],
|
*source[i],
|
||||||
inner.desc.Type
|
inner.ty
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -202,7 +202,7 @@ impl D3D12MipmapGen {
|
||||||
|
|
||||||
for i in 1..miplevels as u32 {
|
for i in 1..miplevels as u32 {
|
||||||
let scaled = size.scale_mipmap(i);
|
let scaled = size.scale_mipmap(i);
|
||||||
let mipmap_params =MipConstants {
|
let mipmap_params = MipConstants {
|
||||||
inv_out_texel_size: [
|
inv_out_texel_size: [
|
||||||
1.0 / scaled.width as f32,
|
1.0 / scaled.width as f32,
|
||||||
1.0 / scaled.height as f32
|
1.0 / scaled.height as f32
|
||||||
|
|
|
@ -60,7 +60,7 @@ impl OutputTexture {
|
||||||
|
|
||||||
pub struct InputTexture {
|
pub struct InputTexture {
|
||||||
descriptor: InputDescriptor,
|
descriptor: InputDescriptor,
|
||||||
size: Size<u32>,
|
pub(crate) size: Size<u32>,
|
||||||
format: ImageFormat,
|
format: ImageFormat,
|
||||||
wrap_mode: WrapMode,
|
wrap_mode: WrapMode,
|
||||||
filter: FilterMode
|
filter: FilterMode
|
||||||
|
|
Loading…
Reference in a new issue