vk/11/12: bind VBO only once per frame to save on draw calls.

This commit is contained in:
chyyran 2023-02-06 00:23:37 -05:00
parent d4525ee23e
commit 3db89e5351
14 changed files with 93 additions and 100 deletions

8
Cargo.lock generated
View file

@ -8,6 +8,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "array-concat"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9180feb72ccbc07cfe5ef7fa8bbf86ca71490d5dc9ef8ea02c7298ba94e7f7d"
[[package]] [[package]]
name = "array-init" name = "array-init"
version = "2.1.0" version = "2.1.0"
@ -884,6 +890,7 @@ dependencies = [
name = "librashader-runtime-d3d11" name = "librashader-runtime-d3d11"
version = "0.1.0-beta.10" version = "0.1.0-beta.10"
dependencies = [ dependencies = [
"array-concat",
"bytemuck", "bytemuck",
"gfx-maths", "gfx-maths",
"librashader-common", "librashader-common",
@ -902,6 +909,7 @@ dependencies = [
name = "librashader-runtime-d3d12" name = "librashader-runtime-d3d12"
version = "0.1.0-beta.8" version = "0.1.0-beta.8"
dependencies = [ dependencies = [
"array-concat",
"array-init", "array-init",
"bitvec", "bitvec",
"bytemuck", "bytemuck",

View file

@ -23,6 +23,7 @@ spirv_cross = { package = "librashader-spirv-cross", version = "0.23" }
rustc-hash = "1.1.0" rustc-hash = "1.1.0"
bytemuck = "1.12.3" bytemuck = "1.12.3"
rayon = "1.6.1" rayon = "1.6.1"
array-concat = "0.5.2"
[target.'cfg(windows)'.dependencies.windows] [target.'cfg(windows)'.dependencies.windows]
version = "0.44.0" version = "0.44.0"

View file

@ -1,3 +1,4 @@
use array_concat::concat_arrays;
use crate::error; use crate::error;
use crate::error::assume_d3d11_init; use crate::error::assume_d3d11_init;
use bytemuck::offset_of; use bytemuck::offset_of;
@ -21,7 +22,7 @@ struct D3D11Vertex {
const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0]; const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];
static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[ const OFFSCREEN_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex { D3D11Vertex {
position: [-1.0, -1.0], position: [-1.0, -1.0],
texcoord: [0.0, 1.0], texcoord: [0.0, 1.0],
@ -44,7 +45,7 @@ static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[
}, },
]; ];
static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[ const FINAL_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex { D3D11Vertex {
position: [0.0, 0.0], position: [0.0, 0.0],
texcoord: [0.0, 1.0], texcoord: [0.0, 1.0],
@ -67,21 +68,21 @@ static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[
}, },
]; ];
static VBO_DATA: &[D3D11Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);
pub(crate) struct DrawQuad { pub(crate) struct DrawQuad {
final_vbo: ID3D11Buffer,
context: ID3D11DeviceContext, context: ID3D11DeviceContext,
offset: u32,
stride: u32, stride: u32,
offscreen_vbo: ID3D11Buffer, vbo: ID3D11Buffer,
} }
impl DrawQuad { impl DrawQuad {
pub fn new(device: &ID3D11Device, context: &ID3D11DeviceContext) -> error::Result<DrawQuad> { pub fn new(device: &ID3D11Device, context: &ID3D11DeviceContext) -> error::Result<DrawQuad> {
unsafe { unsafe {
let mut final_vbo = None; let mut vbo = None;
device.CreateBuffer( device.CreateBuffer(
&D3D11_BUFFER_DESC { &D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32, ByteWidth: 2 * std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE, Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER, BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(), CPUAccessFlags: Default::default(),
@ -89,62 +90,48 @@ impl DrawQuad {
StructureByteStride: 0, StructureByteStride: 0,
}, },
Some(&D3D11_SUBRESOURCE_DATA { Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: FINAL_VBO_DATA.as_ptr().cast(), pSysMem: VBO_DATA.as_ptr().cast(),
SysMemPitch: 0, SysMemPitch: 0,
SysMemSlicePitch: 0, SysMemSlicePitch: 0,
}), }),
Some(&mut final_vbo), Some(&mut vbo),
)?; )?;
assume_d3d11_init!(final_vbo, "CreateBuffer"); assume_d3d11_init!(vbo, "CreateBuffer");
let mut offscreen_vbo = None;
device.CreateBuffer(
&D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(),
MiscFlags: Default::default(),
StructureByteStride: 0,
},
Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: OFFSCREEN_VBO_DATA.as_ptr().cast(),
SysMemPitch: 0,
SysMemSlicePitch: 0,
}),
Some(&mut offscreen_vbo),
)?;
assume_d3d11_init!(offscreen_vbo, "CreateBuffer");
Ok(DrawQuad { Ok(DrawQuad {
final_vbo, vbo,
offscreen_vbo,
context: context.clone(), context: context.clone(),
offset: 0,
stride: std::mem::size_of::<D3D11Vertex>() as u32, stride: std::mem::size_of::<D3D11Vertex>() as u32,
}) })
} }
} }
pub fn bind_vertices(&self, vbo_type: QuadType) { pub fn bind_vbo_for_frame(&self) {
unsafe { unsafe {
self.context self.context
.IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); .IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
let buffer = match vbo_type {
QuadType::Offscreen => &self.offscreen_vbo,
QuadType::Final => &self.final_vbo,
};
self.context.IASetVertexBuffers( self.context.IASetVertexBuffers(
0, 0,
1, 1,
Some(&Some(buffer.clone())), Some(&Some(self.vbo.clone())),
Some(&self.stride), Some(&self.stride),
Some(&self.offset), Some(&0),
); );
} }
} }
pub fn draw_quad(&self, context: &ID3D11DeviceContext, vbo_type: QuadType) {
let offset = match vbo_type {
QuadType::Offscreen => 0,
QuadType::Final => 4,
};
unsafe {
context.Draw(4, offset);
}
}
pub fn get_spirv_cross_vbo_desc() -> [D3D11_INPUT_ELEMENT_DESC; 2] { pub fn get_spirv_cross_vbo_desc() -> [D3D11_INPUT_ELEMENT_DESC; 2] {
[ [
D3D11_INPUT_ELEMENT_DESC { D3D11_INPUT_ELEMENT_DESC {

View file

@ -17,7 +17,7 @@ use crate::error::{assume_d3d11_init, FilterChainError};
use crate::filter_pass::{ConstantBufferBinding, FilterPass}; use crate::filter_pass::{ConstantBufferBinding, FilterPass};
use crate::framebuffer::OwnedFramebuffer; use crate::framebuffer::OwnedFramebuffer;
use crate::options::{FilterChainOptionsD3D11, FrameOptionsD3D11}; use crate::options::{FilterChainOptionsD3D11, FrameOptionsD3D11};
use crate::quad_render::DrawQuad; use crate::draw_quad::DrawQuad;
use crate::render_target::RenderTarget; use crate::render_target::RenderTarget;
use crate::samplers::SamplerSet; use crate::samplers::SamplerSet;
use crate::util::d3d11_compile_bound_shader; use crate::util::d3d11_compile_bound_shader;
@ -472,6 +472,8 @@ impl FilterChainD3D11 {
let passes_len = passes.len(); let passes_len = passes.len();
let (pass, last) = passes.split_at_mut(passes_len - 1); let (pass, last) = passes.split_at_mut(passes_len - 1);
self.common.draw_quad.bind_vbo_for_frame();
for (index, pass) in pass.iter_mut().enumerate() { for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter; source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode; source.wrap_mode = pass.config.wrap_mode;

View file

@ -159,7 +159,6 @@ impl FilterPass {
} }
} }
unsafe { unsafe {
parent.draw_quad.bind_vertices(vbo_type);
context.IASetInputLayout(&self.vertex_layout); context.IASetInputLayout(&self.vertex_layout);
context.VSSetShader(&self.vertex_shader, None); context.VSSetShader(&self.vertex_shader, None);
context.PSSetShader(&self.pixel_shader, None); context.PSSetShader(&self.pixel_shader, None);
@ -243,10 +242,7 @@ impl FilterPass {
context.RSSetViewports(Some(&[output.output.viewport])) context.RSSetViewports(Some(&[output.output.viewport]))
} }
unsafe { parent.draw_quad.draw_quad(context, vbo_type);
// must be under primitive topology trianglestrip with quad
context.Draw(4, 0);
}
unsafe { unsafe {
// unbind resources. // unbind resources.

View file

@ -15,7 +15,7 @@ mod filter_pass;
mod framebuffer; mod framebuffer;
pub mod options; pub mod options;
mod parameters; mod parameters;
mod quad_render; mod draw_quad;
mod render_target; mod render_target;
mod samplers; mod samplers;
mod texture; mod texture;
@ -37,8 +37,9 @@ mod tests {
// "../test/slang-shaders/presets/crt-geom-ntsc-upscale-sharp.slangp", // "../test/slang-shaders/presets/crt-geom-ntsc-upscale-sharp.slangp",
// "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp", // "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp",
// "../test/null.slangp", // "../test/null.slangp",
const FILTER_PATH: &str = "../test/slang-shaders/scalefx/scalefx-9x.slangp";
const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp"; // const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp";
const IMAGE_PATH: &str = "../test/finalfightlong.png"; const IMAGE_PATH: &str = "../test/finalfightlong.png";
#[test] #[test]
fn triangle_d3d11_args() { fn triangle_d3d11_args() {

View file

@ -25,6 +25,7 @@ bytemuck = { version = "1.12.3", features = ["derive"] }
array-init = "2.1.0" array-init = "2.1.0"
bitvec = "1.0.1" bitvec = "1.0.1"
widestring = "1.0.2" widestring = "1.0.2"
array-concat = "0.5.2"
rayon = "1.6.1" rayon = "1.6.1"

View file

@ -604,6 +604,9 @@ impl FilterChainD3D12 {
cmd.SetGraphicsRootSignature(&self.common.root_signature.handle); cmd.SetGraphicsRootSignature(&self.common.root_signature.handle);
self.common.mipmap_gen.pin_root_signature(cmd); self.common.mipmap_gen.pin_root_signature(cmd);
} }
self.common.draw_quad.bind_vertices_for_frame(cmd);
for (index, pass) in pass.iter_mut().enumerate() { for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter; source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode; source.wrap_mode = pass.config.wrap_mode;

View file

@ -142,7 +142,6 @@ impl FilterPass {
output: &RenderTarget, output: &RenderTarget,
vbo_type: QuadType, vbo_type: QuadType,
) -> error::Result<()> { ) -> error::Result<()> {
parent.draw_quad.bind_vertices(cmd, vbo_type);
unsafe { unsafe {
cmd.SetPipelineState(&self.pipeline.handle); cmd.SetPipelineState(&self.pipeline.handle);
} }
@ -212,8 +211,7 @@ impl FilterPass {
bottom: output.output.size.height as i32, bottom: output.output.size.height as i32,
}]); }]);
// todo put this in drawquad parent.draw_quad.draw_quad(&cmd, vbo_type)
cmd.DrawInstanced(4, 1, 0, 0)
} }
unsafe { cmd.EndRenderPass() } unsafe { cmd.EndRenderPass() }

View file

@ -1,13 +1,11 @@
use array_concat::concat_arrays;
use crate::buffer::D3D12Buffer; use crate::buffer::D3D12Buffer;
use crate::error; use crate::error;
use bytemuck::{offset_of, Pod, Zeroable}; use bytemuck::{offset_of, Pod, Zeroable};
use librashader_runtime::quad::QuadType; use librashader_runtime::quad::QuadType;
use windows::core::PCSTR; use windows::core::PCSTR;
use windows::Win32::Graphics::Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; use windows::Win32::Graphics::Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
use windows::Win32::Graphics::Direct3D12::{ use windows::Win32::Graphics::Direct3D12::{ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW, ID3D12GraphicsCommandList4};
ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource,
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW,
};
use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_R32G32_FLOAT; use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_R32G32_FLOAT;
#[repr(C)] #[repr(C)]
@ -20,7 +18,7 @@ struct D3D12Vertex {
const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0]; const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];
static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[ const OFFSCREEN_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex { D3D12Vertex {
position: [-1.0, -1.0], position: [-1.0, -1.0],
texcoord: [0.0, 1.0], texcoord: [0.0, 1.0],
@ -43,7 +41,7 @@ static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[
}, },
]; ];
static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[ const FINAL_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex { D3D12Vertex {
position: [0.0, 0.0], position: [0.0, 0.0],
texcoord: [0.0, 1.0], texcoord: [0.0, 1.0],
@ -66,58 +64,49 @@ static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[
}, },
]; ];
static VBO_DATA: &[D3D12Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);
pub(crate) struct DrawQuad { pub(crate) struct DrawQuad {
offscreen_buffer: ID3D12Resource, buffer: ID3D12Resource,
offscreen_view: D3D12_VERTEX_BUFFER_VIEW, view: D3D12_VERTEX_BUFFER_VIEW,
final_buffer: ID3D12Resource,
final_view: D3D12_VERTEX_BUFFER_VIEW,
} }
impl DrawQuad { impl DrawQuad {
pub fn new(device: &ID3D12Device) -> error::Result<DrawQuad> { pub fn new(device: &ID3D12Device) -> error::Result<DrawQuad> {
let stride = std::mem::size_of::<D3D12Vertex>() as u32; let stride = std::mem::size_of::<D3D12Vertex>() as u32;
let size = std::mem::size_of::<[D3D12Vertex; 4]>() as u32; let size = 2 * std::mem::size_of::<[D3D12Vertex; 4]>() as u32;
let mut offscreen_buffer = D3D12Buffer::new(device, size as usize)?; let mut buffer = D3D12Buffer::new(device, size as usize)?;
offscreen_buffer buffer
.map(None)? .map(None)?
.slice .slice
.copy_from_slice(bytemuck::cast_slice(OFFSCREEN_VBO_DATA)); .copy_from_slice(bytemuck::cast_slice(VBO_DATA));
let offscreen_view = D3D12_VERTEX_BUFFER_VIEW { let view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: offscreen_buffer.gpu_address(), BufferLocation: buffer.gpu_address(),
SizeInBytes: size, SizeInBytes: size,
StrideInBytes: stride, StrideInBytes: stride,
}; };
let offscreen_buffer = offscreen_buffer.into_raw(); let buffer = buffer.into_raw();
Ok(DrawQuad { buffer, view })
let mut final_buffer = D3D12Buffer::new(device, size as usize)?;
final_buffer
.map(None)?
.slice
.copy_from_slice(bytemuck::cast_slice(FINAL_VBO_DATA));
let final_view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: final_buffer.gpu_address(),
SizeInBytes: size,
StrideInBytes: stride,
};
let final_buffer = final_buffer.into_raw();
Ok(DrawQuad { offscreen_buffer, offscreen_view, final_buffer, final_view })
} }
pub fn bind_vertices(&self, cmd: &ID3D12GraphicsCommandList, vbo_type: QuadType) { pub fn bind_vertices_for_frame(&self, cmd: &ID3D12GraphicsCommandList) {
unsafe { unsafe {
cmd.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); cmd.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
cmd.IASetVertexBuffers(0, Some(&[self.view]));
}
}
let view = match vbo_type { // frame uses ID3D12GraphicsCommandList4 for renderpasses, don't need to bother with the cast.
QuadType::Offscreen => [self.offscreen_view], pub fn draw_quad(&self, cmd: &ID3D12GraphicsCommandList4, vbo_type: QuadType) {
QuadType::Final => [self.final_view], let offset = match vbo_type {
QuadType::Offscreen => 0,
QuadType::Final => 4,
}; };
cmd.IASetVertexBuffers(0, Some(&view)); unsafe {
cmd.DrawInstanced(4, 1, offset, 0)
} }
} }

View file

@ -55,19 +55,25 @@ impl DrawQuad {
}) })
} }
pub fn bind_vbo(&self, cmd: vk::CommandBuffer, vbo: QuadType) { pub fn bind_vbo_for_frame(&self, cmd: vk::CommandBuffer) {
let offset = match vbo {
QuadType::Offscreen => 0,
QuadType::Final => std::mem::size_of::<[f32; 16]>(),
};
unsafe { unsafe {
self.device.cmd_bind_vertex_buffers( self.device.cmd_bind_vertex_buffers(
cmd, cmd,
0, 0,
&[self.buffer.handle], &[self.buffer.handle],
&[offset as vk::DeviceSize], &[0 as vk::DeviceSize],
) )
} }
} }
pub fn draw_quad(&self, cmd: vk::CommandBuffer, vbo: QuadType) {
let offset = match vbo {
QuadType::Offscreen => 0,
QuadType::Final => 4,
};
unsafe {
self.device.cmd_draw(cmd, 4, 1, offset, 0);
}
}
} }

View file

@ -670,6 +670,7 @@ impl FilterChainVulkan {
let frame_direction = options.map_or(1, |f| f.frame_direction); let frame_direction = options.map_or(1, |f| f.frame_direction);
self.common.draw_quad.bind_vbo_for_frame(cmd);
for (index, pass) in pass.iter_mut().enumerate() { for (index, pass) in pass.iter_mut().enumerate() {
let target = &self.output_framebuffers[index]; let target = &self.output_framebuffers[index];
source.filter_mode = pass.config.filter; source.filter_mode = pass.config.filter;

View file

@ -172,7 +172,7 @@ impl FilterPass {
); );
} }
parent.draw_quad.bind_vbo(cmd, vbo_type); // parent.draw_quad.bind_vbo(cmd, vbo_type);
parent.device.cmd_set_scissor( parent.device.cmd_set_scissor(
cmd, cmd,
@ -189,7 +189,7 @@ impl FilterPass {
parent parent
.device .device
.cmd_set_viewport(cmd, 0, &[output.output.size.into()]); .cmd_set_viewport(cmd, 0, &[output.output.size.into()]);
parent.device.cmd_draw(cmd, 4, 1, 0, 0); parent.draw_quad.draw_quad(cmd, vbo_type);
self.graphics_pipeline.end_rendering(&parent.device, cmd); self.graphics_pipeline.end_rendering(&parent.device, cmd);
} }
Ok(residual) Ok(residual)

View file

@ -45,8 +45,8 @@ mod tests {
dbg!("finished"); dbg!("finished");
let filter = FilterChainVulkan::load_from_path( let filter = FilterChainVulkan::load_from_path(
&base, &base,
// "../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp", "../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp",
"../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp", // "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp",
// "../test/basic.slangp", // "../test/basic.slangp",
Some(&FilterChainOptionsVulkan { Some(&FilterChainOptionsVulkan {
frames_in_flight: 3, frames_in_flight: 3,