vk/d3d11/d3d12: bind the VBO only once per frame instead of once per pass, to save on redundant vertex-buffer bind calls.

This commit is contained in:
chyyran 2023-02-06 00:23:37 -05:00
parent d4525ee23e
commit 3db89e5351
14 changed files with 93 additions and 100 deletions

8
Cargo.lock generated
View file

@ -8,6 +8,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "array-concat"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9180feb72ccbc07cfe5ef7fa8bbf86ca71490d5dc9ef8ea02c7298ba94e7f7d"
[[package]]
name = "array-init"
version = "2.1.0"
@ -884,6 +890,7 @@ dependencies = [
name = "librashader-runtime-d3d11"
version = "0.1.0-beta.10"
dependencies = [
"array-concat",
"bytemuck",
"gfx-maths",
"librashader-common",
@ -902,6 +909,7 @@ dependencies = [
name = "librashader-runtime-d3d12"
version = "0.1.0-beta.8"
dependencies = [
"array-concat",
"array-init",
"bitvec",
"bytemuck",

View file

@ -23,6 +23,7 @@ spirv_cross = { package = "librashader-spirv-cross", version = "0.23" }
rustc-hash = "1.1.0"
bytemuck = "1.12.3"
rayon = "1.6.1"
array-concat = "0.5.2"
[target.'cfg(windows)'.dependencies.windows]
version = "0.44.0"

View file

@ -1,3 +1,4 @@
use array_concat::concat_arrays;
use crate::error;
use crate::error::assume_d3d11_init;
use bytemuck::offset_of;
@ -21,7 +22,7 @@ struct D3D11Vertex {
const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];
static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[
const OFFSCREEN_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex {
position: [-1.0, -1.0],
texcoord: [0.0, 1.0],
@ -44,7 +45,7 @@ static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[
},
];
static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[
const FINAL_VBO_DATA: [D3D11Vertex; 4] = [
D3D11Vertex {
position: [0.0, 0.0],
texcoord: [0.0, 1.0],
@ -67,21 +68,21 @@ static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[
},
];
static VBO_DATA: &[D3D11Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);
pub(crate) struct DrawQuad {
final_vbo: ID3D11Buffer,
context: ID3D11DeviceContext,
offset: u32,
stride: u32,
offscreen_vbo: ID3D11Buffer,
vbo: ID3D11Buffer,
}
impl DrawQuad {
pub fn new(device: &ID3D11Device, context: &ID3D11DeviceContext) -> error::Result<DrawQuad> {
unsafe {
let mut final_vbo = None;
let mut vbo = None;
device.CreateBuffer(
&D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
ByteWidth: 2 * std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(),
@ -89,62 +90,48 @@ impl DrawQuad {
StructureByteStride: 0,
},
Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: FINAL_VBO_DATA.as_ptr().cast(),
pSysMem: VBO_DATA.as_ptr().cast(),
SysMemPitch: 0,
SysMemSlicePitch: 0,
}),
Some(&mut final_vbo),
Some(&mut vbo),
)?;
assume_d3d11_init!(final_vbo, "CreateBuffer");
let mut offscreen_vbo = None;
device.CreateBuffer(
&D3D11_BUFFER_DESC {
ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32,
Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER,
CPUAccessFlags: Default::default(),
MiscFlags: Default::default(),
StructureByteStride: 0,
},
Some(&D3D11_SUBRESOURCE_DATA {
pSysMem: OFFSCREEN_VBO_DATA.as_ptr().cast(),
SysMemPitch: 0,
SysMemSlicePitch: 0,
}),
Some(&mut offscreen_vbo),
)?;
assume_d3d11_init!(offscreen_vbo, "CreateBuffer");
assume_d3d11_init!(vbo, "CreateBuffer");
Ok(DrawQuad {
final_vbo,
offscreen_vbo,
vbo,
context: context.clone(),
offset: 0,
stride: std::mem::size_of::<D3D11Vertex>() as u32,
})
}
}
pub fn bind_vertices(&self, vbo_type: QuadType) {
pub fn bind_vbo_for_frame(&self) {
unsafe {
self.context
.IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
let buffer = match vbo_type {
QuadType::Offscreen => &self.offscreen_vbo,
QuadType::Final => &self.final_vbo,
};
self.context.IASetVertexBuffers(
0,
1,
Some(&Some(buffer.clone())),
Some(&Some(self.vbo.clone())),
Some(&self.stride),
Some(&self.offset),
Some(&0),
);
}
}
/// Draws one four-vertex quad on `context`.
///
/// `vbo_type` selects the start vertex handed to `Draw`: 0 for
/// `QuadType::Offscreen`, 4 for `QuadType::Final`.
pub fn draw_quad(&self, context: &ID3D11DeviceContext, vbo_type: QuadType) {
    const VERTICES_PER_QUAD: u32 = 4;

    // The final quad starts one quad's worth of vertices after the offscreen quad.
    let first_vertex = match vbo_type {
        QuadType::Offscreen => 0,
        QuadType::Final => VERTICES_PER_QUAD,
    };

    unsafe {
        context.Draw(VERTICES_PER_QUAD, first_vertex);
    }
}
pub fn get_spirv_cross_vbo_desc() -> [D3D11_INPUT_ELEMENT_DESC; 2] {
[
D3D11_INPUT_ELEMENT_DESC {

View file

@ -17,7 +17,7 @@ use crate::error::{assume_d3d11_init, FilterChainError};
use crate::filter_pass::{ConstantBufferBinding, FilterPass};
use crate::framebuffer::OwnedFramebuffer;
use crate::options::{FilterChainOptionsD3D11, FrameOptionsD3D11};
use crate::quad_render::DrawQuad;
use crate::draw_quad::DrawQuad;
use crate::render_target::RenderTarget;
use crate::samplers::SamplerSet;
use crate::util::d3d11_compile_bound_shader;
@ -472,6 +472,8 @@ impl FilterChainD3D11 {
let passes_len = passes.len();
let (pass, last) = passes.split_at_mut(passes_len - 1);
self.common.draw_quad.bind_vbo_for_frame();
for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode;

View file

@ -159,7 +159,6 @@ impl FilterPass {
}
}
unsafe {
parent.draw_quad.bind_vertices(vbo_type);
context.IASetInputLayout(&self.vertex_layout);
context.VSSetShader(&self.vertex_shader, None);
context.PSSetShader(&self.pixel_shader, None);
@ -243,10 +242,7 @@ impl FilterPass {
context.RSSetViewports(Some(&[output.output.viewport]))
}
unsafe {
// must be under primitive topology trianglestrip with quad
context.Draw(4, 0);
}
parent.draw_quad.draw_quad(context, vbo_type);
unsafe {
// unbind resources.

View file

@ -15,7 +15,7 @@ mod filter_pass;
mod framebuffer;
pub mod options;
mod parameters;
mod quad_render;
mod draw_quad;
mod render_target;
mod samplers;
mod texture;
@ -37,8 +37,9 @@ mod tests {
// "../test/slang-shaders/presets/crt-geom-ntsc-upscale-sharp.slangp",
// "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp",
// "../test/null.slangp",
const FILTER_PATH: &str = "../test/slang-shaders/scalefx/scalefx-9x.slangp";
const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp";
// const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp";
const IMAGE_PATH: &str = "../test/finalfightlong.png";
#[test]
fn triangle_d3d11_args() {

View file

@ -25,6 +25,7 @@ bytemuck = { version = "1.12.3", features = ["derive"] }
array-init = "2.1.0"
bitvec = "1.0.1"
widestring = "1.0.2"
array-concat = "0.5.2"
rayon = "1.6.1"

View file

@ -604,6 +604,9 @@ impl FilterChainD3D12 {
cmd.SetGraphicsRootSignature(&self.common.root_signature.handle);
self.common.mipmap_gen.pin_root_signature(cmd);
}
self.common.draw_quad.bind_vertices_for_frame(cmd);
for (index, pass) in pass.iter_mut().enumerate() {
source.filter = pass.config.filter;
source.wrap_mode = pass.config.wrap_mode;

View file

@ -142,7 +142,6 @@ impl FilterPass {
output: &RenderTarget,
vbo_type: QuadType,
) -> error::Result<()> {
parent.draw_quad.bind_vertices(cmd, vbo_type);
unsafe {
cmd.SetPipelineState(&self.pipeline.handle);
}
@ -212,8 +211,7 @@ impl FilterPass {
bottom: output.output.size.height as i32,
}]);
// todo put this in drawquad
cmd.DrawInstanced(4, 1, 0, 0)
parent.draw_quad.draw_quad(&cmd, vbo_type)
}
unsafe { cmd.EndRenderPass() }

View file

@ -1,13 +1,11 @@
use array_concat::concat_arrays;
use crate::buffer::D3D12Buffer;
use crate::error;
use bytemuck::{offset_of, Pod, Zeroable};
use librashader_runtime::quad::QuadType;
use windows::core::PCSTR;
use windows::Win32::Graphics::Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
use windows::Win32::Graphics::Direct3D12::{
ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource,
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW,
};
use windows::Win32::Graphics::Direct3D12::{ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW, ID3D12GraphicsCommandList4};
use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_R32G32_FLOAT;
#[repr(C)]
@ -20,7 +18,7 @@ struct D3D12Vertex {
const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0];
static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[
const OFFSCREEN_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex {
position: [-1.0, -1.0],
texcoord: [0.0, 1.0],
@ -43,7 +41,7 @@ static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[
},
];
static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[
const FINAL_VBO_DATA: [D3D12Vertex; 4] = [
D3D12Vertex {
position: [0.0, 0.0],
texcoord: [0.0, 1.0],
@ -66,58 +64,49 @@ static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[
},
];
static VBO_DATA: &[D3D12Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA);
pub(crate) struct DrawQuad {
offscreen_buffer: ID3D12Resource,
offscreen_view: D3D12_VERTEX_BUFFER_VIEW,
final_buffer: ID3D12Resource,
final_view: D3D12_VERTEX_BUFFER_VIEW,
buffer: ID3D12Resource,
view: D3D12_VERTEX_BUFFER_VIEW,
}
impl DrawQuad {
pub fn new(device: &ID3D12Device) -> error::Result<DrawQuad> {
let stride = std::mem::size_of::<D3D12Vertex>() as u32;
let size = std::mem::size_of::<[D3D12Vertex; 4]>() as u32;
let mut offscreen_buffer = D3D12Buffer::new(device, size as usize)?;
offscreen_buffer
let size = 2 * std::mem::size_of::<[D3D12Vertex; 4]>() as u32;
let mut buffer = D3D12Buffer::new(device, size as usize)?;
buffer
.map(None)?
.slice
.copy_from_slice(bytemuck::cast_slice(OFFSCREEN_VBO_DATA));
.copy_from_slice(bytemuck::cast_slice(VBO_DATA));
let offscreen_view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: offscreen_buffer.gpu_address(),
let view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: buffer.gpu_address(),
SizeInBytes: size,
StrideInBytes: stride,
};
let offscreen_buffer = offscreen_buffer.into_raw();
let mut final_buffer = D3D12Buffer::new(device, size as usize)?;
final_buffer
.map(None)?
.slice
.copy_from_slice(bytemuck::cast_slice(FINAL_VBO_DATA));
let final_view = D3D12_VERTEX_BUFFER_VIEW {
BufferLocation: final_buffer.gpu_address(),
SizeInBytes: size,
StrideInBytes: stride,
};
let final_buffer = final_buffer.into_raw();
Ok(DrawQuad { offscreen_buffer, offscreen_view, final_buffer, final_view })
let buffer = buffer.into_raw();
Ok(DrawQuad { buffer, view })
}
pub fn bind_vertices(&self, cmd: &ID3D12GraphicsCommandList, vbo_type: QuadType) {
/// Records the triangle-strip topology and the vertex buffer view on `cmd`.
///
/// `self.view` is bound to input slot 0.
pub fn bind_vertices_for_frame(&self, cmd: &ID3D12GraphicsCommandList) {
    let views = [self.view];
    unsafe {
        cmd.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
        cmd.IASetVertexBuffers(0, Some(&views));
    }
}
let view = match vbo_type {
QuadType::Offscreen => [self.offscreen_view],
QuadType::Final => [self.final_view],
// frame uses ID3D12GraphicsCommandList4 for renderpasses, don't need to bother with the cast.
/// Records a single-instance draw of one four-vertex quad on `cmd`.
///
/// `vbo_type` selects the start vertex passed to `DrawInstanced`: 0 for
/// `QuadType::Offscreen`, 4 for `QuadType::Final`. No vertex buffer is bound
/// here; that is done by `bind_vertices_for_frame`.
pub fn draw_quad(&self, cmd: &ID3D12GraphicsCommandList4, vbo_type: QuadType) {
    let offset = match vbo_type {
        QuadType::Offscreen => 0,
        QuadType::Final => 4,
    };

    unsafe {
        cmd.DrawInstanced(4, 1, offset, 0)
    }
}

View file

@ -55,19 +55,25 @@ impl DrawQuad {
})
}
pub fn bind_vbo(&self, cmd: vk::CommandBuffer, vbo: QuadType) {
let offset = match vbo {
QuadType::Offscreen => 0,
QuadType::Final => std::mem::size_of::<[f32; 16]>(),
};
pub fn bind_vbo_for_frame(&self, cmd: vk::CommandBuffer) {
unsafe {
self.device.cmd_bind_vertex_buffers(
cmd,
0,
&[self.buffer.handle],
&[offset as vk::DeviceSize],
&[0 as vk::DeviceSize],
)
}
}
/// Records a single-instance draw of one four-vertex quad on `cmd`.
///
/// `vbo` selects the first vertex passed to `cmd_draw`: 0 for
/// `QuadType::Offscreen`, 4 for `QuadType::Final`.
pub fn draw_quad(&self, cmd: vk::CommandBuffer, vbo: QuadType) {
    const VERTICES_PER_QUAD: u32 = 4;

    // The final quad starts one quad's worth of vertices after the offscreen quad.
    let first_vertex = match vbo {
        QuadType::Offscreen => 0,
        QuadType::Final => VERTICES_PER_QUAD,
    };

    unsafe {
        self.device
            .cmd_draw(cmd, VERTICES_PER_QUAD, 1, first_vertex, 0);
    }
}
}

View file

@ -670,6 +670,7 @@ impl FilterChainVulkan {
let frame_direction = options.map_or(1, |f| f.frame_direction);
self.common.draw_quad.bind_vbo_for_frame(cmd);
for (index, pass) in pass.iter_mut().enumerate() {
let target = &self.output_framebuffers[index];
source.filter_mode = pass.config.filter;

View file

@ -172,7 +172,7 @@ impl FilterPass {
);
}
parent.draw_quad.bind_vbo(cmd, vbo_type);
// parent.draw_quad.bind_vbo(cmd, vbo_type);
parent.device.cmd_set_scissor(
cmd,
@ -189,7 +189,7 @@ impl FilterPass {
parent
.device
.cmd_set_viewport(cmd, 0, &[output.output.size.into()]);
parent.device.cmd_draw(cmd, 4, 1, 0, 0);
parent.draw_quad.draw_quad(cmd, vbo_type);
self.graphics_pipeline.end_rendering(&parent.device, cmd);
}
Ok(residual)

View file

@ -45,8 +45,8 @@ mod tests {
dbg!("finished");
let filter = FilterChainVulkan::load_from_path(
&base,
// "../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp",
"../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp",
"../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp",
// "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp",
// "../test/basic.slangp",
Some(&FilterChainOptionsVulkan {
frames_in_flight: 3,