From 3db89e5351a7a2bb5caa5a12c62c6c150b68a21d Mon Sep 17 00:00:00 2001 From: chyyran Date: Mon, 6 Feb 2023 00:23:37 -0500 Subject: [PATCH] vk/11/12: bind VBO only once per frame to save on draw calls. --- Cargo.lock | 8 +++ librashader-runtime-d3d11/Cargo.toml | 1 + .../src/{quad_render.rs => draw_quad.rs} | 65 +++++++----------- librashader-runtime-d3d11/src/filter_chain.rs | 4 +- librashader-runtime-d3d11/src/filter_pass.rs | 6 +- librashader-runtime-d3d11/src/lib.rs | 5 +- librashader-runtime-d3d12/Cargo.toml | 1 + librashader-runtime-d3d12/src/filter_chain.rs | 3 + librashader-runtime-d3d12/src/filter_pass.rs | 4 +- librashader-runtime-d3d12/src/quad_render.rs | 67 ++++++++----------- librashader-runtime-vk/src/draw_quad.rs | 20 ++++-- librashader-runtime-vk/src/filter_chain.rs | 1 + librashader-runtime-vk/src/filter_pass.rs | 4 +- librashader-runtime-vk/src/lib.rs | 4 +- 14 files changed, 93 insertions(+), 100 deletions(-) rename librashader-runtime-d3d11/src/{quad_render.rs => draw_quad.rs} (69%) diff --git a/Cargo.lock b/Cargo.lock index c186b0d..00cf850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "array-concat" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9180feb72ccbc07cfe5ef7fa8bbf86ca71490d5dc9ef8ea02c7298ba94e7f7d" + [[package]] name = "array-init" version = "2.1.0" @@ -884,6 +890,7 @@ dependencies = [ name = "librashader-runtime-d3d11" version = "0.1.0-beta.10" dependencies = [ + "array-concat", "bytemuck", "gfx-maths", "librashader-common", @@ -902,6 +909,7 @@ dependencies = [ name = "librashader-runtime-d3d12" version = "0.1.0-beta.8" dependencies = [ + "array-concat", "array-init", "bitvec", "bytemuck", diff --git a/librashader-runtime-d3d11/Cargo.toml b/librashader-runtime-d3d11/Cargo.toml index 
9af2f6f..210e0c4 100644 --- a/librashader-runtime-d3d11/Cargo.toml +++ b/librashader-runtime-d3d11/Cargo.toml @@ -23,6 +23,7 @@ spirv_cross = { package = "librashader-spirv-cross", version = "0.23" } rustc-hash = "1.1.0" bytemuck = "1.12.3" rayon = "1.6.1" +array-concat = "0.5.2" [target.'cfg(windows)'.dependencies.windows] version = "0.44.0" diff --git a/librashader-runtime-d3d11/src/quad_render.rs b/librashader-runtime-d3d11/src/draw_quad.rs similarity index 69% rename from librashader-runtime-d3d11/src/quad_render.rs rename to librashader-runtime-d3d11/src/draw_quad.rs index 0091d9f..07d2bfd 100644 --- a/librashader-runtime-d3d11/src/quad_render.rs +++ b/librashader-runtime-d3d11/src/draw_quad.rs @@ -1,3 +1,4 @@ +use array_concat::concat_arrays; use crate::error; use crate::error::assume_d3d11_init; use bytemuck::offset_of; @@ -21,7 +22,7 @@ struct D3D11Vertex { const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0]; -static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[ +const OFFSCREEN_VBO_DATA: [D3D11Vertex; 4] = [ D3D11Vertex { position: [-1.0, -1.0], texcoord: [0.0, 1.0], @@ -44,7 +45,7 @@ static OFFSCREEN_VBO_DATA: &[D3D11Vertex; 4] = &[ }, ]; -static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[ +const FINAL_VBO_DATA: [D3D11Vertex; 4] = [ D3D11Vertex { position: [0.0, 0.0], texcoord: [0.0, 1.0], @@ -67,21 +68,21 @@ static FINAL_VBO_DATA: &[D3D11Vertex; 4] = &[ }, ]; +static VBO_DATA: &[D3D11Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA); + pub(crate) struct DrawQuad { - final_vbo: ID3D11Buffer, context: ID3D11DeviceContext, - offset: u32, stride: u32, - offscreen_vbo: ID3D11Buffer, + vbo: ID3D11Buffer, } impl DrawQuad { pub fn new(device: &ID3D11Device, context: &ID3D11DeviceContext) -> error::Result<DrawQuad> { unsafe { - let mut final_vbo = None; + let mut vbo = None; device.CreateBuffer( &D3D11_BUFFER_DESC { - ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32, + ByteWidth: 2 * std::mem::size_of::<[D3D11Vertex; 4]>() as u32, Usage: D3D11_USAGE_IMMUTABLE,
BindFlags: D3D11_BIND_VERTEX_BUFFER, CPUAccessFlags: Default::default(), @@ -89,62 +90,48 @@ impl DrawQuad { StructureByteStride: 0, }, Some(&D3D11_SUBRESOURCE_DATA { - pSysMem: FINAL_VBO_DATA.as_ptr().cast(), + pSysMem: VBO_DATA.as_ptr().cast(), SysMemPitch: 0, SysMemSlicePitch: 0, }), - Some(&mut final_vbo), + Some(&mut vbo), )?; - assume_d3d11_init!(final_vbo, "CreateBuffer"); - - let mut offscreen_vbo = None; - device.CreateBuffer( - &D3D11_BUFFER_DESC { - ByteWidth: std::mem::size_of::<[D3D11Vertex; 4]>() as u32, - Usage: D3D11_USAGE_IMMUTABLE, - BindFlags: D3D11_BIND_VERTEX_BUFFER, - CPUAccessFlags: Default::default(), - MiscFlags: Default::default(), - StructureByteStride: 0, - }, - Some(&D3D11_SUBRESOURCE_DATA { - pSysMem: OFFSCREEN_VBO_DATA.as_ptr().cast(), - SysMemPitch: 0, - SysMemSlicePitch: 0, - }), - Some(&mut offscreen_vbo), - )?; - assume_d3d11_init!(offscreen_vbo, "CreateBuffer"); + assume_d3d11_init!(vbo, "CreateBuffer"); Ok(DrawQuad { - final_vbo, - offscreen_vbo, + vbo, context: context.clone(), - offset: 0, stride: std::mem::size_of::<D3D11Vertex>() as u32, }) } } - pub fn bind_vertices(&self, vbo_type: QuadType) { + pub fn bind_vbo_for_frame(&self) { unsafe { self.context .IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - let buffer = match vbo_type { - QuadType::Offscreen => &self.offscreen_vbo, - QuadType::Final => &self.final_vbo, - }; self.context.IASetVertexBuffers( 0, 1, - Some(&Some(buffer.clone())), + Some(&Some(self.vbo.clone())), Some(&self.stride), - Some(&self.offset), + Some(&0), ); } } + pub fn draw_quad(&self, context: &ID3D11DeviceContext, vbo_type: QuadType) { + let offset = match vbo_type { + QuadType::Offscreen => 0, + QuadType::Final => 4, + }; + + unsafe { + context.Draw(4, offset); + } + } + pub fn get_spirv_cross_vbo_desc() -> [D3D11_INPUT_ELEMENT_DESC; 2] { [ D3D11_INPUT_ELEMENT_DESC { diff --git a/librashader-runtime-d3d11/src/filter_chain.rs b/librashader-runtime-d3d11/src/filter_chain.rs index 4558517..f5207e9
100644 --- a/librashader-runtime-d3d11/src/filter_chain.rs +++ b/librashader-runtime-d3d11/src/filter_chain.rs @@ -17,7 +17,7 @@ use crate::error::{assume_d3d11_init, FilterChainError}; use crate::filter_pass::{ConstantBufferBinding, FilterPass}; use crate::framebuffer::OwnedFramebuffer; use crate::options::{FilterChainOptionsD3D11, FrameOptionsD3D11}; -use crate::quad_render::DrawQuad; +use crate::draw_quad::DrawQuad; use crate::render_target::RenderTarget; use crate::samplers::SamplerSet; use crate::util::d3d11_compile_bound_shader; @@ -472,6 +472,8 @@ impl FilterChainD3D11 { let passes_len = passes.len(); let (pass, last) = passes.split_at_mut(passes_len - 1); + self.common.draw_quad.bind_vbo_for_frame(); + for (index, pass) in pass.iter_mut().enumerate() { source.filter = pass.config.filter; source.wrap_mode = pass.config.wrap_mode; diff --git a/librashader-runtime-d3d11/src/filter_pass.rs b/librashader-runtime-d3d11/src/filter_pass.rs index 2a9d3bf..e59c9ec 100644 --- a/librashader-runtime-d3d11/src/filter_pass.rs +++ b/librashader-runtime-d3d11/src/filter_pass.rs @@ -159,7 +159,6 @@ impl FilterPass { } } unsafe { - parent.draw_quad.bind_vertices(vbo_type); context.IASetInputLayout(&self.vertex_layout); context.VSSetShader(&self.vertex_shader, None); context.PSSetShader(&self.pixel_shader, None); @@ -243,10 +242,7 @@ impl FilterPass { context.RSSetViewports(Some(&[output.output.viewport])) } - unsafe { - // must be under primitive topology trianglestrip with quad - context.Draw(4, 0); - } + parent.draw_quad.draw_quad(context, vbo_type); unsafe { // unbind resources. 
diff --git a/librashader-runtime-d3d11/src/lib.rs b/librashader-runtime-d3d11/src/lib.rs index 5f329df..3f16b78 100644 --- a/librashader-runtime-d3d11/src/lib.rs +++ b/librashader-runtime-d3d11/src/lib.rs @@ -15,7 +15,7 @@ mod filter_pass; mod framebuffer; pub mod options; mod parameters; -mod quad_render; +mod draw_quad; mod render_target; mod samplers; mod texture; @@ -37,8 +37,9 @@ mod tests { // "../test/slang-shaders/presets/crt-geom-ntsc-upscale-sharp.slangp", // "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp", // "../test/null.slangp", + const FILTER_PATH: &str = "../test/slang-shaders/scalefx/scalefx-9x.slangp"; - const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp"; + // const FILTER_PATH: &str = "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp"; const IMAGE_PATH: &str = "../test/finalfightlong.png"; #[test] fn triangle_d3d11_args() { diff --git a/librashader-runtime-d3d12/Cargo.toml b/librashader-runtime-d3d12/Cargo.toml index 48869a1..9ed79c1 100644 --- a/librashader-runtime-d3d12/Cargo.toml +++ b/librashader-runtime-d3d12/Cargo.toml @@ -25,6 +25,7 @@ bytemuck = { version = "1.12.3", features = ["derive"] } array-init = "2.1.0" bitvec = "1.0.1" widestring = "1.0.2" +array-concat = "0.5.2" rayon = "1.6.1" diff --git a/librashader-runtime-d3d12/src/filter_chain.rs b/librashader-runtime-d3d12/src/filter_chain.rs index c7f2cc6..ad26114 100644 --- a/librashader-runtime-d3d12/src/filter_chain.rs +++ b/librashader-runtime-d3d12/src/filter_chain.rs @@ -604,6 +604,9 @@ impl FilterChainD3D12 { cmd.SetGraphicsRootSignature(&self.common.root_signature.handle); self.common.mipmap_gen.pin_root_signature(cmd); } + + self.common.draw_quad.bind_vertices_for_frame(cmd); + for (index, pass) in pass.iter_mut().enumerate() { source.filter = pass.config.filter; source.wrap_mode = pass.config.wrap_mode; diff --git a/librashader-runtime-d3d12/src/filter_pass.rs 
b/librashader-runtime-d3d12/src/filter_pass.rs index fd4b9d0..189cf86 100644 --- a/librashader-runtime-d3d12/src/filter_pass.rs +++ b/librashader-runtime-d3d12/src/filter_pass.rs @@ -142,7 +142,6 @@ impl FilterPass { output: &RenderTarget, vbo_type: QuadType, ) -> error::Result<()> { - parent.draw_quad.bind_vertices(cmd, vbo_type); unsafe { cmd.SetPipelineState(&self.pipeline.handle); } @@ -212,8 +211,7 @@ impl FilterPass { bottom: output.output.size.height as i32, }]); - // todo put this in drawquad - cmd.DrawInstanced(4, 1, 0, 0) + parent.draw_quad.draw_quad(&cmd, vbo_type) } unsafe { cmd.EndRenderPass() } diff --git a/librashader-runtime-d3d12/src/quad_render.rs b/librashader-runtime-d3d12/src/quad_render.rs index 343d373..c06bef2 100644 --- a/librashader-runtime-d3d12/src/quad_render.rs +++ b/librashader-runtime-d3d12/src/quad_render.rs @@ -1,13 +1,11 @@ +use array_concat::concat_arrays; use crate::buffer::D3D12Buffer; use crate::error; use bytemuck::{offset_of, Pod, Zeroable}; use librashader_runtime::quad::QuadType; use windows::core::PCSTR; use windows::Win32::Graphics::Direct3D::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; -use windows::Win32::Graphics::Direct3D12::{ - ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, - D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW, -}; +use windows::Win32::Graphics::Direct3D12::{ID3D12Device, ID3D12GraphicsCommandList, ID3D12Resource, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, D3D12_INPUT_ELEMENT_DESC, D3D12_VERTEX_BUFFER_VIEW, ID3D12GraphicsCommandList4}; use windows::Win32::Graphics::Dxgi::Common::DXGI_FORMAT_R32G32_FLOAT; #[repr(C)] @@ -20,7 +18,7 @@ struct D3D12Vertex { const CLEAR: [f32; 4] = [1.0, 1.0, 1.0, 1.0]; -static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[ +const OFFSCREEN_VBO_DATA: [D3D12Vertex; 4] = [ D3D12Vertex { position: [-1.0, -1.0], texcoord: [0.0, 1.0], @@ -43,7 +41,7 @@ static OFFSCREEN_VBO_DATA: &[D3D12Vertex; 4] = &[ }, ]; -static FINAL_VBO_DATA: 
&[D3D12Vertex; 4] = &[ +const FINAL_VBO_DATA: [D3D12Vertex; 4] = [ D3D12Vertex { position: [0.0, 0.0], texcoord: [0.0, 1.0], @@ -66,58 +64,49 @@ static FINAL_VBO_DATA: &[D3D12Vertex; 4] = &[ }, ]; +static VBO_DATA: &[D3D12Vertex; 8] = &concat_arrays!(OFFSCREEN_VBO_DATA, FINAL_VBO_DATA); + pub(crate) struct DrawQuad { - offscreen_buffer: ID3D12Resource, - offscreen_view: D3D12_VERTEX_BUFFER_VIEW, - final_buffer: ID3D12Resource, - final_view: D3D12_VERTEX_BUFFER_VIEW, + buffer: ID3D12Resource, + view: D3D12_VERTEX_BUFFER_VIEW, } impl DrawQuad { pub fn new(device: &ID3D12Device) -> error::Result<DrawQuad> { let stride = std::mem::size_of::<D3D12Vertex>() as u32; - let size = std::mem::size_of::<[D3D12Vertex; 4]>() as u32; - let mut offscreen_buffer = D3D12Buffer::new(device, size as usize)?; - offscreen_buffer + let size = 2 * std::mem::size_of::<[D3D12Vertex; 4]>() as u32; + let mut buffer = D3D12Buffer::new(device, size as usize)?; + buffer .map(None)? .slice - .copy_from_slice(bytemuck::cast_slice(OFFSCREEN_VBO_DATA)); + .copy_from_slice(bytemuck::cast_slice(VBO_DATA)); - let offscreen_view = D3D12_VERTEX_BUFFER_VIEW { - BufferLocation: offscreen_buffer.gpu_address(), + let view = D3D12_VERTEX_BUFFER_VIEW { + BufferLocation: buffer.gpu_address(), SizeInBytes: size, StrideInBytes: stride, }; - let offscreen_buffer = offscreen_buffer.into_raw(); - - let mut final_buffer = D3D12Buffer::new(device, size as usize)?; - final_buffer - .map(None)?
- .slice - .copy_from_slice(bytemuck::cast_slice(FINAL_VBO_DATA)); - - let final_view = D3D12_VERTEX_BUFFER_VIEW { - BufferLocation: final_buffer.gpu_address(), - SizeInBytes: size, - StrideInBytes: stride, - }; - - let final_buffer = final_buffer.into_raw(); - - Ok(DrawQuad { offscreen_buffer, offscreen_view, final_buffer, final_view }) + let buffer = buffer.into_raw(); + Ok(DrawQuad { buffer, view }) } - pub fn bind_vertices(&self, cmd: &ID3D12GraphicsCommandList, vbo_type: QuadType) { + pub fn bind_vertices_for_frame(&self, cmd: &ID3D12GraphicsCommandList) { unsafe { cmd.IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + cmd.IASetVertexBuffers(0, Some(&[self.view])); + } + } - let view = match vbo_type { - QuadType::Offscreen => [self.offscreen_view], - QuadType::Final => [self.final_view], - }; + // frame uses ID3D12GraphicsCommandList4 for renderpasses, don't need to bother with the cast. + pub fn draw_quad(&self, cmd: &ID3D12GraphicsCommandList4, vbo_type: QuadType) { + let offset = match vbo_type { + QuadType::Offscreen => 0, + QuadType::Final => 4, + }; - cmd.IASetVertexBuffers(0, Some(&view)); + unsafe { + cmd.DrawInstanced(4, 1, offset, 0) } } diff --git a/librashader-runtime-vk/src/draw_quad.rs b/librashader-runtime-vk/src/draw_quad.rs index 7639e39..2859e41 100644 --- a/librashader-runtime-vk/src/draw_quad.rs +++ b/librashader-runtime-vk/src/draw_quad.rs @@ -55,19 +55,25 @@ impl DrawQuad { }) } - pub fn bind_vbo(&self, cmd: vk::CommandBuffer, vbo: QuadType) { - let offset = match vbo { - QuadType::Offscreen => 0, - QuadType::Final => std::mem::size_of::<[f32; 16]>(), - }; - + pub fn bind_vbo_for_frame(&self, cmd: vk::CommandBuffer) { unsafe { self.device.cmd_bind_vertex_buffers( cmd, 0, &[self.buffer.handle], - &[offset as vk::DeviceSize], + &[0 as vk::DeviceSize], ) } } + + pub fn draw_quad(&self, cmd: vk::CommandBuffer, vbo: QuadType) { + let offset = match vbo { + QuadType::Offscreen => 0, + QuadType::Final => 4, + }; + + unsafe { + 
self.device.cmd_draw(cmd, 4, 1, offset, 0); + } + } } diff --git a/librashader-runtime-vk/src/filter_chain.rs b/librashader-runtime-vk/src/filter_chain.rs index df5e017..a5905e1 100644 --- a/librashader-runtime-vk/src/filter_chain.rs +++ b/librashader-runtime-vk/src/filter_chain.rs @@ -670,6 +670,7 @@ impl FilterChainVulkan { let frame_direction = options.map_or(1, |f| f.frame_direction); + self.common.draw_quad.bind_vbo_for_frame(cmd); for (index, pass) in pass.iter_mut().enumerate() { let target = &self.output_framebuffers[index]; source.filter_mode = pass.config.filter; diff --git a/librashader-runtime-vk/src/filter_pass.rs b/librashader-runtime-vk/src/filter_pass.rs index 92ee688..6c7fcf2 100644 --- a/librashader-runtime-vk/src/filter_pass.rs +++ b/librashader-runtime-vk/src/filter_pass.rs @@ -172,7 +172,7 @@ impl FilterPass { ); } - parent.draw_quad.bind_vbo(cmd, vbo_type); + // parent.draw_quad.bind_vbo(cmd, vbo_type); parent.device.cmd_set_scissor( cmd, @@ -189,7 +189,7 @@ impl FilterPass { parent .device .cmd_set_viewport(cmd, 0, &[output.output.size.into()]); - parent.device.cmd_draw(cmd, 4, 1, 0, 0); + parent.draw_quad.draw_quad(cmd, vbo_type); self.graphics_pipeline.end_rendering(&parent.device, cmd); } Ok(residual) diff --git a/librashader-runtime-vk/src/lib.rs b/librashader-runtime-vk/src/lib.rs index 808dec1..01f8766 100644 --- a/librashader-runtime-vk/src/lib.rs +++ b/librashader-runtime-vk/src/lib.rs @@ -45,8 +45,8 @@ mod tests { dbg!("finished"); let filter = FilterChainVulkan::load_from_path( &base, - // "../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp", - "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp", + "../test/slang-shaders/border/gameboy-player/gameboy-player-crt-royale.slangp", + // "../test/slang-shaders/bezel/Mega_Bezel/Presets/MBZ__2__ADV-NO-REFLECT.slangp", // "../test/basic.slangp", Some(&FilterChainOptionsVulkan { frames_in_flight: 3,