From fe84e6a490b0dbe78ca9be93b145fbc3627a5c60 Mon Sep 17 00:00:00 2001 From: Ronny Chan Date: Sat, 17 Feb 2024 02:54:39 -0500 Subject: [PATCH] rt(gl): use identity matrix for intermediate GL passes --- Cargo.lock | 12 ----- README.md | 17 ++---- librashader-runtime-gl/Cargo.toml | 1 - .../src/filter_chain/filter_impl.rs | 15 ++---- .../src/gl/gl3/draw_quad.rs | 44 ++++++++++----- .../src/gl/gl46/draw_quad.rs | 54 ++++++++++++------- librashader-runtime-gl/src/gl/mod.rs | 34 +++++++++--- .../tests/hello_triangle.rs | 12 ++--- 8 files changed, 109 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0e3fb89..24b0134 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -596,17 +596,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "concat-arrays" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "concurrent-queue" version = "2.4.0" @@ -1677,7 +1666,6 @@ name = "librashader-runtime-gl" version = "0.2.0" dependencies = [ "bytemuck", - "concat-arrays", "gl", "glfw 0.47.0", "librashader-cache", diff --git a/README.md b/README.md index 4c56d13..22dea2e 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ The Metal runtime is **not thread safe**. However you can still defer submission `filter_chain_create_deferred` function. ### Quad vertices and rotations -All runtimes except OpenGL render with an identity matrix MVP and a VBO for with range `[-1, 1]`. The final pass uses a +All runtimes render intermediate passes with an identity matrix MVP and a VBO with range `[-1, 1]`. The final pass uses a Quad VBO with range `[0, 1]` and the following projection matrix by default. ```rust @@ -81,17 +81,6 @@ static DEFAULT_MVP: &[f32; 16] = &[ As with RetroArch, a rotation on this MVP will be applied only on the final pass for these runtimes. 
This is the only way to pass orientation information to shaders. -The OpenGL runtime uses a VBO for range `[0, 1]` for all passes and the following MVP for all passes. - -```rust -static GL_DEFAULT_MVP: &[f32; 16] = &[ - 2.0, 0.0, 0.0, 0.0, - 0.0, 2.0, 0.0, 0.0, - 0.0, 0.0, 2.0, 0.0, - -1.0, -1.0, 0.0, 1.0, -]; -``` - ### Building For Rust projects, simply add the crate to your `Cargo.toml`. @@ -159,10 +148,12 @@ Please report an issue if you run into a shader that works in RetroArch, but not * Sampler objects are used rather than `glTexParameter`. * Sampler inputs and outputs are not renamed. This is useful for debugging shaders in RenderDoc. * UBO and Push Constant Buffer sizes are padded to 16-byte boundaries. + * The OpenGL runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's OpenGL driver uses only the final VBO. * OpenGL 4.6+ * All caveats from the OpenGL 3.3+ section should be considered. * Should work on OpenGL 4.5 but this is not guaranteed. The OpenGL 4.6 runtime may eventually switch to using `ARB_spirv_extensions` for loading shaders, and this will not be marked as a breaking change. * The OpenGL 4.6 runtime uses Direct State Access to minimize changes to the OpenGL state. For GPUs released within the last 5 years, this may improve performance. + * The OpenGL runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's OpenGL driver uses only the final VBO. * Vulkan * The Vulkan runtime can use [`VK_KHR_dynamic_rendering`](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_dynamic_rendering.html). This extension must be enabled at device creation. @@ -177,7 +168,7 @@ Please report an issue if you run into a shader that works in RetroArch, but not This brings shader compatibility beyond what the RetroArch Direct3D 12 driver provides. 
The HLSL pipeline fallback may be removed in the future as `spirv-to-dxil` improves. * The Direct3D 12 runtime requires `dxil.dll` and `dxcompiler.dll` from the [DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler). * Metal - * The Metal runtime uses the same VBOs as the other non-OpenGL runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's Metal driver uses only the final VBO. + * The Metal runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's Metal driver uses only the final VBO. Most, if not all shader presets should work fine on librashader. The runtime specific differences should not affect the output, and are more a heads-up for integrating librashader into your project. diff --git a/librashader-runtime-gl/Cargo.toml b/librashader-runtime-gl/Cargo.toml index 8e359ea..cf38e6d 100644 --- a/librashader-runtime-gl/Cargo.toml +++ b/librashader-runtime-gl/Cargo.toml @@ -24,7 +24,6 @@ gl = "0.14.0" bytemuck = { version = "1.12.3", features = ["derive"] } thiserror = "1.0.37" rayon = "1.6.1" -concat-arrays = "0.1.2" sptr = "0.3" diff --git a/librashader-runtime-gl/src/filter_chain/filter_impl.rs b/librashader-runtime-gl/src/filter_chain/filter_impl.rs index 1748b91..fcf66c1 100644 --- a/librashader-runtime-gl/src/filter_chain/filter_impl.rs +++ b/librashader-runtime-gl/src/filter_chain/filter_impl.rs @@ -26,18 +26,11 @@ use librashader_reflect::reflect::presets::{CompilePresetTarget, ShaderPassArtif use librashader_reflect::reflect::ReflectShader; use librashader_runtime::binding::BindingUtil; use librashader_runtime::framebuffer::FramebufferInit; +use librashader_runtime::quad::QuadType; use librashader_runtime::render_target::RenderTarget; use librashader_runtime::scaling::ScaleFramebuffer; use std::collections::VecDeque; -#[rustfmt::skip] -pub static GL_MVP_DEFAULT: &[f32; 16] = &[ - 2f32, 0.0, 0.0, 0.0, - 0.0, 2.0, 0.0, 0.0, - 0.0, 0.0, 2.0, 0.0, 
- -1.0, -1.0, 0.0, 1.0, -]; - pub(crate) struct FilterChainImpl { pub(crate) common: FilterCommon, passes: Box<[FilterPass]>, @@ -293,7 +286,7 @@ impl FilterChainImpl { // do not need to rebind FBO 0 here since first `draw` will // bind automatically. - self.draw_quad.bind_vertices(); + self.draw_quad.bind_vertices(QuadType::Offscreen); let filter = passes[0].config.filter; let wrap_mode = passes[0].config.wrap_mode; @@ -346,6 +339,7 @@ impl FilterChainImpl { let passes_len = passes.len(); let (pass, last) = passes.split_at_mut(passes_len - 1); + self.draw_quad.bind_vertices(QuadType::Offscreen); for (index, pass) in pass.iter_mut().enumerate() { let target = &self.output_framebuffers[index]; source.filter = pass.config.filter; @@ -360,7 +354,7 @@ impl FilterChainImpl { viewport, &original, &source, - RenderTarget::offscreen(target, viewport.mvp.unwrap_or(GL_MVP_DEFAULT)), + RenderTarget::identity(target), ); let target = target.as_texture(pass.config.filter, pass.config.wrap_mode); @@ -368,6 +362,7 @@ impl FilterChainImpl { source = target; } + self.draw_quad.bind_vertices(QuadType::Final); // try to hint the optimizer assert_eq!(last.len(), 1); if let Some(pass) = last.iter_mut().next() { diff --git a/librashader-runtime-gl/src/gl/gl3/draw_quad.rs b/librashader-runtime-gl/src/gl/gl3/draw_quad.rs index f914081..63d3e39 100644 --- a/librashader-runtime-gl/src/gl/gl3/draw_quad.rs +++ b/librashader-runtime-gl/src/gl/gl3/draw_quad.rs @@ -1,27 +1,36 @@ -use crate::gl::FINAL_VBO_DATA; use crate::gl::{DrawQuad, OpenGLVertex}; +use crate::gl::{FINAL_VBO_DATA, OFFSCREEN_VBO_DATA}; use bytemuck::offset_of; use gl::types::{GLsizei, GLsizeiptr, GLuint}; - +use librashader_runtime::quad::QuadType; pub struct Gl3DrawQuad { - vbo: GLuint, + vbo: [GLuint; 2], vao: GLuint, } impl DrawQuad for Gl3DrawQuad { fn new() -> Gl3DrawQuad { - let mut vbo = 0; + let mut vbo = [0, 0]; let mut vao = 0; unsafe { - gl::GenBuffers(1, &mut vbo); - gl::BindBuffer(gl::ARRAY_BUFFER, vbo); + 
gl::GenBuffers(2, vbo.as_mut_ptr()); + gl::BindBuffer(gl::ARRAY_BUFFER, vbo[0]); gl::BufferData( gl::ARRAY_BUFFER, - 4 * std::mem::size_of::<OpenGLVertex>() as GLsizeiptr, + std::mem::size_of_val(OFFSCREEN_VBO_DATA) as GLsizeiptr, + OFFSCREEN_VBO_DATA.as_ptr().cast(), + gl::STATIC_DRAW, + ); + + gl::BindBuffer(gl::ARRAY_BUFFER, vbo[1]); + gl::BufferData( + gl::ARRAY_BUFFER, + std::mem::size_of_val(FINAL_VBO_DATA) as GLsizeiptr, FINAL_VBO_DATA.as_ptr().cast(), gl::STATIC_DRAW, ); + gl::BindBuffer(gl::ARRAY_BUFFER, 0); gl::GenVertexArrays(1, &mut vao); } @@ -29,20 +38,25 @@ impl DrawQuad for Gl3DrawQuad { Self { vbo, vao } } - fn bind_vertices(&self) { + fn bind_vertices(&self, quad_type: QuadType) { + let buffer_index = match quad_type { + QuadType::Offscreen => 0, + QuadType::Final => 1, + }; + unsafe { gl::BindVertexArray(self.vao); gl::EnableVertexAttribArray(0); gl::EnableVertexAttribArray(1); - gl::BindBuffer(gl::ARRAY_BUFFER, self.vbo); + gl::BindBuffer(gl::ARRAY_BUFFER, self.vbo[buffer_index]); // the provided pointers are of OpenGL provenance with respect to the buffer bound to quad_vbo, // and not a known provenance to the Rust abstract machine, therefore we give it invalid pointers. 
// that are inexpressible in Rust gl::VertexAttribPointer( 0, - 2, + 4, gl::FLOAT, gl::FALSE, std::mem::size_of::<OpenGLVertex>() as GLsizei, @@ -54,7 +68,7 @@ impl DrawQuad for Gl3DrawQuad { gl::FLOAT, gl::FALSE, std::mem::size_of::<OpenGLVertex>() as GLsizei, - sptr::invalid(offset_of!(OpenGLVertex, position)), + sptr::invalid(offset_of!(OpenGLVertex, texcoord)), ); } } @@ -72,8 +86,12 @@ impl DrawQuad for Gl3DrawQuad { impl Drop for Gl3DrawQuad { fn drop(&mut self) { unsafe { - if self.vbo != 0 { - gl::DeleteBuffers(1, &self.vbo); + if self.vbo[0] != 0 { + gl::DeleteBuffers(1, &self.vbo[0]); + } + + if self.vbo[1] != 0 { + gl::DeleteBuffers(1, &self.vbo[1]); } if self.vao != 0 { diff --git a/librashader-runtime-gl/src/gl/gl46/draw_quad.rs b/librashader-runtime-gl/src/gl/gl46/draw_quad.rs index 9ff8499..362e355 100644 --- a/librashader-runtime-gl/src/gl/gl46/draw_quad.rs +++ b/librashader-runtime-gl/src/gl/gl46/draw_quad.rs @@ -1,43 +1,44 @@ -use crate::gl::FINAL_VBO_DATA; use crate::gl::{DrawQuad, OpenGLVertex}; +use crate::gl::{FINAL_VBO_DATA, OFFSCREEN_VBO_DATA}; use bytemuck::offset_of; use gl::types::{GLint, GLsizeiptr, GLuint}; +use librashader_runtime::quad::QuadType; pub struct Gl46DrawQuad { - vbo: GLuint, + vbo: [GLuint; 2], vao: GLuint, } impl DrawQuad for Gl46DrawQuad { fn new() -> Self { - let mut vbo = 0; + let mut vbo = [0, 0]; let mut vao = 0; unsafe { - gl::CreateBuffers(1, &mut vbo); + gl::CreateBuffers(2, vbo.as_mut_ptr()); gl::NamedBufferData( - vbo, - 4 * std::mem::size_of::<OpenGLVertex>() as GLsizeiptr, + vbo[0], + std::mem::size_of_val(OFFSCREEN_VBO_DATA) as GLsizeiptr, + OFFSCREEN_VBO_DATA.as_ptr().cast(), + gl::STATIC_DRAW, + ); + + gl::NamedBufferData( + vbo[1], + std::mem::size_of_val(FINAL_VBO_DATA) as GLsizeiptr, FINAL_VBO_DATA.as_ptr().cast(), gl::STATIC_DRAW, ); + gl::CreateVertexArrays(1, &mut vao); gl::EnableVertexArrayAttrib(vao, 0); gl::EnableVertexArrayAttrib(vao, 1); - gl::VertexArrayVertexBuffer( - vao, - 0, - vbo, - 0, - std::mem::size_of::<OpenGLVertex>() as GLint, - ); - 
gl::VertexArrayAttribFormat( vao, 0, - 2, + 4, gl::FLOAT, gl::FALSE, offset_of!(OpenGLVertex, position) as GLuint, @@ -58,8 +59,21 @@ impl DrawQuad for Gl46DrawQuad { Self { vbo, vao } } - fn bind_vertices(&self) { + fn bind_vertices(&self, quad_type: QuadType) { + let buffer_index = match quad_type { + QuadType::Offscreen => 0, + QuadType::Final => 1, + }; + unsafe { + gl::VertexArrayVertexBuffer( + self.vao, + 0, + self.vbo[buffer_index], + 0, + std::mem::size_of::<OpenGLVertex>() as GLint, + ); + gl::BindVertexArray(self.vao); } } @@ -74,8 +88,12 @@ impl DrawQuad for Gl46DrawQuad { impl Drop for Gl46DrawQuad { fn drop(&mut self) { unsafe { - if self.vbo != 0 { - gl::DeleteBuffers(1, &self.vbo); + if self.vbo[0] != 0 { + gl::DeleteBuffers(1, &self.vbo[0]); + } + + if self.vbo[1] != 0 { + gl::DeleteBuffers(1, &self.vbo[1]); } if self.vao != 0 { diff --git a/librashader-runtime-gl/src/gl/mod.rs b/librashader-runtime-gl/src/gl/mod.rs index 42fcb9c..bcb2b27 100644 --- a/librashader-runtime-gl/src/gl/mod.rs +++ b/librashader-runtime-gl/src/gl/mod.rs @@ -16,31 +16,51 @@ use librashader_presets::{Scale2D, TextureConfig}; use librashader_reflect::back::glsl::CrossGlslContext; use librashader_reflect::back::ShaderCompilerOutput; use librashader_reflect::reflect::semantics::{BufferReflection, TextureBinding}; +use librashader_runtime::quad::QuadType; use librashader_runtime::uniforms::UniformStorageAccess; #[repr(C)] #[derive(Debug, Copy, Clone, Default, Zeroable, Pod)] pub(crate) struct OpenGLVertex { - pub position: [f32; 2], + pub position: [f32; 4], pub texcoord: [f32; 2], } -pub(crate) static FINAL_VBO_DATA: &[OpenGLVertex; 4] = &[ +static OFFSCREEN_VBO_DATA: &[OpenGLVertex; 4] = &[ OpenGLVertex { - position: [0.0, 0.0], + position: [-1.0, -1.0, 0.0, 1.0], texcoord: [0.0, 0.0], }, OpenGLVertex { - position: [1.0, 0.0], + position: [1.0, -1.0, 0.0, 1.0], texcoord: [1.0, 0.0], }, OpenGLVertex { - position: [0.0, 1.0], + position: [-1.0, 1.0, 0.0, 1.0], texcoord: [0.0, 1.0], }, 
OpenGLVertex { - position: [1.0, 1.0], + position: [1.0, 1.0, 0.0, 1.0], + texcoord: [1.0, 1.0], + }, +]; + +static FINAL_VBO_DATA: &[OpenGLVertex; 4] = &[ + OpenGLVertex { + position: [0.0, 0.0, 0.0, 1.0], + texcoord: [0.0, 0.0], + }, + OpenGLVertex { + position: [1.0, 0.0, 0.0, 1.0], + texcoord: [1.0, 0.0], + }, + OpenGLVertex { + position: [0.0, 1.0, 0.0, 1.0], + texcoord: [0.0, 1.0], + }, + OpenGLVertex { + position: [1.0, 1.0, 0.0, 1.0], texcoord: [1.0, 1.0], }, ]; @@ -58,7 +78,7 @@ pub(crate) trait CompileProgram { pub(crate) trait DrawQuad { fn new() -> Self; - fn bind_vertices(&self); + fn bind_vertices(&self, quad_type: QuadType); fn unbind_vertices(&self); } diff --git a/librashader-runtime-wgpu/tests/hello_triangle.rs b/librashader-runtime-wgpu/tests/hello_triangle.rs index 1a60a23..7579c93 100644 --- a/librashader-runtime-wgpu/tests/hello_triangle.rs +++ b/librashader-runtime-wgpu/tests/hello_triangle.rs @@ -123,13 +123,13 @@ impl<'a> State<'a> { // ) // .unwrap(); // - // let preset = - // ShaderPreset::try_parse("../test/shaders_slang/crt/crt-royale.slangp").unwrap(); + let preset = + ShaderPreset::try_parse("../test/shaders_slang/crt/crt-royale.slangp").unwrap(); - let preset = ShaderPreset::try_parse( - "../test/shaders_slang/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp", - ) - .unwrap(); + // let preset = ShaderPreset::try_parse( + // "../test/shaders_slang/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp", + // ) + // .unwrap(); let chain = FilterChainWgpu::load_from_preset( preset,