rt(gl): use identity matrix for intermediate GL passes

This commit is contained in:
Ronny Chan 2024-02-17 02:54:39 -05:00 committed by chyyran
parent 913ede3852
commit fe84e6a490
8 changed files with 109 additions and 80 deletions

12
Cargo.lock generated
View file

@ -596,17 +596,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "concat-arrays"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "concurrent-queue" name = "concurrent-queue"
version = "2.4.0" version = "2.4.0"
@ -1677,7 +1666,6 @@ name = "librashader-runtime-gl"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"concat-arrays",
"gl", "gl",
"glfw 0.47.0", "glfw 0.47.0",
"librashader-cache", "librashader-cache",

View file

@ -66,7 +66,7 @@ The Metal runtime is **not thread safe**. However you can still defer submission
`filter_chain_create_deferred` function. `filter_chain_create_deferred` function.
### Quad vertices and rotations ### Quad vertices and rotations
All runtimes except OpenGL render with an identity matrix MVP and a VBO with range `[-1, 1]`. The final pass uses a All runtimes render intermediate passes with an identity matrix MVP and a VBO with range `[-1, 1]`. The final pass uses a
Quad VBO with range `[0, 1]` and the following projection matrix by default. Quad VBO with range `[0, 1]` and the following projection matrix by default.
```rust ```rust
@ -81,17 +81,6 @@ static DEFAULT_MVP: &[f32; 16] = &[
As with RetroArch, a rotation on this MVP will be applied only on the final pass for these runtimes. This is the only way to As with RetroArch, a rotation on this MVP will be applied only on the final pass for these runtimes. This is the only way to
pass orientation information to shaders. pass orientation information to shaders.
The OpenGL runtime uses a VBO for range `[0, 1]` for all passes and the following MVP for all passes.
```rust
static GL_DEFAULT_MVP: &[f32; 16] = &[
2.0, 0.0, 0.0, 0.0,
0.0, 2.0, 0.0, 0.0,
0.0, 0.0, 2.0, 0.0,
-1.0, -1.0, 0.0, 1.0,
];
```
### Building ### Building
For Rust projects, simply add the crate to your `Cargo.toml`. For Rust projects, simply add the crate to your `Cargo.toml`.
@ -159,10 +148,12 @@ Please report an issue if you run into a shader that works in RetroArch, but not
* Sampler objects are used rather than `glTexParameter`. * Sampler objects are used rather than `glTexParameter`.
* Sampler inputs and outputs are not renamed. This is useful for debugging shaders in RenderDoc. * Sampler inputs and outputs are not renamed. This is useful for debugging shaders in RenderDoc.
* UBO and Push Constant Buffer sizes are padded to 16-byte boundaries. * UBO and Push Constant Buffer sizes are padded to 16-byte boundaries.
* The OpenGL runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's OpenGL driver uses only the final VBO.
* OpenGL 4.6+ * OpenGL 4.6+
* All caveats from the OpenGL 3.3+ section should be considered. * All caveats from the OpenGL 3.3+ section should be considered.
* Should work on OpenGL 4.5 but this is not guaranteed. The OpenGL 4.6 runtime may eventually switch to using `ARB_spirv_extensions` for loading shaders, and this will not be marked as a breaking change. * Should work on OpenGL 4.5 but this is not guaranteed. The OpenGL 4.6 runtime may eventually switch to using `ARB_spirv_extensions` for loading shaders, and this will not be marked as a breaking change.
* The OpenGL 4.6 runtime uses Direct State Access to minimize changes to the OpenGL state. For GPUs released within the last 5 years, this may improve performance. * The OpenGL 4.6 runtime uses Direct State Access to minimize changes to the OpenGL state. For GPUs released within the last 5 years, this may improve performance.
* The OpenGL runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's OpenGL driver uses only the final VBO.
* Vulkan * Vulkan
* The Vulkan runtime can use [`VK_KHR_dynamic_rendering`](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_dynamic_rendering.html). * The Vulkan runtime can use [`VK_KHR_dynamic_rendering`](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_dynamic_rendering.html).
This extension must be enabled at device creation. This extension must be enabled at device creation.
@ -177,7 +168,7 @@ Please report an issue if you run into a shader that works in RetroArch, but not
This brings shader compatibility beyond what the RetroArch Direct3D 12 driver provides. The HLSL pipeline fallback may be removed in the future as `spirv-to-dxil` improves. This brings shader compatibility beyond what the RetroArch Direct3D 12 driver provides. The HLSL pipeline fallback may be removed in the future as `spirv-to-dxil` improves.
* The Direct3D 12 runtime requires `dxil.dll` and `dxcompiler.dll` from the [DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler). * The Direct3D 12 runtime requires `dxil.dll` and `dxcompiler.dll` from the [DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler).
* Metal * Metal
* The Metal runtime uses the same VBOs as the other non-OpenGL runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's Metal driver uses only the final VBO. * The Metal runtime uses the same VBOs as the other runtimes as well as the identity matrix MVP for intermediate passes. RetroArch's Metal driver uses only the final VBO.
Most, if not all, shader presets should work fine on librashader. The runtime-specific differences should not affect the output, Most, if not all, shader presets should work fine on librashader. The runtime-specific differences should not affect the output,
and are more a heads-up for integrating librashader into your project. and are more a heads-up for integrating librashader into your project.

View file

@ -24,7 +24,6 @@ gl = "0.14.0"
bytemuck = { version = "1.12.3", features = ["derive"] } bytemuck = { version = "1.12.3", features = ["derive"] }
thiserror = "1.0.37" thiserror = "1.0.37"
rayon = "1.6.1" rayon = "1.6.1"
concat-arrays = "0.1.2"
sptr = "0.3" sptr = "0.3"

View file

@ -26,18 +26,11 @@ use librashader_reflect::reflect::presets::{CompilePresetTarget, ShaderPassArtif
use librashader_reflect::reflect::ReflectShader; use librashader_reflect::reflect::ReflectShader;
use librashader_runtime::binding::BindingUtil; use librashader_runtime::binding::BindingUtil;
use librashader_runtime::framebuffer::FramebufferInit; use librashader_runtime::framebuffer::FramebufferInit;
use librashader_runtime::quad::QuadType;
use librashader_runtime::render_target::RenderTarget; use librashader_runtime::render_target::RenderTarget;
use librashader_runtime::scaling::ScaleFramebuffer; use librashader_runtime::scaling::ScaleFramebuffer;
use std::collections::VecDeque; use std::collections::VecDeque;
#[rustfmt::skip]
pub static GL_MVP_DEFAULT: &[f32; 16] = &[
2f32, 0.0, 0.0, 0.0,
0.0, 2.0, 0.0, 0.0,
0.0, 0.0, 2.0, 0.0,
-1.0, -1.0, 0.0, 1.0,
];
pub(crate) struct FilterChainImpl<T: GLInterface> { pub(crate) struct FilterChainImpl<T: GLInterface> {
pub(crate) common: FilterCommon, pub(crate) common: FilterCommon,
passes: Box<[FilterPass<T>]>, passes: Box<[FilterPass<T>]>,
@ -293,7 +286,7 @@ impl<T: GLInterface> FilterChainImpl<T> {
// do not need to rebind FBO 0 here since first `draw` will // do not need to rebind FBO 0 here since first `draw` will
// bind automatically. // bind automatically.
self.draw_quad.bind_vertices(); self.draw_quad.bind_vertices(QuadType::Offscreen);
let filter = passes[0].config.filter; let filter = passes[0].config.filter;
let wrap_mode = passes[0].config.wrap_mode; let wrap_mode = passes[0].config.wrap_mode;
@ -346,6 +339,7 @@ impl<T: GLInterface> FilterChainImpl<T> {
let passes_len = passes.len(); let passes_len = passes.len();
let (pass, last) = passes.split_at_mut(passes_len - 1); let (pass, last) = passes.split_at_mut(passes_len - 1);
self.draw_quad.bind_vertices(QuadType::Offscreen);
for (index, pass) in pass.iter_mut().enumerate() { for (index, pass) in pass.iter_mut().enumerate() {
let target = &self.output_framebuffers[index]; let target = &self.output_framebuffers[index];
source.filter = pass.config.filter; source.filter = pass.config.filter;
@ -360,7 +354,7 @@ impl<T: GLInterface> FilterChainImpl<T> {
viewport, viewport,
&original, &original,
&source, &source,
RenderTarget::offscreen(target, viewport.mvp.unwrap_or(GL_MVP_DEFAULT)), RenderTarget::identity(target),
); );
let target = target.as_texture(pass.config.filter, pass.config.wrap_mode); let target = target.as_texture(pass.config.filter, pass.config.wrap_mode);
@ -368,6 +362,7 @@ impl<T: GLInterface> FilterChainImpl<T> {
source = target; source = target;
} }
self.draw_quad.bind_vertices(QuadType::Final);
// try to hint the optimizer // try to hint the optimizer
assert_eq!(last.len(), 1); assert_eq!(last.len(), 1);
if let Some(pass) = last.iter_mut().next() { if let Some(pass) = last.iter_mut().next() {

View file

@ -1,27 +1,36 @@
use crate::gl::FINAL_VBO_DATA;
use crate::gl::{DrawQuad, OpenGLVertex}; use crate::gl::{DrawQuad, OpenGLVertex};
use crate::gl::{FINAL_VBO_DATA, OFFSCREEN_VBO_DATA};
use bytemuck::offset_of; use bytemuck::offset_of;
use gl::types::{GLsizei, GLsizeiptr, GLuint}; use gl::types::{GLsizei, GLsizeiptr, GLuint};
use librashader_runtime::quad::QuadType;
pub struct Gl3DrawQuad { pub struct Gl3DrawQuad {
vbo: GLuint, vbo: [GLuint; 2],
vao: GLuint, vao: GLuint,
} }
impl DrawQuad for Gl3DrawQuad { impl DrawQuad for Gl3DrawQuad {
fn new() -> Gl3DrawQuad { fn new() -> Gl3DrawQuad {
let mut vbo = 0; let mut vbo = [0, 0];
let mut vao = 0; let mut vao = 0;
unsafe { unsafe {
gl::GenBuffers(1, &mut vbo); gl::GenBuffers(2, vbo.as_mut_ptr());
gl::BindBuffer(gl::ARRAY_BUFFER, vbo); gl::BindBuffer(gl::ARRAY_BUFFER, vbo[0]);
gl::BufferData( gl::BufferData(
gl::ARRAY_BUFFER, gl::ARRAY_BUFFER,
4 * std::mem::size_of::<OpenGLVertex>() as GLsizeiptr, std::mem::size_of_val(OFFSCREEN_VBO_DATA) as GLsizeiptr,
OFFSCREEN_VBO_DATA.as_ptr().cast(),
gl::STATIC_DRAW,
);
gl::BindBuffer(gl::ARRAY_BUFFER, vbo[1]);
gl::BufferData(
gl::ARRAY_BUFFER,
std::mem::size_of_val(FINAL_VBO_DATA) as GLsizeiptr,
FINAL_VBO_DATA.as_ptr().cast(), FINAL_VBO_DATA.as_ptr().cast(),
gl::STATIC_DRAW, gl::STATIC_DRAW,
); );
gl::BindBuffer(gl::ARRAY_BUFFER, 0); gl::BindBuffer(gl::ARRAY_BUFFER, 0);
gl::GenVertexArrays(1, &mut vao); gl::GenVertexArrays(1, &mut vao);
} }
@ -29,20 +38,25 @@ impl DrawQuad for Gl3DrawQuad {
Self { vbo, vao } Self { vbo, vao }
} }
fn bind_vertices(&self) { fn bind_vertices(&self, quad_type: QuadType) {
let buffer_index = match quad_type {
QuadType::Offscreen => 0,
QuadType::Final => 1,
};
unsafe { unsafe {
gl::BindVertexArray(self.vao); gl::BindVertexArray(self.vao);
gl::EnableVertexAttribArray(0); gl::EnableVertexAttribArray(0);
gl::EnableVertexAttribArray(1); gl::EnableVertexAttribArray(1);
gl::BindBuffer(gl::ARRAY_BUFFER, self.vbo); gl::BindBuffer(gl::ARRAY_BUFFER, self.vbo[buffer_index]);
// the provided pointers are of OpenGL provenance with respect to the buffer bound to quad_vbo, // the provided pointers are of OpenGL provenance with respect to the buffer bound to quad_vbo,
// and not a known provenance to the Rust abstract machine, therefore we give it invalid pointers. // and not a known provenance to the Rust abstract machine, therefore we give it invalid pointers.
// that are inexpressible in Rust // that are inexpressible in Rust
gl::VertexAttribPointer( gl::VertexAttribPointer(
0, 0,
2, 4,
gl::FLOAT, gl::FLOAT,
gl::FALSE, gl::FALSE,
std::mem::size_of::<OpenGLVertex>() as GLsizei, std::mem::size_of::<OpenGLVertex>() as GLsizei,
@ -54,7 +68,7 @@ impl DrawQuad for Gl3DrawQuad {
gl::FLOAT, gl::FLOAT,
gl::FALSE, gl::FALSE,
std::mem::size_of::<OpenGLVertex>() as GLsizei, std::mem::size_of::<OpenGLVertex>() as GLsizei,
sptr::invalid(offset_of!(OpenGLVertex, position)), sptr::invalid(offset_of!(OpenGLVertex, texcoord)),
); );
} }
} }
@ -72,8 +86,12 @@ impl DrawQuad for Gl3DrawQuad {
impl Drop for Gl3DrawQuad { impl Drop for Gl3DrawQuad {
fn drop(&mut self) { fn drop(&mut self) {
unsafe { unsafe {
if self.vbo != 0 { if self.vbo[0] != 0 {
gl::DeleteBuffers(1, &self.vbo); gl::DeleteBuffers(1, &self.vbo[0]);
}
if self.vbo[1] != 0 {
gl::DeleteBuffers(1, &self.vbo[1]);
} }
if self.vao != 0 { if self.vao != 0 {

View file

@ -1,43 +1,44 @@
use crate::gl::FINAL_VBO_DATA;
use crate::gl::{DrawQuad, OpenGLVertex}; use crate::gl::{DrawQuad, OpenGLVertex};
use crate::gl::{FINAL_VBO_DATA, OFFSCREEN_VBO_DATA};
use bytemuck::offset_of; use bytemuck::offset_of;
use gl::types::{GLint, GLsizeiptr, GLuint}; use gl::types::{GLint, GLsizeiptr, GLuint};
use librashader_runtime::quad::QuadType;
pub struct Gl46DrawQuad { pub struct Gl46DrawQuad {
vbo: GLuint, vbo: [GLuint; 2],
vao: GLuint, vao: GLuint,
} }
impl DrawQuad for Gl46DrawQuad { impl DrawQuad for Gl46DrawQuad {
fn new() -> Self { fn new() -> Self {
let mut vbo = 0; let mut vbo = [0, 0];
let mut vao = 0; let mut vao = 0;
unsafe { unsafe {
gl::CreateBuffers(1, &mut vbo); gl::CreateBuffers(2, vbo.as_mut_ptr());
gl::NamedBufferData( gl::NamedBufferData(
vbo, vbo[0],
4 * std::mem::size_of::<OpenGLVertex>() as GLsizeiptr, std::mem::size_of_val(OFFSCREEN_VBO_DATA) as GLsizeiptr,
OFFSCREEN_VBO_DATA.as_ptr().cast(),
gl::STATIC_DRAW,
);
gl::NamedBufferData(
vbo[1],
std::mem::size_of_val(FINAL_VBO_DATA) as GLsizeiptr,
FINAL_VBO_DATA.as_ptr().cast(), FINAL_VBO_DATA.as_ptr().cast(),
gl::STATIC_DRAW, gl::STATIC_DRAW,
); );
gl::CreateVertexArrays(1, &mut vao); gl::CreateVertexArrays(1, &mut vao);
gl::EnableVertexArrayAttrib(vao, 0); gl::EnableVertexArrayAttrib(vao, 0);
gl::EnableVertexArrayAttrib(vao, 1); gl::EnableVertexArrayAttrib(vao, 1);
gl::VertexArrayVertexBuffer(
vao,
0,
vbo,
0,
std::mem::size_of::<OpenGLVertex>() as GLint,
);
gl::VertexArrayAttribFormat( gl::VertexArrayAttribFormat(
vao, vao,
0, 0,
2, 4,
gl::FLOAT, gl::FLOAT,
gl::FALSE, gl::FALSE,
offset_of!(OpenGLVertex, position) as GLuint, offset_of!(OpenGLVertex, position) as GLuint,
@ -58,8 +59,21 @@ impl DrawQuad for Gl46DrawQuad {
Self { vbo, vao } Self { vbo, vao }
} }
fn bind_vertices(&self) { fn bind_vertices(&self, quad_type: QuadType) {
let buffer_index = match quad_type {
QuadType::Offscreen => 0,
QuadType::Final => 1,
};
unsafe { unsafe {
gl::VertexArrayVertexBuffer(
self.vao,
0,
self.vbo[buffer_index],
0,
std::mem::size_of::<OpenGLVertex>() as GLint,
);
gl::BindVertexArray(self.vao); gl::BindVertexArray(self.vao);
} }
} }
@ -74,8 +88,12 @@ impl DrawQuad for Gl46DrawQuad {
impl Drop for Gl46DrawQuad { impl Drop for Gl46DrawQuad {
fn drop(&mut self) { fn drop(&mut self) {
unsafe { unsafe {
if self.vbo != 0 { if self.vbo[0] != 0 {
gl::DeleteBuffers(1, &self.vbo); gl::DeleteBuffers(1, &self.vbo[0]);
}
if self.vbo[1] != 0 {
gl::DeleteBuffers(1, &self.vbo[1]);
} }
if self.vao != 0 { if self.vao != 0 {

View file

@ -16,31 +16,51 @@ use librashader_presets::{Scale2D, TextureConfig};
use librashader_reflect::back::glsl::CrossGlslContext; use librashader_reflect::back::glsl::CrossGlslContext;
use librashader_reflect::back::ShaderCompilerOutput; use librashader_reflect::back::ShaderCompilerOutput;
use librashader_reflect::reflect::semantics::{BufferReflection, TextureBinding}; use librashader_reflect::reflect::semantics::{BufferReflection, TextureBinding};
use librashader_runtime::quad::QuadType;
use librashader_runtime::uniforms::UniformStorageAccess; use librashader_runtime::uniforms::UniformStorageAccess;
#[repr(C)] #[repr(C)]
#[derive(Debug, Copy, Clone, Default, Zeroable, Pod)] #[derive(Debug, Copy, Clone, Default, Zeroable, Pod)]
pub(crate) struct OpenGLVertex { pub(crate) struct OpenGLVertex {
pub position: [f32; 2], pub position: [f32; 4],
pub texcoord: [f32; 2], pub texcoord: [f32; 2],
} }
pub(crate) static FINAL_VBO_DATA: &[OpenGLVertex; 4] = &[ static OFFSCREEN_VBO_DATA: &[OpenGLVertex; 4] = &[
OpenGLVertex { OpenGLVertex {
position: [0.0, 0.0], position: [-1.0, -1.0, 0.0, 1.0],
texcoord: [0.0, 0.0], texcoord: [0.0, 0.0],
}, },
OpenGLVertex { OpenGLVertex {
position: [1.0, 0.0], position: [1.0, -1.0, 0.0, 1.0],
texcoord: [1.0, 0.0], texcoord: [1.0, 0.0],
}, },
OpenGLVertex { OpenGLVertex {
position: [0.0, 1.0], position: [-1.0, 1.0, 0.0, 1.0],
texcoord: [0.0, 1.0], texcoord: [0.0, 1.0],
}, },
OpenGLVertex { OpenGLVertex {
position: [1.0, 1.0], position: [1.0, 1.0, 0.0, 1.0],
texcoord: [1.0, 1.0],
},
];
static FINAL_VBO_DATA: &[OpenGLVertex; 4] = &[
OpenGLVertex {
position: [0.0, 0.0, 0.0, 1.0],
texcoord: [0.0, 0.0],
},
OpenGLVertex {
position: [1.0, 0.0, 0.0, 1.0],
texcoord: [1.0, 0.0],
},
OpenGLVertex {
position: [0.0, 1.0, 0.0, 1.0],
texcoord: [0.0, 1.0],
},
OpenGLVertex {
position: [1.0, 1.0, 0.0, 1.0],
texcoord: [1.0, 1.0], texcoord: [1.0, 1.0],
}, },
]; ];
@ -58,7 +78,7 @@ pub(crate) trait CompileProgram {
pub(crate) trait DrawQuad { pub(crate) trait DrawQuad {
fn new() -> Self; fn new() -> Self;
fn bind_vertices(&self); fn bind_vertices(&self, quad_type: QuadType);
fn unbind_vertices(&self); fn unbind_vertices(&self);
} }

View file

@ -123,13 +123,13 @@ impl<'a> State<'a> {
// ) // )
// .unwrap(); // .unwrap();
// //
// let preset = let preset =
// ShaderPreset::try_parse("../test/shaders_slang/crt/crt-royale.slangp").unwrap(); ShaderPreset::try_parse("../test/shaders_slang/crt/crt-royale.slangp").unwrap();
let preset = ShaderPreset::try_parse( // let preset = ShaderPreset::try_parse(
"../test/shaders_slang/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp", // "../test/shaders_slang/bezel/Mega_Bezel/Presets/MBZ__0__SMOOTH-ADV.slangp",
) // )
.unwrap(); // .unwrap();
let chain = FilterChainWgpu::load_from_preset( let chain = FilterChainWgpu::load_from_preset(
preset, preset,