diff --git a/blurs/README.md b/blurs/README.md new file mode 100644 index 0000000..6c00263 --- /dev/null +++ b/blurs/README.md @@ -0,0 +1,129 @@ +This info pertains to the Blurs by TroggleMonkey: + +DESCRIPTION: +Gaussian blurs are common building blocks in multi-pass shaders, and this +library of optimized and tested blurs should make it easier to use whatever size +blur you need. All of these shaders are based on the tex2Dblur* functions in +include/blur-functions.h, so you can use those directly if you ever need to +add more processing to the same pass as a Gaussian blur. + +PICK THE RIGHT BLUR FOR YOUR USE CASE: +There are several different types of blurs, ranging in size from 3-12 texels: +a.) "Resize" separable blurs use vertical and horizontal passes and require N + taps for an Nx blur. These are arbitrarily resizable. +b.) "Fast" separable blurs use vertical and horizontal passes and require N taps + for an (N*2 - 1)x blur. They exploit bilinear filtering to reduce the + required taps from e.g. 9 to 5. These are always faster, but they have + strict image scale requirements. +c.) "Resize" one-pass blurs combine the vertical/horizontal passes of the + "resize" separable blurs, and they require NxN taps for an NxN blur. These + perform slowly enough that only tex2Dblur3x3resize is useful/included. +d.) Other one-pass blurs combine the vertical/horizontal passes of the "fast" + separable blurs, and they exploit bilinear filtering the same way. They're + faster than separable blurs at 3x3, competitive at 5x5 depending on options, + and slower at 7x7 and above...but larger blurs may still be useful if you're + hurting for passes. +e.) "Shared" one-pass blurs go a step further: They also use quad-pixel + communication with fine-grained derivatives to distribute texture samples + across a 2x2 pixel quad. (ddx() and ddy() are required, as well as a GPU + that uses fine-grained derivatives). 
These blurs are faster than the other + one-pass blurs, but they have some artifacts from combining sample-sharing + with bilinear sampling, so they're best reserved for reblurring an already- + blurred input. + +Every blur expects linear filtering. Except for resize separable blurs, all +require a pass scale of (1/(2^M)) for some M >= 0. That is, the output image +has to have a 1:1 pixel:texel ratio with some mipmap of the input image, so use +e.g. scaleN = "1.0" or scaleN = "0.25", not scaleN = "0.33" or scaleN = "2.0". +Note: mipmap_inputN must = "true" in your .slangp file for scales other than 1.0. + +There are two suffixes on the .slang files relating to gamma correction: +* Blurs with no suffix assume linear RGB input and output. +* Blurs with a "-last-pass" suffix use pow() to gamma-correct their output. +* Blurs with a "-gamma-encode-every-fbo" suffix use pow() to linearize each + input sample and again to gamma-correct the output. These blurs are MUCH + slower than blurs without this suffix, but they're provided in case you want + to be [almost] gamma-correct on platforms without sRGB FBO's. (The "almost" + is because bilinear filtering still won't be gamma-correct without sRGB.) +* There are also blurs with both suffixes. This may seem redundant, but they + make it easier to use a different output gamma for the last pass than for + the rest of the pipeline (such as when simulating another display device like + a Game Boy Advance or CRT). See srgb-helpers/README.txt for more information. + +BENCHMARK RESULTS: +Blurs have different performance characteristics depending on whether the input +is mipmapped and depending on whether they're gamma-encoding every FBO. Here's +an excerpt from the blur-functions.h description with a comparison. Note that +benchmarks without an sRGB heading use "-gamma-encode-every-fbo" suffixes, and +you can just look at the sRGB performance figures if you don't care about gamma: +// Here are some framerates from a GeForce 8800GTS. 
The first pass resizes to +// viewport size (4x in this test) and linearizes for sRGB codepaths, and the +// remaining passes perform 6 full blurs. Mipmapped tests are performed at the +// same scale, so they just measure the cost of mipmapping each FBO (only every +// other FBO is mipmapped for separable blurs, to mimic realistic usage). +// Mipmap Neither sRGB+Mipmap sRGB Function +// 76.0 92.3 131.3 193.7 tex2Dblur3fast +// 63.2 74.4 122.4 175.5 tex2Dblur3resize +// 93.7 121.2 159.3 263.2 tex2Dblur3x3 +// 59.7 68.7 115.4 162.1 tex2Dblur3x3resize +// 63.2 74.4 122.4 175.5 tex2Dblur5fast +// 49.3 54.8 100.0 132.7 tex2Dblur5resize +// 59.7 68.7 115.4 162.1 tex2Dblur5x5 +// 64.9 77.2 99.1 137.2 tex2Dblur6x6shared +// 55.8 63.7 110.4 151.8 tex2Dblur7fast +// 39.8 43.9 83.9 105.8 tex2Dblur7resize +// 40.0 44.2 83.2 104.9 tex2Dblur7x7 +// 56.4 65.5 71.9 87.9 tex2Dblur8x8shared +// 49.3 55.1 99.9 132.5 tex2Dblur9fast +// 33.3 36.2 72.4 88.0 tex2Dblur9resize +// 27.8 29.7 61.3 72.2 tex2Dblur9x9 +// 37.2 41.1 52.6 60.2 tex2Dblur10x10shared +// 44.4 49.5 91.3 117.8 tex2Dblur11fast +// 28.8 30.8 63.6 75.4 tex2Dblur11resize +// 33.6 36.5 40.9 45.5 tex2Dblur12x12shared + +BASIC USAGE: +The .slangp presets in the quality-test-presets folder provide usage examples +for basically every .slang blur shader. The "-srgb" suffix on some .slangp +presets is an explicit notice that they use sRGB FBO's. Note how and when the +"-last-pass" suffix is used for each .slang file, etc. + +The provided .slangp files with the "-mipmap" suffix are used to test quality and +benchmarking with mipmapping enabled, but none of them actually use mipmapping +as a feature in and of itself. The following contrived .slangp would do that: + shaders = "5" + + # Pass0: Linearize RGB: + shader0 = ../../srgb-helpers/first-pass-linearize.slang + filter_linear0 = "true" + scale_type0 = "source" + scale0 = "1.0" + srgb_framebuffer0 = "true" + + # Pass1: Upsize to 4x. 
Pretend this pass does significant processing at 4x. + shader1 = ../../stock.slang + filter_linear1 = "true" + scale_type1 = "source" + scale1 = "4.0" + srgb_framebuffer1 = "true" + + # Pass2: Blur a source-sized mipmap 9x vertically; just shrink horizontally. + shader2 = ../blur9fast-vertical.slang + filter_linear2 = "true" + scale_type2 = "source" + scale2 = "0.25" + srgb_framebuffer2 = "true" + mipmap_input2 = "true" + + # Pass3: Blur 9x horizontally + shader3 = ../blur9fast-horizontal.slang + filter_linear3 = "true" + scale_type3 = "source" + scale3 = "1.0" + srgb_framebuffer3 = "true" + + # Pass4: Scale to the screen size and gamma-correct the output: + shader4 = ../../srgb-helpers/last-pass-gamma-correct.slang + filter_linear4 = "true" + scale_type4 = "viewport" + scale4 = "1.0" diff --git a/blurs/blur10x10shared-gamma-encode-every-fbo.slang b/blurs/blur10x10shared-gamma-encode-every-fbo.slang new file mode 100644 index 0000000..e562409 --- /dev/null +++ b/blurs/blur10x10shared-gamma-encode-every-fbo.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +//#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur10x10shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang b/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang new file mode 100644 index 0000000..16b02c1 --- /dev/null +++ b/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the 
Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur10x10shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur10x10shared-last-pass.slang b/blurs/blur10x10shared-last-pass.slang new file mode 100644 index 0000000..ccd7372 --- /dev/null +++ b/blurs/blur10x10shared-last-pass.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following 
conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +//#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur10x10shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur10x10shared.slang b/blurs/blur10x10shared.slang new file mode 100644 index 0000000..f7b0891 --- /dev/null +++ b/blurs/blur10x10shared.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above 
copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +//#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +//#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur10x10shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur12x12shared-gamma-encode-every-fbo.slang b/blurs/blur12x12shared-gamma-encode-every-fbo.slang new file mode 100644 index 0000000..33bf942 --- /dev/null +++ b/blurs/blur12x12shared-gamma-encode-every-fbo.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to 
do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +//#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur12x12shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang b/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang new file mode 100644 index 0000000..f5d9b2b --- /dev/null +++ b/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the 
Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur12x12shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur12x12shared-last-pass.slang b/blurs/blur12x12shared-last-pass.slang new file mode 100644 index 0000000..e92f659 --- /dev/null +++ b/blurs/blur12x12shared-last-pass.slang @@ -0,0 +1,87 @@ +#version 450 + +///////////////////////////////// MIT LICENSE //////////////////////////////// + +// Copyright (C) 2014 TroggleMonkey +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following 
conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; +} params; + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +///////////////////////////// SETTINGS MANAGEMENT //////////////////////////// + +// PASS SETTINGS: +// gamma-management.h needs to know what kind of pipeline we're using and +// what pass this is in that pipeline. This will become obsolete if/when we +// can #define things like this in the preset file. +//#define GAMMA_ENCODE_EVERY_FBO +//#define FIRST_PASS +#define LAST_PASS +//#define SIMULATE_CRT_ON_LCD +//#define SIMULATE_GBA_ON_LCD +//#define SIMULATE_LCD_ON_CRT +//#define SIMULATE_GBA_ON_CRT + +// blur-functions.h needs to know our profile's capabilities: +// 1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs. +// 2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful +// artifacts without it due to funky texture sampling derivatives. 
+#define DRIVERS_ALLOW_DERIVATIVES +#define DRIVERS_ALLOW_TEX2DLOD + +/////////////////////////////// VERTEX INCLUDES /////////////////////////////// + +#include "../include/compat_macros.inc" +#pragma stage vertex +#include "vertex-shader-blur-one-pass-shared-sample.h" + +#pragma stage fragment +layout(location = 0) in vec4 tex_uv; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +#define input_texture Source + +///////////////////////////// FRAGMENT INCLUDES ///////////////////////////// +#include "../include/gamma-management.h" +#include "../include/blur-functions.h" + +void main() +{ + // Get the integer output pixel number from two origins (uv and screen): + float4 output_pixel_num_integer = floor(output_pixel_num); + // Get the fragment's position in the pixel quad and do a shared-sample blur: + float4 quad_vector = get_quad_vector(output_pixel_num_integer); + float3 color = tex2Dblur12x12shared(input_texture, tex_uv, + blur_dxdy, quad_vector); + // Encode and output the blurred image: + FragColor = encode_output(float4(color, 1.0)); +} diff --git a/blurs/blur12x12shared.slang b/blurs/blur12x12shared.slang index cca3ed4..f5944a1 100644 --- a/blurs/blur12x12shared.slang +++ b/blurs/blur12x12shared.slang @@ -40,7 +40,7 @@ layout(std140, set = 0, binding = 0) uniform UBO // PASS SETTINGS: // gamma-management.h needs to know what kind of pipeline we're using and // what pass this is in that pipeline. This will become obsolete if/when we -// can #define things like this in the .cgp preset file. +// can #define things like this in the preset file. 
//#define GAMMA_ENCODE_EVERY_FBO //#define FIRST_PASS //#define LAST_PASS @@ -64,8 +64,8 @@ layout(std140, set = 0, binding = 0) uniform UBO #pragma stage fragment layout(location = 0) in vec4 tex_uv; -layout(location = 1) in vec2 blur_dxdy; -layout(location = 2) in vec4 output_pixel_num; +layout(location = 1) in vec4 output_pixel_num; +layout(location = 2) in vec2 blur_dxdy; layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Source; #define input_texture Source @@ -84,4 +84,4 @@ void main() blur_dxdy, quad_vector); // Encode and output the blurred image: FragColor = encode_output(float4(color, 1.0)); -} \ No newline at end of file +} diff --git a/handheld/shaders/color/nds-color.slang b/handheld/shaders/color/nds-color.slang index 4402900..325db58 100644 --- a/handheld/shaders/color/nds-color.slang +++ b/handheld/shaders/color/nds-color.slang @@ -1,13 +1,5 @@ #version 450 -layout(std140, set = 0, binding = 0) uniform UBO -{ - mat4 MVP; - vec4 OutputSize; - vec4 OriginalSize; - vec4 SourceSize; -} global; - /* Shader Modified: Pokefan531 Color Mangler @@ -16,74 +8,105 @@ layout(std140, set = 0, binding = 0) uniform UBO */ // Shader that replicates the LCD dynamics from a Nintendo DS Phat -- -#define target_gamma 1.91 -#define display_gamma 1.91 -#define sat 1.0 -#define lum 0.89 -#define contrast 1.0 -#define blr 0.0 -#define blg 0.0 -#define blb 0.0 -#define r 0.87 -#define g 0.645 -#define b 0.73 -#define rg 0.10 -#define rb 0.10 -#define gr 0.255 -#define gb 0.17 -#define br -0.125 -#define bg 0.255 -#define overscan_percent_x 0.0 -#define overscan_percent_y 0.0 +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; + vec4 OutputSize; + vec4 OriginalSize; + vec4 SourceSize; + float mode, white_toggle; +} global; + +#pragma parameter mode "Color Profile (1=sRGB, 2=DCI, 3=Rec2020)" 1.0 1.0 3.0 1.0 +int color_mode = int(global.mode); + +#pragma parameter white_toggle "Toggle White Balance" 0.0 0.0 1.0 1.0 +bool white = 
bool(global.white_toggle); + +#define target_gamma 2.2 +#define display_gamma 2.2 /* -White-NDS -#define lum 0.98 -#define r 0.83 -#define g 0.625 -#define b 0.785 -#define rg 0.07 -#define rb 0.075 -#define gr 0.22 -#define gb 0.155 -#define br -0.13 -#define bg 0.22 +We'll define our color weights in this pattern: + r, rg, rb, 0.0, //red channel + gr, g, gb, 0.0, //green channel + br, bg, b, 0.0, //blue channel + blr, blg, blb, lum //alpha channel; we'll hide luma at the end, too */ +const mat4 NDS_Rec2020 = mat4( + 0.58, 0.13, 0.09, 0.0, + 0.38, 0.645, 0.20, 0.0, + 0.04, 0.225, 0.71, 0.0, + 0.0, 0.0, 0.0, 1.0 +); + +const mat4 NDS_Rec2020_white = mat4( + 0.535, 0.12, 0.09, 0.0, + 0.345, 0.585, 0.20, 0.0, + 0.04, 0.215, 0.71, 0.0, + 0.0, 0.0, 0.0, 1.0 +); + +const mat4 NDS_DCI = mat4( + 0.745, 0.10, 0.09, 0.0, + 0.315, 0.665, 0.195, 0.0, + -0.06, 0.235, 0.715, 0.0, + 0.0, 0.0, 0.0, 0.95 +); + +const mat4 NDS_DCI_white = mat4( + 0.685, 0.095, 0.09, 0.0, + 0.29, 0.605, 0.19, 0.0, + -0.06, 0.215, 0.715, 0.0, + 0.0, 0.0, 0.0, 1.0 +); + +const mat4 NDS_sRGB = mat4( + 0.815, 0.07, 0.075, 0.0, + 0.215, 0.62, 0.155, 0.0, + -0.12, 0.22, 0.77, 0.0, + 0.0, 0.0, 0.0, 0.89 +); + +const mat4 NDS_sRGB_white = mat4( + 0.815, 0.07, 0.075, 0.0, + 0.215, 0.62, 0.155, 0.0, + -0.12, 0.22, 0.77, 0.0, + 0.0, 0.0, 0.0, 0.97 +); + #pragma stage vertex layout(location = 0) in vec4 Position; layout(location = 1) in vec2 TexCoord; layout(location = 0) out vec2 vTexCoord; +layout(location = 1) out mat4 profile; void main() { - gl_Position = global.MVP * Position; - vTexCoord = TexCoord; + gl_Position = global.MVP * Position; + vTexCoord = TexCoord; + + if (color_mode == 3) profile = (!white) ? NDS_Rec2020 : NDS_Rec2020_white; + else if (color_mode == 2) profile = (!white) ? NDS_DCI : NDS_DCI_white; + else if (color_mode == 1) profile = (!white) ? 
NDS_sRGB : NDS_sRGB_white; } #pragma stage fragment layout(location = 0) in vec2 vTexCoord; +layout(location = 1) in mat4 profile; layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Source; void main() { - vec4 screen = pow(texture(Source, vTexCoord), vec4(target_gamma)).rgba; - vec4 avglum = vec4(0.5); - screen = mix(screen, avglum, (1.0 - contrast)); - - // r g b black -mat4 color = mat4(r, rg, rb, 0.0, //red channel - gr, g, gb, 0.0, //green channel - br, bg, b, 0.0, //blue channel - blr, blg, blb, 0.0); //alpha channel; these numbers do nothing for our purposes. - -mat4 adjust = mat4((1.0 - sat) * 0.3086 + sat, (1.0 - sat) * 0.3086, (1.0 - sat) * 0.3086, 1.0, -(1.0 - sat) * 0.6094, (1.0 - sat) * 0.6094 + sat, (1.0 - sat) * 0.6094, 1.0, -(1.0 - sat) * 0.0820, (1.0 - sat) * 0.0820, (1.0 - sat) * 0.0820 + sat, 1.0, -0.0, 0.0, 0.0, 1.0); - color *= adjust; + // bring out our stored luminance value + float lum = profile[3].w; + + // our adjustments need to happen in linear gamma + vec4 screen = pow(texture(Source, vTexCoord), vec4(target_gamma)).rgba; + screen = clamp(screen * lum, 0.0, 1.0); - screen = color * screen; + screen = profile * screen; FragColor = pow(screen, vec4(1.0 / display_gamma)); -} \ No newline at end of file +} diff --git a/misc/ss-gamma-ramp.slang b/misc/ss-gamma-ramp.slang new file mode 100644 index 0000000..4873152 --- /dev/null +++ b/misc/ss-gamma-ramp.slang @@ -0,0 +1,72 @@ +#version 450 + +// Super Sleuth Gamma Ramp +// based on Overload's ramp as implemented in bsnes v073 +// ported by hunterk +// license: GPLv2 + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; + float mixer; +} params; + +#pragma parameter mixer "Gamma Boost (%)" 150.0 100.0 200.0 1.0 + +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; +} global; + +#pragma stage vertex +layout(location = 0) in vec4 Position; +layout(location = 1) in vec2 TexCoord; 
+layout(location = 0) out vec2 vTexCoord;
+
+// Vertex stage: project the vertex with MVP and pass the texture coordinate through.
+void main()
+{
+   gl_Position = global.MVP * Position;
+   vTexCoord = TexCoord;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Overload's gamma ramp from Super Sleuth (32 entries, indexed by a 5-bit level)
+// Apparently not really based on anything but it looks nice
+const uint gammaRamp[32] = {
+   0x00, 0x01, 0x03, 0x06, 0x0a, 0x0f, 0x15, 0x1c,
+   0x24, 0x2d, 0x37, 0x42, 0x4e, 0x5b, 0x69, 0x78,
+   0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0,
+   0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0xff,
+};
+
+// quantize a float channel to an integer in 0..255, clamping out-of-range input
+#define conv(f) (((f) >= 1.0) ? 255 : (((f) <= 0.0) ? 0 : int(floor((f) * 256.0))))
+
+// Fragment stage: ramp the RGB of Source and blend it with the unmodified color by params.mixer.
+void main()
+{
+   vec4 img = texture(Source, vTexCoord);
+
+   // convert the color channels to 8-bit values; the mask keeps the ramp index in range
+   uvec3 int_img = uvec3(conv(img.r), conv(img.g), conv(img.b)) & 0xffu;
+
+   // apply the ramp, using the top five bits of each channel as the index
+   uvec3 ramp_index = int_img >> 3;
+   uvec3 output_i = uvec3(gammaRamp[ramp_index.r], gammaRamp[ramp_index.g],
+                          gammaRamp[ramp_index.b]);
+   vec3 output_f = vec3(output_i) * vec3(1./255.);
+
+   // mix between corrected and uncorrected output (mixer 100 -> unmodified, 200 -> fully ramped)
+   vec3 blended = mix(img.rgb, output_f, (params.mixer / 100.0) - 1.0);
+
+   // write all four components so alpha is defined; the source alpha is passed through
+   FragColor = vec4(blended, img.a);
+}