massive overhaul of crt-royale and all associated shaders

2025-02-23 18:17:44 +11:00 · 2017-11-29 13:40:28 -06:00 · 2017-11-29 13:40:28 -06:00 · 760d334b53
commit 760d334b53
parent 44c0a731de
124 changed files with 7135 additions and 4754 deletions
--- a/blurs/blur11fast-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur11fast-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11fast-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur11fast-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11fast-horizontal-last-pass.slang
+++ b/blurs/blur11fast-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11fast-horizontal.slang
+++ b/blurs/blur11fast-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11fast-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur11fast-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11fast-vertical.slang
+++ b/blurs/blur11fast-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur11resize-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur11resize-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-horizontal-last-pass.slang
+++ b/blurs/blur11resize-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-horizontal.slang
+++ b/blurs/blur11resize-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur11resize-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur11resize-vertical.slang
+++ b/blurs/blur11resize-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur12x12shared.slang
+++ b/blurs/blur12x12shared.slang
@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push  //  push-constant block, accessed through the instance name `params`
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;  //  not referenced by this file's main(); presumably read by the #included headers -- TODO confirm
+
+layout(std140, set = 0, binding = 0) uniform UBO  //  std140 uniform block, accessed through the instance name `global`
+{
+	mat4 MVP;  //  presumably the model-view-projection matrix used by the vertex stage -- TODO confirm
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the shader preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec2 blur_dxdy;
+layout(location = 2) in vec4 output_pixel_num;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()  //  Fragment entry point: blurs input_texture with tex2Dblur12x12shared and writes the encoded result to FragColor.
+{
+    //  Floor the output_pixel_num input to get the integer output pixel number from two origins (uv and screen):
+    float4 output_pixel_num_integer = floor(output_pixel_num);  //  float4/float3: presumably compat_macros.inc aliases for vec4/vec3 -- confirm
+    //  Get the fragment's position in the pixel quad, then pass it to the shared-sample blur:
+    float4 quad_vector = get_quad_vector(output_pixel_num_integer);
+    float3 color = tex2Dblur12x12shared(input_texture, tex_uv,
+        blur_dxdy, quad_vector);
+    //  Encode the blurred color with alpha fixed at 1.0 and output it:
+    FragColor = encode_output(float4(color, 1.0));
+}
--- a/blurs/blur3fast-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur3fast-horizontal-gamma-encode-every-fbo.slang
@ -51,12 +51,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_GBA_ON_CRT


-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +64,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3fast-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur3fast-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -51,12 +51,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_GBA_ON_CRT


-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +64,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3fast-horizontal-last-pass.slang
+++ b/blurs/blur3fast-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3fast-horizontal.slang
+++ b/blurs/blur3fast-horizontal.slang
@ -51,12 +51,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_GBA_ON_CRT


-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +64,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3fast-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur3fast-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3fast-vertical.slang
+++ b/blurs/blur3fast-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur3resize-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur3resize-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-horizontal-last-pass.slang
+++ b/blurs/blur3resize-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-horizontal.slang
+++ b/blurs/blur3resize-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur3resize-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3resize-vertical.slang
+++ b/blurs/blur3resize-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3-gamma-encode-every-fbo.slang
+++ b/blurs/blur3x3-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur3x3-last-pass-gamma-encode-every-fbo.slang
@ -50,15 +50,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
-#include "vertex-shader-blur-one-pass-resize.h"
+#include "vertex-shader-blur-one-pass.h"

 ///////////////////////////////  FRAGMENT SHADER  //////////////////////////////

@ -67,10 +63,15 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
-	vec3 color = tex2Dblur3x3resize(Source, tex_uv, blur_dxdy);
+	vec3 color = tex2Dblur3x3(Source, tex_uv, blur_dxdy);  //  non-resize 3x3 blur, paired with vertex-shader-blur-one-pass.h
    //  Encode and output the blurred image:
   FragColor = encode_output(vec4(color, 1.0));
 }
--- a/blurs/blur3x3-last-pass.slang
+++ b/blurs/blur3x3-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3.slang
+++ b/blurs/blur3x3.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3resize-gamma-encode-every-fbo.slang
+++ b/blurs/blur3x3resize-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass-resize.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3resize-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur3x3resize-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass-resize.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3resize-last-pass.slang
+++ b/blurs/blur3x3resize-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass-resize.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur3x3resize.slang
+++ b/blurs/blur3x3resize.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass-resize.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur5fast-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur5fast-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-horizontal-last-pass.slang
+++ b/blurs/blur5fast-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-horizontal.slang
+++ b/blurs/blur5fast-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur5fast-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5fast-vertical.slang
+++ b/blurs/blur5fast-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur5resize-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur5resize-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-horizontal-last-pass.slang
+++ b/blurs/blur5resize-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-horizontal.slang
+++ b/blurs/blur5resize-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur5resize-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5resize-vertical.slang
+++ b/blurs/blur5resize-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5x5-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur5x5-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5x5-last-pass.slang
+++ b/blurs/blur5x5-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur5x5.slang
+++ b/blurs/blur5x5.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur7fast-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur7fast-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-horizontal-last-pass.slang
+++ b/blurs/blur7fast-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-horizontal.slang
+++ b/blurs/blur7fast-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur7fast-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7fast-vertical.slang
+++ b/blurs/blur7fast-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur7resize-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur7resize-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-horizontal-last-pass.slang
+++ b/blurs/blur7resize-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-horizontal.slang
+++ b/blurs/blur7resize-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur7resize-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7resize-vertical.slang
+++ b/blurs/blur7resize-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7x7-gamma-encode-every-fbo.slang
+++ b/blurs/blur7x7-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7x7-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur7x7-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7x7-last-pass.slang
+++ b/blurs/blur7x7-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur7x7.slang
+++ b/blurs/blur7x7.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur9fast-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur9fast-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-horizontal-last-pass.slang
+++ b/blurs/blur9fast-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-horizontal.slang
+++ b/blurs/blur9fast-horizontal.slang
@ -1,18 +1,5 @@
 #version 450

-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-} params;
-
-layout(std140, set = 0, binding = 0) uniform UBO
-{
-	mat4 MVP;
-} global;
-
 /////////////////////////////////  MIT LICENSE  ////////////////////////////////

 //  Copyright (C) 2014 TroggleMonkey
@ -35,6 +22,18 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 //  IN THE SOFTWARE.

+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;

 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////

@ -50,24 +49,24 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-horizontal.h"

 ///////////////////////////////  FRAGMENT SHADER  //////////////////////////////

 #pragma stage fragment
-#pragma format R8G8B8A8_SRGB
 layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur9fast-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9fast-vertical.slang
+++ b/blurs/blur9fast-vertical.slang
@ -1,18 +1,5 @@
 #version 450

-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-} params;
-
-layout(std140, set = 0, binding = 0) uniform UBO
-{
-	mat4 MVP;
-} global;
-
 /////////////////////////////////  MIT LICENSE  ////////////////////////////////

 //  Copyright (C) 2014 TroggleMonkey
@ -35,6 +22,18 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 //  IN THE SOFTWARE.

+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;

 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////

@ -50,28 +49,28 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-fast-vertical.h"

 ///////////////////////////////  FRAGMENT SHADER  //////////////////////////////

 #pragma stage fragment
-#pragma format R8G8B8A8_SRGB
 layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
-	vec3 color = tex2Dblur9fast(Source, tex_uv, blur_dxdy);
+	vec3 color = tex2Dblur9fast(input_texture, tex_uv, blur_dxdy);
    //  Encode and output the blurred image:
-   FragColor = encode_output(vec4(color, 1.0));
+   FragColor = encode_output(float4(color, 1.0));
 }
--- a/blurs/blur9resize-horizontal-gamma-encode-every-fbo.slang
+++ b/blurs/blur9resize-horizontal-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9resize-horizontal-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur9resize-horizontal-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9resize-horizontal-last-pass.slang
+++ b/blurs/blur9resize-horizontal-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9resize-horizontal.slang
+++ b/blurs/blur9resize-horizontal.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-horizontal.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9resize-vertical-gamma-encode-every-fbo.slang
+++ b/blurs/blur9resize-vertical-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9resize-vertical.slang
+++ b/blurs/blur9resize-vertical.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-resize-vertical.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9x9-gamma-encode-every-fbo.slang
+++ b/blurs/blur9x9-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9x9-last-pass-gamma-encode-every-fbo.slang
+++ b/blurs/blur9x9-last-pass-gamma-encode-every-fbo.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9x9-last-pass.slang
+++ b/blurs/blur9x9-last-pass.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/blur9x9.slang
+++ b/blurs/blur9x9.slang
@ -50,13 +50,9 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //#define SIMULATE_LCD_ON_CRT
 //#define SIMULATE_GBA_ON_CRT

+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-//  #included by vertex shader:
-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
-
+#include "../include/compat_macros.inc"
 #pragma stage vertex
 #include "vertex-shader-blur-one-pass.h"

@ -67,6 +63,11 @@ layout(location = 0) in vec2 tex_uv;
 layout(location = 1) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"

 void main()
 {
--- a/blurs/vertex-shader-blur-fast-horizontal.h
+++ b/blurs/vertex-shader-blur-fast-horizontal.h
@ -30,8 +30,8 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
--- a/blurs/vertex-shader-blur-fast-vertical.h
+++ b/blurs/vertex-shader-blur-fast-vertical.h
@ -23,17 +23,10 @@
 //  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 //  IN THE SOFTWARE.

-
-/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
-
-//  PASS SETTINGS:
-//  Pass settings should be set by the shader file that #includes this one.
-
-
 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
@ -55,10 +48,10 @@ void main()
    //  (not output pixels), but we avoid this and consistently blur at the
    //  destination size.  Otherwise, combining statically calculated weights
    //  with bilinear sample exploitation would result in terrible artifacts.   
-    const vec2 dxdy_scale = params.SourceSize.xy * params.OutputSize.zw;
-	const vec2 dxdy = dxdy_scale * params.SourceSize.zw;
+    const float2 dxdy_scale = IN.video_size/IN.output_size;
+	const float2 dxdy = dxdy_scale/IN.texture_size;
    //  This blur is vertical-only, so zero out the horizontal offset:
-	blur_dxdy = vec2(0.0, dxdy.y);
+	blur_dxdy = float2(0.0, dxdy.y);
 }

 #endif  //  VERTEX_SHADER_BLUR_FAST_VERTICAL_H
--- a/blurs/vertex-shader-blur-one-pass-resize.h
+++ b/blurs/vertex-shader-blur-one-pass-resize.h
@ -32,8 +32,8 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
--- a/blurs/vertex-shader-blur-one-pass-shared-sample.h
+++ b/blurs/vertex-shader-blur-one-pass-shared-sample.h
@ -32,20 +32,20 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
-layout(location = 0) out vec2 tex_uv;
+layout(location = 0) out vec4 tex_uv;
 layout(location = 1) out vec4 output_pixel_num;
 layout(location = 2) out vec2 blur_dxdy;

 void main()
 {
   gl_Position = global.MVP * Position;
-   tex_uv = TexCoord;
+   vec2 tex_uv_ = TexCoord;

 	//  Get the uv sample distance between output pixels.  Blurs are not generic
    //  Gaussian resizers, and correct blurs require:
@ -57,21 +57,21 @@ void main()
    //  (not output pixels), but we avoid this and consistently blur at the
    //  destination size.  Otherwise, combining statically calculated weights
    //  with bilinear sample exploitation would result in terrible artifacts.
-    const vec2 dxdy_scale params.SourceSize.xy * params.OutputSize.zw;
+    const vec2 dxdy_scale = params.SourceSize.xy * params.OutputSize.zw;
    blur_dxdy = dxdy_scale * params.SourceSize.zw;

    //  Get the output pixel number in ([0, xres), [0, yres)) with respect to
    //  the uv origin (.xy components) and the screen origin (.zw components).
    //  Both are useful.  Don't round until the fragment shader.
-    const float2 video_uv = tex_uv;
+    const float2 video_uv = tex_uv_;
    output_pixel_num.xy = params.OutputSize.xy * vec2(video_uv.x, video_uv.y);
    output_pixel_num.zw = params.OutputSize.xy *
-        (out_position.xy * 0.5 + vec2(0.5));
+        (gl_Position.xy * 0.5 + vec2(0.5));

    //  Set the mip level correctly for shared-sample blurs (where the
    //  derivatives are unreliable):
    const float mip_level = log2(params.SourceSize.xy * params.OutputSize.zw).y;
-    tex_uv = vec4(tex_uv, 0.0, mip_level);
+    tex_uv = vec4(tex_uv_, 0.0, mip_level);
 }

 #endif  //  VERTEX_SHADER_BLUR_ONE_PASS_SHARED_SAMPLE_H
--- a/blurs/vertex-shader-blur-one-pass.h
+++ b/blurs/vertex-shader-blur-one-pass.h
@ -32,8 +32,8 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
--- a/blurs/vertex-shader-blur-resize-horizontal.h
+++ b/blurs/vertex-shader-blur-resize-horizontal.h
@ -32,8 +32,8 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
--- a/blurs/vertex-shader-blur-resize-vertical.h
+++ b/blurs/vertex-shader-blur-resize-vertical.h
@ -32,8 +32,8 @@

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "../include/gamma-management.h"
-#include "../include/blur-functions.h"
+//#include "../include/gamma-management.h"
+//#include "../include/blur-functions.h"

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
--- a/crt/crt-royale-fake-bloom-intel.slangp
+++ b/crt/crt-royale-fake-bloom-intel.slangp
@ -0,0 +1,134 @@
+# IMPORTANT:
+# Shader passes need to know details about the image in the mask_texture LUT
+# files, so set the following constants in user-cgp-constants.h accordingly:
+# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
+# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
+# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
+# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
+# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
+# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
+# Shader passes also need to know certain scales set in this .slangp, but their
+# compilation model doesn't currently allow the .slangp file to tell them.  Make
+# sure to set the following constants in user-cgp-constants.h accordingly too:
+# 1.) bloom_approx_scale_x_for_fake = scale_x2
+# 2.) mask_resize_viewport_scale = float2(scale_x6, scale_y5)  (NOTE: these scales belong to mask-resize passes, which this preset does not define)
+# Finally, shader passes need to know the value of geom_max_aspect_ratio used to
+# calculate scale_y5 (among other values):
+# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)
+
+shaders = "7"
+
+# Set an identifier, filename, and sampling traits for the phosphor mask texture.
+# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
+# non-mipmapped version and a large mipmapped version.
+# TODO: Test masks in other directories.
+textures = "mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large"
+mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"
+mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"
+mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
+mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"
+mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
+mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png"
+mask_grille_texture_small_wrap_mode = "repeat"
+mask_grille_texture_large_wrap_mode = "repeat"
+mask_slot_texture_small_wrap_mode = "repeat"
+mask_slot_texture_large_wrap_mode = "repeat"
+mask_shadow_texture_small_wrap_mode = "repeat"
+mask_shadow_texture_large_wrap_mode = "repeat"
+mask_grille_texture_small_linear = "true"
+mask_grille_texture_large_linear = "true"
+mask_slot_texture_small_linear = "true"
+mask_slot_texture_large_linear = "true"
+mask_shadow_texture_small_linear = "true"
+mask_shadow_texture_large_linear = "true"
+mask_grille_texture_small_mipmap = "false"  # Mipmapping causes artifacts with manually resized masks without tex2Dlod
+mask_grille_texture_large_mipmap = "true"   # Essential for hardware-resized masks
+mask_slot_texture_small_mipmap = "false"    # Mipmapping causes artifacts with manually resized masks without tex2Dlod
+mask_slot_texture_large_mipmap = "true"     # Essential for hardware-resized masks
+mask_shadow_texture_small_mipmap = "false"  # Mipmapping causes artifacts with manually resized masks without tex2Dlod
+mask_shadow_texture_large_mipmap = "true"   # Essential for hardware-resized masks
+
+
+# Pass0: Linearize the input based on CRT gamma and bob interlaced fields.
+# (Bobbing ensures we can immediately blur without getting artifacts.)
+shader0 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang"
+alias0 = "ORIG_LINEARIZED"
+filter_linear0 = "false"
+scale_type0 = "source"
+scale0 = "1.0"
+srgb_framebuffer0 = "true"
+
+# Pass1: Resample interlaced (and misconverged) scanlines vertically.
+# Separating vertical/horizontal scanline sampling is faster: It lets us
+# consider more scanlines while calculating weights for fewer pixels, and
+# it reduces our samples from vertical*horizontal to vertical+horizontal.
+# This has to come right after ORIG_LINEARIZED, because there's no
+# "original_source" scale_type we can use later.
+shader1 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.slang"
+alias1 = "VERTICAL_SCANLINES"
+filter_linear1 = "true"
+scale_type_x1 = "source"
+scale_x1 = "1.0"
+scale_type_y1 = "viewport"
+scale_y1 = "1.0"
+#float_framebuffer1 = "true"
+srgb_framebuffer1 = "true"
+
+# Pass2: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
+# account for convergence offsets.  We want to blur a predictable portion of the
+# screen to match the phosphor bloom, and absolute scale works best for
+# reliable results with a fixed-size bloom.  Picking a scale is tricky:
+# a.) 400x300 is a good compromise for the "fake-bloom" version: It's low enough
+#     to blur high-res/interlaced sources but high enough that resampling
+#     doesn't smear low-res sources too much.
+# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and
+#     the only noticeable visual difference is a larger halation spread (which
+#     may be a good thing for people who like to crank it up).
+# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
+# *intended* for an ~4:3 aspect ratio).
+shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx-fake-bloom-intel.slang"
+alias2 = "BLOOM_APPROX"
+filter_linear2 = "true"
+scale_type2 = "absolute"
+scale_x2 = "400"
+scale_y2 = "300"
+srgb_framebuffer2 = "true"
+
+# Pass3: Vertically blur the input for halation and refractive diffusion.
+# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring
+# a constant portion of the screen is probably physically correct if the
+# viewport resolution is proportional to the simulated CRT size.
+shader3 = "../blurs/blur9fast-vertical.slang"
+filter_linear3 = "true"
+scale_type3 = "source"
+scale3 = "1.0"
+srgb_framebuffer3 = "true"
+
+# Pass4: Horizontally blur the input for halation and refractive diffusion.
+# Note: Using a one-pass 9x9 blur is about 1% slower.
+shader4 = "../blurs/blur9fast-horizontal.slang"
+alias4 = "HALATION_BLUR"
+filter_linear4 = "true"
+scale_type4 = "source"
+scale4 = "1.0"
+srgb_framebuffer4 = "true"
+
+# Pass5: Resample (misconverged) scanlines horizontally, apply halation, and
+# apply the phosphor mask, then fake a phosphor bloom, all in one pass.
+shader5 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask-fake-bloom-intel.slang"
+alias5 = "MASKED_SCANLINES"
+filter_linear5 = "true" # This could just as easily be nearest neighbor.
+scale_type5 = "viewport"
+scale5 = "1.0"
+#float_framebuffer5 = "true"
+srgb_framebuffer5 = "true"
+
+# Pass 6: Compute curvature/AA:
+shader6 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass-intel.slang"
+filter_linear6 = "true"
+scale_type6 = "viewport"
+mipmap_input6 = "true"
+texture_wrap_mode6 = "clamp_to_edge"
+
+
+
--- a/crt/crt-royale_fallback.slangp
+++ b/crt/crt-royale_fallback.slangp
@ -1,22 +1,22 @@
 # IMPORTANT:
 # Shader passes need to know details about the image in the mask_texture LUT
-# files, so set the following constants in user-preset-constants.h accordingly:
+# files, so set the following constants in user-cgp-constants.h accordingly:
 # 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
 # 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
 # 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
 # 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
 # 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
 # 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
-# Shader passes also need to know certain scales set in this preset, but their
-# compilation model doesn't currently allow the preset file to tell them.  Make
-# sure to set the following constants in user-preset-constants.h accordingly too:
+# Shader passes also need to know certain scales set in this .slangp, but their
+# compilation model doesn't currently allow the .slangp file to tell them.  Make
+# sure to set the following constants in user-cgp-constants.h accordingly too:
 # 1.) bloom_approx_scale_x = scale_x2
-# 2.) mask_resize_viewport_scale = vec2(scale_x6, scale_y5)
+# 2.) mask_resize_viewport_scale = float2(scale_x6, scale_y5)
 # Finally, shader passes need to know the value of geom_max_aspect_ratio used to
 # calculate scale_y5 (among other values):
 # 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)

-shaders = "12"
+shaders = "10"

 # Set an identifier, filename, and sampling traits for the phosphor mask texture.
 # Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
@ -71,6 +71,7 @@ scale_type_x1 = "source"
 scale_x1 = "1.0"
 scale_type_y1 = "viewport"
 scale_y1 = "1.0"
+#float_framebuffer1 = "true"
 srgb_framebuffer1 = "true"

 # Pass2: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
@ -85,7 +86,7 @@ srgb_framebuffer1 = "true"
 #     may be a good thing for people who like to crank it up).
 # Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
 # *intended* for an ~4:3 aspect ratio).
-shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang"
+shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx-intel.slang"
 alias2 = "BLOOM_APPROX"
 filter_linear2 = "true"
 scale_type2 = "absolute"
@ -112,95 +113,42 @@ scale_type4 = "source"
 scale4 = "1.0"
 srgb_framebuffer4 = "true"

-# Pass5: Lanczos-resize the phosphor mask vertically.  Set the absolute
-# scale_x5 == mask_texture_small_size.x (see IMPORTANT above).  Larger scales
-# will blur, and smaller scales could get nasty.  The vertical size must be
-# based on the viewport size and calculated carefully to avoid artifacts later.
-# First calculate the minimum number of mask tiles we need to draw.
-# Since curvature is computed after the scanline masking pass:
-#   num_resized_mask_tiles = 2.0;
-# If curvature were computed in the scanline masking pass (it's not):
-#   max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0);
-#   max_mask_tile_border = max_mask_texel_border/
-#       (min_resized_phosphor_triad_size * mask_triads_per_tile);
-#   num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0);
-#   At typical values (triad_size >= 2.0, mask_triads_per_tile == 8):
-#       num_resized_mask_tiles = ~3.8
-# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio
-# to relate them to vertical resolution.  The widest we expect is:
-#   geom_max_aspect_ratio = 4.0/3.0  # Note: Shader passes need to know this!
-# The fewer triads we tile across the screen, the larger each triad will be as a
-# fraction of the viewport size, and the larger scale_y5 must be to draw a full
-# num_resized_mask_tiles.  Therefore, we must decide the smallest number of
-# triads we'll guarantee can be displayed on screen.  We'll set this according
-# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use):
-#   min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333
-# Now calculate the viewport scale that ensures we can draw resized_mask_tiles:
-#   min_scale_x = resized_mask_tiles * mask_triads_per_tile /
-#       min_allowed_viewport_triads
-#   scale_y5 = geom_max_aspect_ratio * min_scale_x
-#   # Some code might depend on equal scales:
-#   scale_x6 = scale_y5
-# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads:
-#   scale_y5 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625
-# IMPORTANT: The scales MUST be calculated in this way.  If you wish to change
-# geom_max_aspect_ratio, update that constant in user-preset-constants.h!
-shader5 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang"
-filter_linear5 = "true"
-scale_type_x5 = "absolute"
-scale_x5 = "64"
-scale_type_y5 = "viewport"
-scale_y5 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
-#srgb_framebuffer5 = "false" # mask_texture is already assumed linear
-
-# Pass6: Lanczos-resize the phosphor mask horizontally.  scale_x6 = scale_y5.
-# TODO: Check again if the shaders actually require equal scales.
-shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang"
-alias6 = "MASK_RESIZE"
-filter_linear6 = "false"
-scale_type_x6 = "viewport"
-scale_x6 = "0.0625"
-scale_type_y6 = "source"
-scale_y6 = "1.0"
-#srgb_framebuffer6 = "false" # mask_texture is already assumed linear
-
-# Pass7: Resample (misconverged) scanlines horizontally, apply halation, and
+# Pass5: Resample (misconverged) scanlines horizontally, apply halation, and
 # apply the phosphor mask.
-shader7 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.slang"
-alias7 = "MASKED_SCANLINES"
+shader5 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask-intel.slang"
+alias5 = "MASKED_SCANLINES"
+filter_linear5 = "true" # This could just as easily be nearest neighbor.
+scale_type5 = "viewport"
+scale5 = "1.0"
+#float_framebuffer5 = "true"
+srgb_framebuffer5 = "true"
+
+# Pass 6: Compute a brightpass.  This will require reading the final mask.
+shader6 = "shaders/crt-royale/src/crt-royale-brightpass.slang"
+alias6 = "BRIGHTPASS"
+filter_linear6 = "true" # This could just as easily be nearest neighbor.
+scale_type6 = "viewport"
+scale6 = "1.0"
+srgb_framebuffer6 = "true"
+
+# Pass 7: Blur the brightpass vertically
+shader7 = "shaders/crt-royale/src/crt-royale-bloom-vertical.slang"
 filter_linear7 = "true" # This could just as easily be nearest neighbor.
-scale_type7 = "viewport"
+scale_type7 = "source"
 scale7 = "1.0"
 srgb_framebuffer7 = "true"

-# Pass 8: Compute a brightpass.  This will require reading the final mask.
-shader8 = "shaders/crt-royale/src/crt-royale-brightpass.slang"
-alias8 = "BRIGHTPASS"
-filter_linear8 = "true" # This could just as easily be nearest neighbor.
-scale_type8 = "viewport"
+# Pass 8: Blur the brightpass horizontally and combine it with the dimpass:
+shader8 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang"
+filter_linear8 = "true"
+scale_type8 = "source"
 scale8 = "1.0"
 srgb_framebuffer8 = "true"

-# Pass 9: Blur the brightpass vertically
-shader9 = "shaders/crt-royale/src/crt-royale-bloom-vertical.slang"
-filter_linear9 = "true" # This could just as easily be nearest neighbor.
-scale_type9 = "source"
-scale9 = "1.0"
-srgb_framebuffer9 = "true"
+# Pass 9: Compute curvature/AA:
+shader9 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass-intel.slang"
+filter_linear9 = "true"
+scale_type9 = "viewport"
+mipmap_input9 = "true"
+texture_wrap_mode9 = "clamp_to_edge"

-# Pass 10: Blur the brightpass horizontally and combine it with the dimpass:
-shader10 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang"
-filter_linear10 = "true"
-scale_type10 = "source"
-scale10 = "1.0"
-srgb_framebuffer10 = "true"
-
-# Pass 11: Compute curvature/AA:
-shader11 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.slang"
-filter_linear11 = "true"
-scale_type11 = "viewport"
-mipmap_input11 = "true"
-texture_wrap_mode11 = "clamp_to_edge"
-
-parameters = "beam_num_scanlines"
-beam_num_scanlines = 3.0
--- a/crt/shaders/crt-royale/LICENSE.TXT
+++ b/crt/shaders/crt-royale/LICENSE.TXT
@ -0,0 +1,280 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
--- a/crt/shaders/crt-royale/README.TXT
+++ b/crt/shaders/crt-royale/README.TXT
@ -0,0 +1,493 @@
+////////////////////////////////////////////////////////////////////////////////
+////          crt-royale, by TroggleMonkey <trogglemonkey@gmx.com>          ////
+////                     Last Updated: August 16, 2014                      ////
+////////////////////////////////////////////////////////////////////////////////
+
+REQUIREMENTS:
+The earliest official Retroarch version fully supporting crt-royale is 1.0.0.3
+(currently unreleased).  Earlier versions lack shader parameters and proper
+mipmapping and sRGB support, but the shader may still run at reduced quality.
+
+The earliest development version fully supporting this shader is:
+    commit ba40be909913c9ccc34dab5d452fba4fe61af9d0
+    Author: Themaister <maister@archlinux.us>
+    Date:   Thu Jun 5 17:41:10 2014 +0200
+A few earlier revisions support the required features, but they may be buggier.
+
+
+BASICS:
+crt-royale is a highly customizable CRT shader for Retroarch and other programs
+supporting the libretro Cg shader standard.  It uses a number of nonstandardized
+extensions like sRGB FBO's, mipmapping, and runtime shader parameters, but
+hopefully it will run without much of a fuss on new implementations of the
+standard as well.
+
+There are a huge number of parameters.  Among the things you can customize:
+* Phosphor mask type: An aperture grille, slot mask, and shadow mask are each
+  included, although the latter won't be seeing much usage until 1440p displays
+  and better become more common (4k UHD and 8k UHD are increasingly optimal).
+* Phosphor mask dot pitch
+* Phosphor mask resampling method: Choose between Lanczos sinc resizing,
+  mipmapped hardware resizing, and no resizing of the input LUT.
+* Phosphor bloom softness and type (real or fake ;))
+* Gaussian and generalized Gaussian scanline beam properties/distribution,
+  including convergence offsets
+* Screen geometry, including curvature (spherical, alternative spherical, or
+  cylindrical like Trinitrons), tilt, and borders
+* Antialiasing level, resampling filter, and sharpness parameters for gracefully
+  combining screen curvature with high-frequency phosphor details, including
+  optionally resampling based on RGB subpixel positions.
+* Halation (electrons bouncing under the glass and lighting
+  random phosphors)
+* Refractive diffusion (light spreading from the imperfect CRT glass face)
+* Interlacing options
+* etc.
+
+There are two major ways to customize the shader:
+* Runtime shader parameters allow convenient experimentation with real-time
+  feedback, but they are much slower, because they prevent static evaluation of
+  a lot of math.  Disabling them drastically speeds up the shader.
+* If runtime shader parameters are disabled (partially or totally), those same
+  settings can be freely altered in the text of the user-settings.h file.  There
+  are also a number of other static-only settings, including the #define macros
+  which indicate where and when to allow runtime shader parameters.  To disable
+  them entirely, comment out the "#define RUNTIME_SHADER_PARAMS_ENABLE" line by
+  putting a double-slash ("//") at the beginning...your FPS will skyrocket.
+
+You may also note that there are two major versions of the shader preset:
+* crt-royale.cgp is the "full" version of the shader, which blooms the light
+  from the brighter phosphors to maintain brightness and avoid clipping.
+* crt-royale-fake-bloom.cgp is the "cheater's" version of the shader, which
+  only fakes the bloom based on carefully blending in a [potentially blurred]
+  version of the original input.  This version is MUCH faster, and you have to
+  strain to see the difference, so people with slower GPU's will prefer it.
+
+There's a lot to play around with, and I encourage everyone using this shader to
+read through the user-settings.h file to learn about the parameters.  Before
+loading the shader, be sure to read the next section, entitled...
+
+
+////////////////////////////////////////////////////////////////////////////////
+////                    FREQUENTLY EXPECTED QUESTIONS:                      ////
+////////////////////////////////////////////////////////////////////////////////
+
+1.) WHY IS THE SHADER CRASHING WHEN I LOAD IT?!?
+Do you get C6001 or C6002 errors with integrated graphics, like Intel HD 4000?
+If so, please try one of the following .cgp presets:
+* crt-royale-intel.cgp
+* crt-royale-fake-bloom-intel.cgp
+These load .cg wrappers that #define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
+(also available in user-settings.h) before loading the main .cg shader files.
+
+Integrated graphics compatibility mode will disable these three features, which
+currently require more registers or instructions than Intel GPU's allow:
+* PHOSPHOR_MASK_MANUALLY_RESIZE: The phosphor mask will be softer.
+  (This may be reenabled in a later release.)
+* RUNTIME_GEOMETRY_MODE: You must change the screen geometry/curvature using
+  the geom_mode_static setting in user-settings.h.
+* The high-quality 4x4 Gaussian resize for the bloom approximation
+
+Using Intel-specific .cgp files is equivalent to #defining
+INTEGRATED_GRAPHICS_COMPATIBILITY_MODE in your user-settings.h.  Out of the box,
+user-settings.h is configured for maximum configurability and compatibility with
+dedicated nVidia and AMD/ATI GPU's.  Compatibility mode is disabled by default
+to avoid silently degrading quality for AMD/ATI and nVidia users, so Intel-
+specific .cgp's are a convenient way for Intel users to play with the shader
+without editing text files.
+
+I've tested this solution on Intel HD 4000 graphics, and it should work for that
+GPU at least, but please let me know if you're still having problems!
+
+--------------------------------------------------------------------------------
+
+2.) WHY IS EVERYTHING SO SLOW?!?:
+Out of the box, this will be a problem for all but monster GPU's.  The default
+user-settings.h file disables any features and optimizations which might cause
+compilation failure on AMD/ATI GPU's.  Despite the name of the options, this is
+not a problem with your card or drivers; it's a shortcoming in the Cg shader
+compiler's nVidia-centric profile setups.
+
+Uncommenting the following #define macros at the top of user-settings.h will
+help performance a good deal on compatible nVidia cards:
+    #define DRIVERS_ALLOW_DERIVATIVES
+    #define DRIVERS_ALLOW_DYNAMIC_BRANCHES
+    #define ACCOMODATE_POSSIBLE_DYNAMIC_LOOPS
+    #define DRIVERS_ALLOW_TEX2DLOD
+    #define DRIVERS_ALLOW_TEX2DBIAS
+A few of these warrant some elaboration.  First, derivatives:
+
+Derivatives allow the shader to cheaply calculate a tangent-space matrix for
+correct antialiasing when curvature or overscan are used.  Without them, there
+are two options:
+    a.) Cheat, and there will be artifacts with strong cylindrical curvature
+    b.) Compute the correct tangent-space matrix analytically.  This is used
+        by default, and it's controlled by this option near the bottom:
+            geom_force_correct_tangent_matrix = true
+
+Dynamic branches:
+Dynamic branches allow the shader to avoid performing computations that it
+doesn't need (but might have, given different runtime options).  Without them,
+the shader has to either let the GPU evaluate every possible codepath and select
+a result, or make a "best guess" ahead of time.  The full phosphor bloom suffers
+most from not having dynamic branches, because the shader doesn't know how big
+of a blur to use until it knows your phosphor mask dot pitch...which you set at
+runtime if shader parameters are enabled.
+
+If RUNTIME_PHOSPHOR_BLOOM_SIGMA is commented out (faster), this won't matter:
+The shader will just select the blur size and standard deviation suitable for
+the mask_triad_size_desired_static setting in user-settings.h.  It will be
+fast, but larger triads won't blur enough, and smaller triads will blur more
+than they need to.  However, if RUNTIME_PHOSPHOR_BLOOM_SIGMA is enabled, the
+shader will calculate an optimal standard deviation and *try* to use the right
+blur size for it...but using an "if standard deviation is such and such"
+condition would be prohibitively slow without dynamic branches.  Instead, the
+shader uses the largest and slowest blur the user lets it use (to cover the
+widest range of triad sizes and standard deviations), according to these macros:
+    #define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
+    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
+    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
+    //#define PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
+The more you have uncommented, the larger the triads you can blur, but the
+slower runtime sigmas will be if your GPU can't use dynamic branches.  By
+default, triads up to 6 pixels wide will be bloomed perfectly, and a little
+beyond that (8 should be fine), but going too far beyond that will create
+blocking artifacts in the blur due to an insufficient support size.
+
+tex2Dlod:
+The tex2Dlod function allows the shader to disable anisotropic filtering, which
+can get confused when we're manually tiling the texture coordinates for a small
+resized phosphor mask tile (it creates nasty seam artifacts).  There are several
+ways the shader can deal with this: The cheapest is to use tex2Dlod to tile the
+output of MASK_RESIZE across the screen...and the slower alternatives either
+require derivatives or force the shader to draw 2 tiles to MASK_RESIZE in each
+direction, thereby reducing your maximum allowed dot pitch by half.
+
+tex2Dbias:
+According to nVidia's Cg language standard library page, tex2Dbias requires the
+fp30 profile, which doesn't work on ATI/AMD cards...but you might actually have
+mixed results.  This can be used as a substitute for tex2Dlod at times, so it's
+worth trying even on ATI.
+
+--------------------------------------------------------------------------------
+
+3.) WHY IS EVERYTHING STILL SO SLOW?!?:
+For maximum quality and configurability out of the box, almost all shader
+parameters are enabled by default (except for the disproportionately expensive
+runtime subpixel offsets).  Some are more expensive than others.  Commenting
+the following macro disables all shader parameters:
+    #define RUNTIME_SHADER_PARAMS_ENABLE
+Commenting these macros disables selective shader parameters:
+    #define RUNTIME_PHOSPHOR_BLOOM_SIGMA
+    #define RUNTIME_ANTIALIAS_WEIGHTS
+    //#define RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
+    #define RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
+    #define RUNTIME_GEOMETRY_TILT
+    #define RUNTIME_GEOMETRY_MODE
+    #define FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
+Note that all shader parameters will still show up in your GUI list, and the
+disabled ones simply won't work.
+
+Finally, there are a lot of other options enabled by default that carry serious
+performance penalties.  For instance, the default antialiasing filter is a
+cubic filter, because it's the most configurable, but it's also quite slow if
+RUNTIME_ANTIALIAS_WEIGHTS is #defined.  A lot of the static true/false options
+have a significant influence, and the shader is faster if the red subpixel
+offset (from which the blue one is calculated as well) is zero...even if it's
+a static value, because RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS is commented out.
+To avoid any confusion, I should also clarify now that subpixel offsets are
+separate from scanline beam convergence offsets.
+
+To quickly see how much performance you can get from other settings, you can
+temporarily replace your user-settings.h with one of:
+a.) crt-royale-settings-files/user-settings-fast-static-ati.h
+b.) crt-royale-settings-files/user-settings-fast-static-nvidia.h
+Then load crt-royale-fake-bloom.cgp.  It should be far more playable.
+
+--------------------------------------------------------------------------------
+
+4.) WHY WON'T MY SHADER BLOOM MY PHOSPHORS ENOUGH?
+First, see the discussion about dynamic branching above, in 2.
+If you don't have dynamic branches, you can either uncomment the lines that
+let the shader pessimistically use larger blurs than it's guaranteed to need
+(which is slow), or...you can just use crt-royale-fake-bloom.cgp, which
+doesn't have this problem. :)
+
+--------------------------------------------------------------------------------
+
+5.) WHY CAN'T I MAKE MY PHOSPHORS ANY BIGGER?
+By default, the phosphor mask is Lanczos-resized in earlier passes to your
+specified dot pitch (mask_sample_mode = 0).  This gives a much sharper result
+than mipmapped hardware sampling (mask_sample_mode = 1), but it can be much
+slower without taking proper care: If the input mask tile (containing 8
+phosphor triads by default) is large, like 512x512, and you try to resize it
+to 24x24 for 3x3 pixel triads, the resizer has to take 128 samples in each
+pass/direction (the max allowed) for a 3-lobe Lanczos filter.  This can be
+very slow, so I made the output of MASK_RESIZE very small by default: Just
+1/16th of the viewport size in each direction.  The exact limit scales with
+your viewport size, and it *should* be reasonable, but the restrictions can
+get tighter if we can't use tex2Dlod and have to fit two whole tiles (16
+phosphor triads with default 8-triad tiles) into the MASK_RESIZE pass for
+compatibility with anisotropic filtering (long story).
+
+If you want bigger phosphor triads, you have two options:
+a.) Set mask_sample_mode to 1 in your shader params (if enabled) or set
+    mask_sample_mode_static to 1 in your user-settings.h file.  This will use
+    hardware sampling, which is softer but has no limitations.
+b.) To increase the limit with manual mask-resizing (best quality), you need to
+    do five things:
+    1.) Go into your .cgp file and find the MASK_RESIZE pass (the horizontal
+        mask resizing pass) and the one before it (the vertical mask resizing
+        pass).  Find the viewport-relative scales, which should say 0.0625, and
+        change them to 0.125 or even 0.25.
+    2.) Still in your .cgp file, also make sure your mask_*_texture_small
+        filenames point to LUT textures that are larger than your final desired
+        onscreen size (upsizing is not currently permitted).
+    3.) Go into user-cgp-constants.h and change mask_resize_viewport_scale from
+        0.0625 to the new value you changed it to in step 1.  This is necessary,
+        because we can't pass that value from the .cgp file to the shader, and
+        the shader can't compute the viewport size (necessary) without it.
+    4.) Still in user-cgp-constants.h, update mask_texture_small_size and
+        mask_triads_per_tile appropriately if you changed your LUT texture in
+        step 2.
+    5.) Reload your .cgp file.
+I REALLY wish there was an easier way to do that, but my hands are tied until
+.cgp files are allowed to pass more information to .cg shaders (which would
+require major updates to the cg2glsl script).
+
+--------------------------------------------------------------------------------
+
+6.) WHY CAN'T I MAKE MY PHOSPHORS ANY SMALLER THAN 2 PIXELS PER TRIAD?
+This is controlled by mask_min_allowed_triad_size in your user-settings.h file.
+Set it to 1.0 instead of 2.0 (anything lower than 1 is pointless), and you're
+set.  It defaults to 2.0 to make mask resizing twice as fast when dynamic
+branches aren't allowed.  Some people may want to be able to fade the phosphors
+away entirely to get a more PVM-like scanlined image though, so change it to 1.0
+for that (or get a higher-resolution display ;)).
+
+Note: This setting should be obsolete soon.  I have some ideas for more
+sophisticated mask resampling that I just don't have a spare few hours to
+implement yet.
+
+--------------------------------------------------------------------------------
+
+7.) I AM NOT RUNNING INTEGRATED GRAPHICS.  WHY AM I GETTING ERRORS?
+First recheck the top of your user-settings.h to make sure incompatible driver
+options are commented out (disabled).  If they're all disabled and you're still
+having problems, you've probably found a bug.  There are bound to be a number of
+them with certain setting combinations, and there might even be a few individual
+settings I broke more recently than I tested them.  My contact information is up
+top, so let me know!
+
+--------------------------------------------------------------------------------
+
+8.) WHY AM I GETTING BANDING IN DARK COLORS?  OR, WHY WON'T MIPMAPPING WORK?
+crt-royale uses features like sRGB and mipmapping, which are not available in
+the latest Retroarch release (1.0.0.2) at the time of this writing.
+
+You may get banding in dark colors if your platform or Retroarch version doesn't
+support sRGB FBO's, and mask_sample_mode 1 will look awful without mipmapping.
+I expect most platforms capable of running this shader at full speed will
+support sRGB FBO's, but if yours doesn't, please let me know, and I'll include
+a note about it.
+
+Alternately, setting levels_autodim_temp too low will cause precision loss and
+banding.
+
+--------------------------------------------------------------------------------
+
+9.) HOW DO I SET GEOMETRY/CURVATURE/ETC.?
+If RUNTIME_SHADER_PARAMS_ENABLE and RUNTIME_GEOMETRY_MODE are both #defined (not
+commented out) in user-settings.h, you can find these options in your shader
+parameters (in Retroarch's RGUI for instance) under e.g. geom_mode.  Otherwise,
+you can set the corresponding e.g. geom_mode_static options in user-settings.h.
+
+--------------------------------------------------------------------------------
+
+10.) WHY DON'T MY SHADER PARAMETERS STICK?
+This is a bit confusing, at least in the version of Retroarch I'm using.
+In the Shader Options menu, Parameters (Current) controls what's on your screen
+right now, whereas Parameters (RGUI) seems to control what gets saved to a
+shader preset (in your base shaders directory) with Save As Shader Preset.
+
+--------------------------------------------------------------------------------
+
+11.) WHY DID YOU SLOW THE SHADER DOWN WITH ALL OF THESE FEATURES I DON'T WANT?
+     WHY DIDN'T YOU MAKE THE DEFAULTS MORE TO MY LIKING?
+
+The default settings tend to best match flat ~13" slot mask TV's with sharp
+scanlines.  Real CRT's however vary a lot in their characteristics (and many are
+softer in more ways than one), so it's impossible to make the default settings
+look like everyone's favorite CRT.  Moreover, it's impossible to decide which
+of the slower features and options are superfluous:
+
+Some people love curvature, and some people hate it.  Some people love
+scanlines, and some people hate them.  Some people love phosphors, and some
+people hate them.  Some people love interlacing support, and some people hate
+it.  Some people love sharpness, and some people hate it.  Some people love
+convergence error, and some people hate it.  The one thing you hate the most is
+probably someone else's most critical feature.  This is why there are so many
+options, why the shader is so complicated, and why it's impossible to please
+everyone out of the box...unfortunately.
+
+That said, if you spend some time tweaking the settings, you're bound to get a
+picture you like.  Once you've made up your mind, you can save the settings to
+a user-settings.h file and disable shader parameters and other slow options to
+get the kind of performance you want.
+
+--------------------------------------------------------------------------------
+
+12.) WHY DIDN'T YOU INCLUDE A SHADER PRESET WITH NTSC SUPPORT?  WHY DIDN'T YOU
+     INCLUDE MORE CANNED PRESETS WITH DIFFERENT OPTIONS?  WHY CAN'T I SELECT
+     FROM ONE OF SEVERAL USER SETTINGS FILES WITHOUT MANUAL FILE RENAMING?
+
+I do plan on adding a version that uses the NTSC shader for the first two
+passes, but it will take a bit of work, because there are several NTSC shader
+versions as it is.  It's easy enough to combine the HALATION_BLUR passes into a
+one-pass blur from blurs/blur9x9fast.cg, but I'm not sure yet just how much
+modification the NTSC shader passes themselves might need for best results.
+
+I originally wanted NTSC support to be included out-of-the-box, but I'd also
+like to release the shader ASAP, so it'll have to wait.
+
+As for other canned presets, that's a little more complicated: I DO intend on
+creating more canned presets, but the combinatorial explosion of major codepath
+options in this shader is too overwhelming to be as exhaustive as I'd like.
+When I get the time, I'll add what I can to make this more user-friendly.
+In the meantime, I'll start adding a few different default versions of the
+user settings file and put them in a subdirectory for people to manually
+place in the main directory and rename to "user-settings.h."
+
+However, the libretro Cg shader specification (and the Cg to GLSL compiler) does
+not currently allow .cgp files to pass any static settings to the source files.
+This presents a huge problem, because it means that in order to create a new
+preset with different options, I also have to create duplicate files for EVERY
+single .cg pass for every permutation, not just the .cgp.  I plan on creating
+a number of skeleton wrapper .cg files in a subdirectory (which set a few
+options and then include the main .cg file for the pass), but it'll be a while
+yet.  In the meantime, I'd rather let people play with what's already done than
+keep it hidden on my hard drive.
+
+--------------------------------------------------------------------------------
+
+13.) WHY DO SO MANY VALUES IN USER-SETTINGS.H HAVE A _STATIC SUFFIX?
+
+The "_static" suffix is there to prevent naming conflicts with runtime shader
+parameters: The shader usually uses a version without the suffix, which is
+assigned either the value of the "_static" version or the runtime shader
+parameter version.  If a value in user-settings.h doesn't have a "_static"
+suffix, it's usually because it's a static compile-time option only, with no
+corresponding runtime version.  Basically, you can ignore the suffix. :)
+
+--------------------------------------------------------------------------------
+
+14.) ARE THERE ANY BROKEN SETTINGS I SHOULD BE AWARE OF?
+     WHAT IF I WANT TO CHANGE SETTINGS IN THE .CGP FILE?
+
+As far as I know, all of the options in user-settings.h and the runtime shader
+parameters are pretty robust, with a few caveats:
+* As noted above, there are some tradeoffs between runtime and compile-time
+  options.  If runtime blur sigmas are disabled for instance, the phosphor
+  bloom (and to a lesser extent, the fake bloom) may not blur the right amount.
+* If you set your aspect ratio incorrectly, and mask_specify_num_triads == 1.0
+  (i.e. true, as opposed to 0.0, which is false), the shader will misinterpret
+  the number of triads you want by the same proportion.
+* Disabled shader parameters will do nothing, including either:
+    a.) mask_triad_size_desired
+    b.) mask_num_triads_desired,
+  depending on the value of mask_specify_num_triads.
+
+There is a broken and unimplemented option in derived-settings-and-constants.h,
+but users shouldn't need to mess around in there anyway.  (It's related to the
+more efficient phosphor mask resampling I want to implement.)
+
+However, the .cgp files are another story: They are pretty brittle, especially
+when it comes to their interaction with user-cgp-constants.h.  Be aware that the
+shader passes rely on scale types and sizes in your .cgp file being exactly what
+they expect.  Do not change any scale types from the defaults, or you'll get
+artifacts under certain conditions.  You can change the BLOOM_APPROX and
+MASK_RESIZE scale values (not scale types), but you must update the associated
+constant in user-cgp-constants.h to let the .cg shader files know about it, and
+the implications may reach farther than you expect.  Similarly, if you plan on
+changing an LUT texture, make sure you update the associated constants in
+user-cgp-constants.h.  In short, if you plan on changing anything in a .cgp
+file, you'll want to read it thoroughly first, especially the "IMPORTANT"
+section at the top.
+
+--------------------------------------------------------------------------------
+
+15.) WHAT ARE THE MOST COMMON DOT PITCHES FOR CRT TELEVISIONS?
+     WHAT KIND OF RESOLUTION WOULD I NEED FOR A REAL SHADOW MASK?
+
+The most demanding CRT we're ever likely to emulate is a Sony PVM-20M4U:
+    Width: 450mm
+    Aperture Grille Pitch: 0.31mm
+    Triads in 4:3 frame: 1451, assuming little to no overscan
+For 3-pixel triads, we would need about 6k UHD resolution.  A BVM-20F1U has
+similar requirements.
+
+However, common slot masks are far more similar to the kind of image this shader
+will produce at 900p, 1080p, 1200p, and 1440p:
+1.) A typical 13" diagonal CRT might have a 0.60mm slot pitch, for a total of
+    440.26666666666665 or so phosphor triads horizontally.
+2.) A typical 19" diagonal CRT might have a 0.75mm slot pitch, for a total of
+    514.7733333333333 or so phosphor triads horizontally.
+3.) According to http://repairfaq.ece.drexel.edu/REPAIR/F_crtfaq.html, a
+    typical 25" diagonal CRT might have a 0.9mm slot pitch, for a total of
+    564.4444444444445 or so phosphor triads horizontally.
+4.) A 21" Samsung SMC210N CCTV monitor (450 TV lines) has a 0.7mm stripe
+    pitch, for a total of 609.6 or so phosphor triads horizontally.
+
+The included EDP shadow mask starts looking very good with ~6-pixel triads, so
+it may take nearly 4k resolution to make it a particularly compelling option.
+However, it's possible to make smaller shadow masks on a pixel-by-pixel basis
+and tile them at a 1:1 ratio (mask_sample_mode = 2).  I may include a mask like
+this in a future update.
+
+--------------------------------------------------------------------------------
+
+16.) IS THIS PHOSPHOR BLOOM REALISTIC?
+
+Probably not:
+
+Realistically, the "phosphor bloom" blurs bright phosphors significantly more
+than your eyes would bloom the brighter phosphors on a real CRT.  This extra
+blurring however is necessary to distribute enough brightness to nearby pixels
+that we can amplify the overall brightness to that of the original source after
+applying the phosphor mask.  If you're interested, there are more comments on
+the subject at the top of the fragment shader in crt-royale-bloom-approx.cg.
+
+On the subject of the phosphor bloom: I intended to include some exposition
+about the math behind the brightpass calculation (and the much more complex
+and thorough calculation I originally used to blur the minimal amount necessary,
+which turned out to be inferior in practice), but that document isn't release-
+ready at the moment.  Sorry Hyllian. ;)
+
+--------------------------------------------------------------------------------
+
+17.) SO WHAT DO YOU PLAN ON ADDING IN THE FUTURE?
+
+I'd like to add these relatively soon:
+1.) A combined ntsc-crt-royale.cgp and ntsc-crt-royale-fake-bloom.cgp.
+2.) More presets, especially if maister or squarepusher find a way to make the
+    Cg to GLSL compiler process .cgp files (which will allow .cgp's to pass
+    arbitrary #defines to the .cg shader passes).
+3.) More efficient and flexible phosphor mask resampling.  Hopefully, this will
+    make it possible to manually resize the mask on Intel HD Graphics as well.
+4.) Make it more easy and convenient to use and experiment with mask_sample_mode
+    2 (direct 1:1 tiling of an input texture) by using a separate LUT texture
+    with its own parameters in user-cgp-constants.h, etc.  I haven't done this
+    yet because it requires yet another texture sample that could hurt other
+    codepaths, and I'm waiting until I have time to optimize it.
+5.) Refine the runtime shader parameters: Some of them are probably too fine-
+    grained and slow to change.
+
+Maybe's:
+1.) I've had trouble getting LUT's from subdirectories to work consistently
+    across platforms, but I'd like to get around that and include more mask
+    textures I've made.
+2.) If you're using spherical curvature with a small radius, the edges of the
+    sphere are blocky due to the pixel discards being done in 2x2 fragment
+    blocks.  I'd like to fix this if it can be done without a performance hit.
+3.) I have some ideas for procedural mask generation with a fast, closed-form
+    low-pass filter, but I don't know if I'll ever get around to it.
+
--- a/crt/shaders/crt-royale/THANKS.TXT
+++ b/crt/shaders/crt-royale/THANKS.TXT
@ -0,0 +1,43 @@
+Thank you squarepusher and maister, for hammering out the shader framework that
+made this possible and being so receptive to my feedback for Retroarch and the
+libretro Cg shader spec.  Thank you especially maister, for designing the sRGB
+support with me and implementing all the code for both sRGB FBO's and mipmapped
+FBO's in less time than it took me to add mipmapped LUT's alone!
+
+I want to thank xythen and DOLLS for inspiring me with their early efforts:
+    http://board.byuu.org/viewtopic.php?f=10&t=147
+    http://board.byuu.org/viewtopic.php?p=3820#p3834
+I've never spoken with them, but I never would have thought to make this shader
+if xythen hadn't gotten the ball rolling, or if DOLLS hadn't made his point
+about just how far CRT emulation could go with his phosphor mask prototypes,
+convergence error images, and barrel distortion code.
+
+I also want to thank hunterk for his excellent blog, especially this post:
+    http://filthypants.blogspot.com/2011/05/
+        more-emulator-pixel-shaders-crt-updated.html
+Along with caligari's work, his PhosphorLUT shader provoked me to experiment
+with game-style bloom as a way to reconcile shadow masks with full brightness.
+Along with Pulp Fiction, he also gets credit for helping me name this shader. :D
+
+Thank you Hyllian for your enthusiasm: It kept me focused on actually releasing
+this shader instead of refining it in perpetuity!
+
+Finally, I want to thank cgwg for everything he has done for CRT emulation:
+He was the first to consider the effects of halation, and (in addition to
+caligari?) he did the most research on the Gaussian properties of scanline
+electron beams.  His forum posts and links to academic research were very
+helpful, and so were the few PM's we exchanged many months ago: I originally
+meant to wet my feet by extending his shader with cylindrical curvature before
+writing my own.  I never managed to understand his curvature code (due to all of
+the different algebraic/trigonometric stages being rolled into one), and I gave
+up and started from scratch, but talking with him helped me piece together how
+his spherical uv<=>xyz mapping worked mathematically.  My own is subtly
+different, but not on purpose. ;)  A lot of the user parameters for geometry
+were inspired by his own (including Euler angle tilt and a "view distance" for
+controlling the field of view with a simplified near-plane).  Last but not
+least, my border dimming code was based more directly off of his: I did what I
+could to write a fresh implementation of his algorithm with new features, but
+the line between code and algorithm is pretty thin in that function, and it's
+a testament to him coming up with such an elegant solution.
+
+TroggleMonkey
--- a/crt/shaders/crt-royale/src/bind-shader-params.h
+++ b/crt/shaders/crt-royale/src/bind-shader-params.h
@ -27,7 +27,7 @@

 //  Override some parameters for gamma-management.h and tex2Dantialias.h:
 #define OVERRIDE_DEVICE_GAMMA
-const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
+static const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
 #define ANTIALIAS_OVERRIDE_BASICS
 #define ANTIALIAS_OVERRIDE_PARAMETERS

@ -38,8 +38,9 @@ const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
 #endif

 //  Bind option names to shader parameter uniforms or static constants.
+#ifdef HARDCODE_SETTINGS
 #ifdef PARAMETER_UNIFORM
-/*    uniform float crt_gamma;
+    uniform float crt_gamma;
    uniform float lcd_gamma;
    uniform float levels_contrast;
    uniform float halation_weight;
@ -57,8 +58,8 @@ const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
        uniform float beam_horiz_filter;
        uniform float beam_horiz_linear_rgb_weight;
    #else
-        const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
-        const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
+        static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
+        static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
    #endif
    uniform float convergence_offset_x_r;
    uniform float convergence_offset_x_g;
@ -69,7 +70,7 @@ const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
        uniform float mask_type;
    #else
-        const float mask_type = clamp(mask_type_static, 0.0, 2.0);
+        static const float mask_type = clamp(mask_type_static, 0.0, 2.0);
    #endif
    uniform float mask_sample_mode_desired;
    uniform float mask_specify_num_triads;
@ -81,8 +82,8 @@ const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
        uniform float aa_cubic_c;
        uniform float aa_gauss_sigma;
    #else
-        const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
-        const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
+        static const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
+        static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
    #endif
    uniform float geom_mode_runtime;
    uniform float geom_radius;
@ -97,113 +98,114 @@ const float gba_gamma = 3.5; //  Irrelevant but necessary to define.
    uniform float border_darkness;
    uniform float border_compress;
    uniform float interlace_bff;
-    uniform float interlace_1080i; */
+    uniform float interlace_1080i;
 #else
    //  Use constants from user-settings.h, and limit ranges appropriately:
-    const float crt_gamma = max(0.0, crt_gamma_static);
-    const float lcd_gamma = max(0.0, lcd_gamma_static);
-    const float levels_contrast = clamp(levels_contrast_static, 0.0, 4.0);
-    const float halation_weight = clamp(halation_weight_static, 0.0, 1.0);
-    const float diffusion_weight = clamp(diffusion_weight_static, 0.0, 1.0);
-    const float bloom_underestimate_levels = max(FIX_ZERO(0.0), bloom_underestimate_levels_static);
-    const float bloom_excess = clamp(bloom_excess_static, 0.0, 1.0);
-    const float beam_min_sigma = max(FIX_ZERO(0.0), beam_min_sigma_static);
-    const float beam_max_sigma = max(beam_min_sigma, beam_max_sigma_static);
-    const float beam_spot_power = max(beam_spot_power_static, 0.0);
-    const float beam_min_shape = max(2.0, beam_min_shape_static);
-    const float beam_max_shape = max(beam_min_shape, beam_max_shape_static);
-    const float beam_shape_power = max(0.0, beam_shape_power_static);
-//    const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
-    const float beam_horiz_sigma = max(FIX_ZERO(0.0), beam_horiz_sigma_static);
-    const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
-    //  Unpack vector elements to match scalar uniforms:
-    const float convergence_offset_x_r = clamp(convergence_offsets_r_static.x, -4.0, 4.0);
-    const float convergence_offset_x_g = clamp(convergence_offsets_g_static.x, -4.0, 4.0);
-    const float convergence_offset_x_b = clamp(convergence_offsets_b_static.x, -4.0, 4.0);
-    const float convergence_offset_y_r = clamp(convergence_offsets_r_static.y, -4.0, 4.0);
-    const float convergence_offset_y_g = clamp(convergence_offsets_g_static.y, -4.0, 4.0);
-    const float convergence_offset_y_b = clamp(convergence_offsets_b_static.y, -4.0, 4.0);
-    const float mask_type = clamp(mask_type_static, 0.0, 2.0);
-    const float mask_sample_mode_desired = clamp(mask_sample_mode_static, 0.0, 2.0);
-    const float mask_specify_num_triads = clamp(mask_specify_num_triads_static, 0.0, 1.0);
- //   const float mask_triad_size_desired = clamp(mask_triad_size_desired_static, 1.0, 18.0);
-    const float mask_num_triads_desired = clamp(mask_num_triads_desired_static, 342.0, 1920.0);
-    const float aa_subpixel_r_offset_x_runtime = clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5);
-    const float aa_subpixel_r_offset_y_runtime = clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5);
-    const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
-    const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
-    const float geom_mode_runtime = clamp(geom_mode_static, 0.0, 3.0);
-    const float geom_radius = max(1.0/(2.0*pi), geom_radius_static);         //  Clamp to [1/(2*pi), 1024]?
-    const float geom_view_dist = max(0.5, geom_view_dist_static);            //  Clamp to [0.5, 1024]?
-    const float geom_tilt_angle_x = clamp(geom_tilt_angle_static.x, -pi, pi);
-    const float geom_tilt_angle_y = clamp(geom_tilt_angle_static.y, -pi, pi);
-    const float geom_aspect_ratio_x = geom_aspect_ratio_static;              //  Force >= 1?
-    const float geom_aspect_ratio_y = 1.0;
-    const float geom_overscan_x = max(FIX_ZERO(0.0), geom_overscan_static.x);
-    const float geom_overscan_y = max(FIX_ZERO(0.0), geom_overscan_static.y);
-    const float border_size = clamp(border_size_static, 0.0, 0.5);           //  0.5 reaches to image center
-    const float border_darkness = max(0.0, border_darkness_static);
-    const float border_compress = max(1.0, border_compress_static);          //  < 1.0 darkens whole image
-    const float interlace_bff = float(interlace_bff_static);
-    const float interlace_1080i = float(interlace_1080i_static);
+    static const float crt_gamma = max(0.0, crt_gamma_static);
+    static const float lcd_gamma = max(0.0, lcd_gamma_static);
+    static const float levels_contrast = clamp(levels_contrast_static, 0.0, 4.0);
+    static const float halation_weight = clamp(halation_weight_static, 0.0, 1.0);
+    static const float diffusion_weight = clamp(diffusion_weight_static, 0.0, 1.0);
+    static const float bloom_underestimate_levels = max(FIX_ZERO(0.0), bloom_underestimate_levels_static);
+    static const float bloom_excess = clamp(bloom_excess_static, 0.0, 1.0);
+    static const float beam_min_sigma = max(FIX_ZERO(0.0), beam_min_sigma_static);
+    static const float beam_max_sigma = max(beam_min_sigma, beam_max_sigma_static);
+    static const float beam_spot_power = max(beam_spot_power_static, 0.0);
+    static const float beam_min_shape = max(2.0, beam_min_shape_static);
+    static const float beam_max_shape = max(beam_min_shape, beam_max_shape_static);
+    static const float beam_shape_power = max(0.0, beam_shape_power_static);
+    static const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
+    static const float beam_horiz_sigma = max(FIX_ZERO(0.0), beam_horiz_sigma_static);
+    static const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
+    //  Unpack static vector elements to match scalar uniforms:
+    static const float convergence_offset_x_r = clamp(convergence_offsets_r_static.x, -4.0, 4.0);
+    static const float convergence_offset_x_g = clamp(convergence_offsets_g_static.x, -4.0, 4.0);
+    static const float convergence_offset_x_b = clamp(convergence_offsets_b_static.x, -4.0, 4.0);
+    static const float convergence_offset_y_r = clamp(convergence_offsets_r_static.y, -4.0, 4.0);
+    static const float convergence_offset_y_g = clamp(convergence_offsets_g_static.y, -4.0, 4.0);
+    static const float convergence_offset_y_b = clamp(convergence_offsets_b_static.y, -4.0, 4.0);
+    static const float mask_type = clamp(mask_type_static, 0.0, 2.0);
+    static const float mask_sample_mode_desired = clamp(mask_sample_mode_static, 0.0, 2.0);
+    static const float mask_specify_num_triads = clamp(mask_specify_num_triads_static, 0.0, 1.0);
+    static const float mask_triad_size_desired = clamp(mask_triad_size_desired_static, 1.0, 18.0);
+    static const float mask_num_triads_desired = clamp(mask_num_triads_desired_static, 342.0, 1920.0);
+    static const float aa_subpixel_r_offset_x_runtime = clamp(aa_subpixel_r_offset_static.x, -0.5, 0.5);
+    static const float aa_subpixel_r_offset_y_runtime = clamp(aa_subpixel_r_offset_static.y, -0.5, 0.5);
+    static const float aa_cubic_c = aa_cubic_c_static;                              //  Clamp to [0, 4]?
+    static const float aa_gauss_sigma = max(FIX_ZERO(0.0), aa_gauss_sigma_static);  //  Clamp to [FIXZERO(0), 1]?
+    static const float geom_mode_runtime = clamp(geom_mode_static, 0.0, 3.0);
+    static const float geom_radius = max(1.0/(2.0*pi), geom_radius_static);         //  Clamp to [1/(2*pi), 1024]?
+    static const float geom_view_dist = max(0.5, geom_view_dist_static);            //  Clamp to [0.5, 1024]?
+    static const float geom_tilt_angle_x = clamp(geom_tilt_angle_static.x, -pi, pi);
+    static const float geom_tilt_angle_y = clamp(geom_tilt_angle_static.y, -pi, pi);
+    static const float geom_aspect_ratio_x = geom_aspect_ratio_static;              //  Force >= 1?
+    static const float geom_aspect_ratio_y = 1.0;
+    static const float geom_overscan_x = max(FIX_ZERO(0.0), geom_overscan_static.x);
+    static const float geom_overscan_y = max(FIX_ZERO(0.0), geom_overscan_static.y);
+    static const float border_size = clamp(border_size_static, 0.0, 0.5);           //  0.5 reaches to image center
+    static const float border_darkness = max(0.0, border_darkness_static);
+    static const float border_compress = max(1.0, border_compress_static);          //  < 1.0 darkens whole image
+    static const float interlace_bff = float(interlace_bff_static);
+    static const float interlace_1080i = float(interlace_1080i_static);
+#endif
 #endif


 //  Provide accessors for vector constants that pack scalar uniforms:
-vec2 get_aspect_vector(const float geom_aspect_ratio)
+inline float2 get_aspect_vector(const float geom_aspect_ratio)
 {
    //  Get an aspect ratio vector.  Enforce geom_max_aspect_ratio, and prevent
    //  the absolute scale from affecting the uv-mapping for curvature:
    const float geom_clamped_aspect_ratio =
        min(geom_aspect_ratio, geom_max_aspect_ratio);
-    const vec2 geom_aspect =
-        normalize(vec2(geom_clamped_aspect_ratio, 1.0));
+    const float2 geom_aspect =
+        normalize(float2(geom_clamped_aspect_ratio, 1.0));
    return geom_aspect;
 }

-vec2 get_geom_overscan_vector()
+inline float2 get_geom_overscan_vector()
 {
-    return vec2(geom_overscan_x, geom_overscan_y);
+    return float2(geom_overscan_x, geom_overscan_y);
 }

-vec2 get_geom_tilt_angle_vector()
+inline float2 get_geom_tilt_angle_vector()
 {
-    return vec2(geom_tilt_angle_x, geom_tilt_angle_y);
+    return float2(geom_tilt_angle_x, geom_tilt_angle_y);
 }

-vec3 get_convergence_offsets_x_vector()
+inline float3 get_convergence_offsets_x_vector()
 {
-    return vec3(convergence_offset_x_r, convergence_offset_x_g,
+    return float3(convergence_offset_x_r, convergence_offset_x_g,
        convergence_offset_x_b);
 }

-vec3 get_convergence_offsets_y_vector()
+inline float3 get_convergence_offsets_y_vector()
 {
-    return vec3(convergence_offset_y_r, convergence_offset_y_g,
+    return float3(convergence_offset_y_r, convergence_offset_y_g,
        convergence_offset_y_b);
 }

-vec2 get_convergence_offsets_r_vector()
+inline float2 get_convergence_offsets_r_vector()
 {
-    return vec2(convergence_offset_x_r, convergence_offset_y_r);
+    return float2(convergence_offset_x_r, convergence_offset_y_r);
 }

-vec2 get_convergence_offsets_g_vector()
+inline float2 get_convergence_offsets_g_vector()
 {
-    return vec2(convergence_offset_x_g, convergence_offset_y_g);
+    return float2(convergence_offset_x_g, convergence_offset_y_g);
 }

-vec2 get_convergence_offsets_b_vector()
+inline float2 get_convergence_offsets_b_vector()
 {
-    return vec2(convergence_offset_x_b, convergence_offset_y_b);
+    return float2(convergence_offset_x_b, convergence_offset_y_b);
 }

-vec2 get_aa_subpixel_r_offset()
+inline float2 get_aa_subpixel_r_offset()
 {
    #ifdef RUNTIME_ANTIALIAS_WEIGHTS
        #ifdef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
            //  WARNING: THIS IS EXTREMELY EXPENSIVE.
-            return vec2(aa_subpixel_r_offset_x_runtime,
+            return float2(aa_subpixel_r_offset_x_runtime,
                aa_subpixel_r_offset_y_runtime);
        #else
            return aa_subpixel_r_offset_static;
@ -214,17 +216,17 @@ vec2 get_aa_subpixel_r_offset()
 }

 //  Provide accessors for settings which still need "cooking":
-float get_mask_amplify()
+inline float get_mask_amplify()
 {
-    const float mask_grille_amplify = 1.0/mask_grille_avg_color;
-    const float mask_slot_amplify = 1.0/mask_slot_avg_color;
-    const float mask_shadow_amplify = 1.0/mask_shadow_avg_color;
+    static const float mask_grille_amplify = 1.0/mask_grille_avg_color;
+    static const float mask_slot_amplify = 1.0/mask_slot_avg_color;
+    static const float mask_shadow_amplify = 1.0/mask_shadow_avg_color;
    return mask_type < 0.5 ? mask_grille_amplify :
        mask_type < 1.5 ? mask_slot_amplify :
        mask_shadow_amplify;
 }

-float get_mask_sample_mode()
+inline float get_mask_sample_mode()
 {
    #ifdef RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
        #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
--- a/crt/shaders/crt-royale/src/bloom-functions.h
+++ b/crt/shaders/crt-royale/src/bloom-functions.h
@ -37,14 +37,13 @@
 ///////////////////////////////  BLOOM CONSTANTS  //////////////////////////////

 //  Compute constants with manual inlines of the functions below:
-const float bloom_diff_thresh = 1.0/256.0;
+static const float bloom_diff_thresh = 1.0/256.0;
+

-//  Assume an extremely large viewport size for asymptotic results:
-const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);

 ///////////////////////////////////  HELPERS  //////////////////////////////////

-float get_min_sigma_to_blur_triad(const float triad_size,
+inline float get_min_sigma_to_blur_triad(const float triad_size,
    const float thresh)
 {
    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
@ -60,7 +59,7 @@ float get_min_sigma_to_blur_triad(const float triad_size,
    //return 0.5985*triad_size - triad_size*sqrt(thresh)
 }

-float get_absolute_scale_blur_sigma(const float thresh)
+inline float get_absolute_scale_blur_sigma(const float thresh)
 {
    //  Requires:   1.) min_expected_triads must be a global float.  The number
    //                  of horizontal phosphor triads in the final image must be
@ -93,7 +92,7 @@ float get_absolute_scale_blur_sigma(const float thresh)
            max_viewport_size_x/min_allowed_viewport_triads.x, thresh);
 }

-float get_center_weight(const float sigma)
+inline float get_center_weight(const float sigma)
 {
    //  Given a Gaussian blur sigma, get the blur weight for the center texel.
    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
@ -161,8 +160,8 @@ float get_center_weight(const float sigma)
    #endif
 }

-vec3 tex2DblurNfast(const sampler2D tex, const vec2 tex_uv,
-    const vec2 dxdy, const float sigma)
+inline float3 tex2DblurNfast(const sampler2D texture, const float2 tex_uv,
+    const float2 dxdy, const float sigma)
 {
    //  If sigma is static, we can safely branch and use the smallest blur
    //  that's big enough.  Ignore #define hints, because we'll only use a
@ -186,40 +185,40 @@ vec3 tex2DblurNfast(const sampler2D tex, const vec2 tex_uv,
    #ifdef PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
        if(sigma <= blur9_std_dev)
        {
-            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur9fast(texture, tex_uv, dxdy, sigma);
        }
        else if(sigma <= blur17_std_dev)
        {
-            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur17fast(texture, tex_uv, dxdy, sigma);
        }
        else if(sigma <= blur25_std_dev)
        {
-            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur25fast(texture, tex_uv, dxdy, sigma);
        }
        else if(sigma <= blur31_std_dev)
        {
-            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur31fast(texture, tex_uv, dxdy, sigma);
        }
        else
        {
-            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur43fast(texture, tex_uv, dxdy, sigma);
        }
    #else
        //  If we can't afford to branch, we can only guess at what blur
        //  size we need.  Therefore, use the largest blur allowed.
        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_12_PIXELS
-            return tex2Dblur43fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur43fast(texture, tex_uv, dxdy, sigma);
        #else
        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
-            return tex2Dblur31fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur31fast(texture, tex_uv, dxdy, sigma);
        #else
        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
-            return tex2Dblur25fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur25fast(texture, tex_uv, dxdy, sigma);
        #else
        #ifdef PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
-            return tex2Dblur17fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur17fast(texture, tex_uv, dxdy, sigma);
        #else
-            return tex2Dblur9fast(tex, tex_uv, dxdy, sigma);
+            return tex2Dblur9fast(texture, tex_uv, dxdy, sigma);
        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_3_PIXELS
        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_6_PIXELS
        #endif  //  PHOSPHOR_BLOOM_TRIADS_LARGER_THAN_9_PIXELS
@ -227,7 +226,7 @@ vec3 tex2DblurNfast(const sampler2D tex, const vec2 tex_uv,
    #endif  //  PHOSPHOR_BLOOM_BRANCH_FOR_BLUR_SIZE
 }

-float get_bloom_approx_sigma(const float output_size_x_runtime,
+inline float get_bloom_approx_sigma(const float output_size_x_runtime,
    const float estimated_viewport_size_x)
 {
    //  Requires:   1.) output_size_x_runtime == BLOOM_APPROX.output_size.x.
@ -243,15 +242,15 @@ float get_bloom_approx_sigma(const float output_size_x_runtime,
    //              bilinear filtering, so use static calculations.
    //  Assume the default static value.  This is a compromise that ensures
    //  typical triads are blurred, even if unusually large ones aren't.
-    const float mask_num_triads_static =
+    static const float mask_num_triads_static =
        max(min_allowed_viewport_triads.x, mask_num_triads_desired_static);
    const float mask_num_triads_from_size =
-        estimated_viewport_size_x/params.mask_triad_size_desired;
+        estimated_viewport_size_x/global.mask_triad_size_desired;
    const float mask_num_triads_runtime = max(min_allowed_viewport_triads.x,
-        mix(mask_num_triads_from_size, params.mask_num_triads_desired,
-            mask_specify_num_triads));
+        lerp(mask_num_triads_from_size, global.mask_num_triads_desired,
+            global.mask_specify_num_triads));
    //  Assume an extremely large viewport size for asymptotic results:
-     const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
+    static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
    {
        //  Use the runtime num triads and output size:
@ -264,7 +263,7 @@ float get_bloom_approx_sigma(const float output_size_x_runtime,
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
        //  account for the Gaussian scanline sigma from the last pass too.
        //  The bloom will be too wide horizontally but tall enough vertically.
-        return length(vec2(bloom_approx_sigma, beam_max_sigma));
+        return length(float2(bloom_approx_sigma, beam_max_sigma));
    }
    else    //  3x3 blur resize (the bilinear resize doesn't need a sigma)
    {
@ -272,12 +271,12 @@ float get_bloom_approx_sigma(const float output_size_x_runtime,
        //  reason to choose blur3x3 is to avoid dynamic weights, so use a
        //  static calculation.
        #ifdef PHOSPHOR_BLOOM_FAKE
-            const float output_size_x_static =
+            static const float output_size_x_static =
                bloom_approx_size_x_for_fake;
        #else
-            const float output_size_x_static = bloom_approx_size_x;
+            static const float output_size_x_static = bloom_approx_size_x;
        #endif
-        const float asymptotic_triad_size =
+        static const float asymptotic_triad_size =
            max_viewport_size_x/mask_num_triads_static;
        const float asymptotic_sigma = get_min_sigma_to_blur_triad(
            asymptotic_triad_size, bloom_diff_thresh);
@ -286,11 +285,11 @@ float get_bloom_approx_sigma(const float output_size_x_runtime,
        //  The BLOOM_APPROX input has to be ORIG_LINEARIZED to avoid moire, but
        //  try accounting for the Gaussian scanline sigma from the last pass
        //  too; use the static default value:
-        return length(vec2(bloom_approx_sigma, beam_max_sigma_static));
+        return length(float2(bloom_approx_sigma, beam_max_sigma_static));
    }
 }

-float get_final_bloom_sigma(const float bloom_sigma_runtime)
+inline float get_final_bloom_sigma(const float bloom_sigma_runtime)
 {
    //  Requires:   1.) bloom_sigma_runtime is a precalculated sigma that's
    //                  optimal for the [known] triad size.
@ -303,7 +302,7 @@ float get_final_bloom_sigma(const float bloom_sigma_runtime)
    //  Notes:      Call this from the fragment shader, NOT the vertex shader,
    //              so static sigmas can be constant-folded!
    const float bloom_sigma_optimistic = get_min_sigma_to_blur_triad(
-        params.mask_triad_size_desired, bloom_diff_thresh);
+        mask_triad_size_desired_static, bloom_diff_thresh);
    #ifdef RUNTIME_PHOSPHOR_BLOOM_SIGMA
        return bloom_sigma_runtime;
    #else
@ -313,4 +312,6 @@ float get_final_bloom_sigma(const float bloom_sigma_runtime)
    #endif
 }

-#endif  //  BLOOM_FUNCTIONS_H
+
+#endif  //  BLOOM_FUNCTIONS_H
+
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-approx-intel.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx-intel.slang
@ -0,0 +1,3 @@
+#version 450
+#define INTEGRATED_GRAPHICS_COMPATIBILITY_MODE
+#include "crt-royale-bloom-approx.h"
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang
@ -1,16 +1,3 @@
-#version 450
-
-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-	vec4 ORIG_LINEARIZEDSize;
-} registers;
-
-#include "params.inc"
-
 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

 //  crt-royale: A full-featured CRT shader, with cheese.
@ -29,32 +16,170 @@ layout(push_constant) uniform Push
 //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 //  Place, Suite 330, Boston, MA 02111-1307 USA

+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	vec4 ORIG_LINEARIZEDSize;
+} params;

-//////////////////////////////////  INCLUDES  //////////////////////////////////
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+	float crt_gamma;
+	float lcd_gamma;
+	float levels_contrast;
+	float halation_weight;
+	float diffusion_weight;
+	float bloom_underestimate_levels;
+	float bloom_excess;
+	float beam_min_sigma;
+	float beam_max_sigma;
+	float beam_spot_power;
+	float beam_min_shape;
+	float beam_max_shape;
+	float beam_shape_power;
+	float beam_horiz_filter;
+	float beam_horiz_sigma;
+	float beam_horiz_linear_rgb_weight;
+	float convergence_offset_x_r;
+	float convergence_offset_x_g;
+	float convergence_offset_x_b;
+	float convergence_offset_y_r;
+	float convergence_offset_y_g;
+	float convergence_offset_y_b;
+	float mask_type;
+	float mask_sample_mode_desired;
+	float mask_num_triads_desired;
+	float mask_triad_size_desired;
+	float mask_specify_num_triads;
+	float aa_subpixel_r_offset_x_runtime;
+	float aa_subpixel_r_offset_y_runtime;
+	float aa_cubic_c;
+	float aa_gauss_sigma;
+	float geom_mode_runtime;
+	float geom_radius;
+	float geom_view_dist;
+	float geom_tilt_angle_x;
+	float geom_tilt_angle_y;
+	float geom_aspect_ratio_x;
+	float geom_aspect_ratio_y;
+	float geom_overscan_x;
+	float geom_overscan_y;
+	float border_size;
+	float border_darkness;
+	float border_compress;
+	float interlace_bff;
+	float interlace_1080i;
+} global;

+#define ORIG_LINEARIZEDvideo_size params.SourceSize.xy
+#define ORIG_LINEARIZEDtexture_size params.SourceSize.xy
+
+float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;
+const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "params.inc"
+#include "../../../../include/compat_macros.inc"
 #include "../user-settings.h"
-#include "derived-settings-and-constants.h"
 #include "bind-shader-params.h"
 #include "../../../../include/gamma-management.h"
-#include "../../../../include/blur-functions.h"
+#include "derived-settings-and-constants.h"
 #include "scanline-functions.h"
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 tex_uv;
+layout(location = 1) out vec2 blur_dxdy;
+layout(location = 2) out vec2 uv_scanline_step;
+layout(location = 3) out float estimated_viewport_size_x;
+layout(location = 4) out vec2 texture_size_inv;
+layout(location = 5) out vec2 tex_uv_to_pixel_scale;
+
+void main()
+{
+   gl_Position = global.MVP * Position;
+   float2 vTexCoord = TexCoord;
+    const float2 video_uv = vTexCoord * IN.texture_size/IN.video_size;
+    tex_uv = video_uv * ORIG_LINEARIZEDvideo_size /
+        ORIG_LINEARIZEDtexture_size;
+    //  The last pass (vertical scanlines) had a viewport y scale, so we can
+    //  use it to calculate a better runtime sigma:
+    estimated_viewport_size_x =
+        IN.video_size.y * geom_aspect_ratio_x/geom_aspect_ratio_y;
+
+    //  Get the uv sample distance between output pixels.  We're using a resize
+    //  blur, so arbitrary upsizing will be acceptable if filter_linearN =
+    //  "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
+    //  "true" too.  The blur will be much more accurate if a true 4x4 Gaussian
+    //  resize is used instead of tex2Dblur3x3_resize (which samples between
+    //  texels even for upsizing).
+    const float2 dxdy_min_scale = ORIG_LINEARIZEDvideo_size/IN.output_size;
+    const float2 texture_size_inv = float2(1.0)/ORIG_LINEARIZEDtexture_size;
+    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
+    {
+        //  For upsizing, we'll snap to texels and sample the nearest 4.
+        const float2 dxdy_scale = max(dxdy_min_scale, float2(1.0));
+        blur_dxdy = dxdy_scale * texture_size_inv;
+    }
+    else
+    {
+        const float2 dxdy_scale = dxdy_min_scale;
+        blur_dxdy = dxdy_scale * texture_size_inv;
+    }
+    //  tex2Dresize_gaussian4x4 needs to know a bit more than the other filters:
+    tex_uv_to_pixel_scale = IN.output_size *
+        ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
+    //texture_size_inv = texture_size_inv;
+
+    //  Detecting interlacing again here lets us apply convergence offsets in
+    //  this pass.  il_step_multiple contains the (texel, scanline) step
+    //  multiple: 1 for progressive, 2 for interlaced.
+    const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
+    const float y_step = 1.0 + float(is_interlaced(orig_video_size.y));
+    const float2 il_step_multiple = float2(1.0, y_step);
+    //  Get the uv distance between (texels, same-field scanlines):
+    uv_scanline_step = il_step_multiple / ORIG_LINEARIZEDtexture_size;
+}
+
+#pragma stage fragment
+#pragma format R8G8B8A8_SRGB
+layout(location = 0) in vec2 tex_uv;
+layout(location = 1) in vec2 blur_dxdy;
+layout(location = 2) in vec2 uv_scanline_step;
+layout(location = 3) in float estimated_viewport_size_x;
+layout(location = 4) in vec2 texture_size_inv;
+layout(location = 5) in vec2 tex_uv_to_pixel_scale;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
+layout(set = 0, binding = 4) uniform sampler2D Original;
+
+//////////////////////////////  FRAGMENT INCLUDES  //////////////////////////////
+
+#include "../../../../include/blur-functions.h"
 #include "bloom-functions.h"
+#include "../../../../include/gamma-management.h"
+

 ///////////////////////////////////  HELPERS  //////////////////////////////////

-vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
-    const vec2 dxdy, const vec2 texture_size, const vec2 texture_size_inv,
-    const vec2 tex_uv_to_pixel_scale, const float sigma)
+float3 tex2Dresize_gaussian4x4(sampler2D tex, float2 tex_uv, float2 dxdy, float2 tex_size, float2 texture_size_inv, float2 tex_uv_to_pixel_scale, float sigma)
 {
    //  Requires:   1.) All requirements of gamma-management.h must be satisfied!
    //              2.) filter_linearN must == "true" in your .cgp preset.
    //              3.) mipmap_inputN must == "true" in your .cgp preset if
    //                  IN.output_size << SRC.video_size.
    //              4.) dxdy should contain the uv pixel spacing:
-    //                      dxdy = max(vec2(1.0),
+    //                      dxdy = max(float2(1.0),
    //                          SRC.video_size/IN.output_size)/SRC.texture_size;
    //              5.) texture_size == SRC.texture_size
-    //              6.) texture_size_inv == vec2(1.0)/SRC.texture_size
+    //              6.) texture_size_inv == float2(1.0)/SRC.texture_size
    //              7.) tex_uv_to_pixel_scale == IN.output_size *
    //                      SRC.texture_size / SRC.video_size;
    //              8.) sigma is the desired Gaussian standard deviation, in
@ -72,65 +197,65 @@ vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
    const float denom_inv = 0.5/(sigma*sigma);
    //  We're taking 4x4 samples, and we're snapping to texels for upsizing.
    //  Find texture coords for sample 5 (second row, second column):
-    const vec2 curr_texel = tex_uv * texture_size;
-    const vec2 prev_texel =
-        floor(curr_texel - vec2(under_half)) + vec2(0.5);
-    const vec2 prev_texel_uv = prev_texel * texture_size_inv;
-    const bvec2 snap = lessThanEqual(dxdy , texture_size_inv);
-    const vec2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
-    const vec2 sample5_uv = mix(sample5_downsize_uv, prev_texel_uv, snap);
+    const float2 curr_texel = tex_uv * tex_size;
+    const float2 prev_texel =
+        floor(curr_texel - float2(under_half)) + float2(0.5);
+    const float2 prev_texel_uv = prev_texel * texture_size_inv;
+    const float2 snap = float2((dxdy.x <= texture_size_inv.x), (dxdy.y <= texture_size_inv.y));
+    const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
+    const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);
    //  Compute texture coords for other samples:
-    const vec2 dx = vec2(dxdy.x, 0.0);
-    const vec2 sample0_uv = sample5_uv - dxdy;
-    const vec2 sample10_uv = sample5_uv + dxdy;
-    const vec2 sample15_uv = sample5_uv + 2.0 * dxdy;
-    const vec2 sample1_uv = sample0_uv + dx;
-    const vec2 sample2_uv = sample0_uv + 2.0 * dx;
-    const vec2 sample3_uv = sample0_uv + 3.0 * dx;
-    const vec2 sample4_uv = sample5_uv - dx;
-    const vec2 sample6_uv = sample5_uv + dx;
-    const vec2 sample7_uv = sample5_uv + 2.0 * dx;
-    const vec2 sample8_uv = sample10_uv - 2.0 * dx;
-    const vec2 sample9_uv = sample10_uv - dx;
-    const vec2 sample11_uv = sample10_uv + dx;
-    const vec2 sample12_uv = sample15_uv - 3.0 * dx;
-    const vec2 sample13_uv = sample15_uv - 2.0 * dx;
-    const vec2 sample14_uv = sample15_uv - dx;
+    const float2 dx = float2(dxdy.x, 0.0);
+    const float2 sample0_uv = sample5_uv - dxdy;
+    const float2 sample10_uv = sample5_uv + dxdy;
+    const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
+    const float2 sample1_uv = sample0_uv + dx;
+    const float2 sample2_uv = sample0_uv + 2.0 * dx;
+    const float2 sample3_uv = sample0_uv + 3.0 * dx;
+    const float2 sample4_uv = sample5_uv - dx;
+    const float2 sample6_uv = sample5_uv + dx;
+    const float2 sample7_uv = sample5_uv + 2.0 * dx;
+    const float2 sample8_uv = sample10_uv - 2.0 * dx;
+    const float2 sample9_uv = sample10_uv - dx;
+    const float2 sample11_uv = sample10_uv + dx;
+    const float2 sample12_uv = sample15_uv - 3.0 * dx;
+    const float2 sample13_uv = sample15_uv - 2.0 * dx;
+    const float2 sample14_uv = sample15_uv - dx;
    //  Load each sample:
-    const vec3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
-    const vec3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
-    const vec3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
-    const vec3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
-    const vec3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
-    const vec3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
-    const vec3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
-    const vec3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
-    const vec3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
-    const vec3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
-    const vec3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
-    const vec3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
-    const vec3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
-    const vec3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
-    const vec3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
-    const vec3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
+    float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
+    float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
+    float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
+    float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
+    float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
+    float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
+    float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
+    float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
+    float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
+    float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
+    float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
+    float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
+    float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
+    float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
+    float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
+    float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
    //  Compute destination pixel offsets for each sample:
-    const vec2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
-    const vec2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
+    const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
    //  Compute Gaussian sample weights:
    const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
    const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
@ -152,81 +277,13 @@ vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
        w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
        w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
    //  Weight and sum the samples:
-    const vec3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
+    const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
        w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
        w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
        w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
    return sum * weight_sum_inv;
 }

-#pragma stage vertex
-layout(location = 0) in vec4 Position;
-layout(location = 1) in vec2 TexCoord;
-layout(location = 0) out vec2 tex_uv;
-layout(location = 1) out float estimated_viewport_size_x;
-layout(location = 2) out vec2 blur_dxdy;
-layout(location = 3) out vec2 uv_scanline_step;
-layout(location = 4) out vec2 texture_size_inv;
-layout(location = 5) out vec2 tex_uv_to_pixel_scale;
-
-void main()
-{
-    //  This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h,
-    //  except we're using a different source image.
-   gl_Position = params.MVP * Position;
-   const vec2 video_uv = TexCoord;
-   tex_uv = video_uv;
-    //  The last pass (vertical scanlines) had a viewport y scale, so we can
-    //  use it to calculate a better runtime sigma:
-	estimated_viewport_size_x = registers.SourceSize.y * params.geom_aspect_ratio_x / params.geom_aspect_ratio_y;
-   
-    //  Get the uv sample distance between output pixels.  We're using a resize
-    //  blur, so arbitrary upsizing will be acceptable if filter_linearN =
-    //  "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
-    //  "true" too.  The blur will be much more accurate if a true 4x4 Gaussian
-    //  resize is used instead of tex2Dblur3x3_resize (which samples between
-    //  texels even for upsizing).
-	const vec2 dxdy_min_scale = registers.ORIG_LINEARIZEDSize.xy * registers.OutputSize.zw;
-    texture_size_inv = registers.ORIG_LINEARIZEDSize.zw;
-    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
-    {
-        //  For upsizing, we'll snap to texels and sample the nearest 4.
-        const vec2 dxdy_scale = max(dxdy_min_scale, vec2(1.0));
-        blur_dxdy = dxdy_scale * texture_size_inv;
-    }
-    else
-    {
-        const vec2 dxdy_scale = dxdy_min_scale;
-        blur_dxdy = dxdy_scale * texture_size_inv;
-	}
-	
-	tex_uv_to_pixel_scale = registers.OutputSize.xy;
-//  texture_size_inv = texture_size_inv; <- commented out because it's pointless in slang
-
-    //  Detecting interlacing again here lets us apply convergence offsets in
-    //  this pass.  il_step_multiple contains the (texel, scanline) step
-    //  multiple: 1 for progressive, 2 for interlaced.
-    const vec2 orig_video_size = registers.ORIG_LINEARIZEDSize.xy;
-	float interlace_check = 0.0;
-	if (is_interlaced(orig_video_size.y) == true) interlace_check = 1.0;
-    const float y_step = 1.0 + interlace_check;
-    const vec2 il_step_multiple = vec2(1.0, y_step);
-    //  Get the uv distance between (texels, same-field scanlines):
-    uv_scanline_step = il_step_multiple * registers.ORIG_LINEARIZEDSize.zw;
-}
-
-#pragma stage fragment
-#pragma format R8G8B8A8_SRGB
-layout(location = 0) in vec2 tex_uv;
-layout(location = 1) in float estimated_viewport_size_x;
-layout(location = 2) in vec2 blur_dxdy;
-layout(location = 3) in vec2 uv_scanline_step;
-layout(location = 4) in vec2 texture_size_inv;
-layout(location = 5) in vec2 tex_uv_to_pixel_scale;
-layout(location = 0) out vec4 FragColor;
-layout(set = 0, binding = 2) uniform sampler2D Source;
-layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
-
 void main()
 {
    //  Would a viewport-relative size work better for this pass?  (No.)
@ -264,45 +321,52 @@ void main()
    //      bandwidth if it's done at a small constant scale.
    
    //  Get the constants we need to sample:
-	const vec2 texture_size = registers.ORIG_LINEARIZEDSize.xy;
-	vec2 tex_uv_r, tex_uv_g, tex_uv_b;
-	
-	if(beam_misconvergence == true)
+//    const sampler2D texture = ORIG_LINEARIZED.texture;
+//    const float2 tex_uv = tex_uv;
+//    const float2 blur_dxdy = blur_dxdy;
+    const float2 texture_size_ = ORIG_LINEARIZEDtexture_size;
+//    const float2 texture_size_inv = texture_size_inv;
+//    const float2 tex_uv_to_pixel_scale = tex_uv_to_pixel_scale;
+    float2 tex_uv_r, tex_uv_g, tex_uv_b;
+
+    if(beam_misconvergence)
    {
-        const vec2 convergence_offsets_r = vec2(params.convergence_offset_x_r, params.convergence_offset_y_r);//get_convergence_offsets_r_vector();
-        const vec2 convergence_offsets_g = vec2(params.convergence_offset_x_g, params.convergence_offset_y_g);//get_convergence_offsets_g_vector();
-        const vec2 convergence_offsets_b = vec2(params.convergence_offset_x_b, params.convergence_offset_y_b);//get_convergence_offsets_b_vector();
-        tex_uv_r = tex_uv - vec2(params.convergence_offset_x_r, params.convergence_offset_y_r) * uv_scanline_step;
-        tex_uv_g = tex_uv - vec2(params.convergence_offset_x_g, params.convergence_offset_y_g) * uv_scanline_step;
-        tex_uv_b = tex_uv - vec2(params.convergence_offset_x_b, params.convergence_offset_y_b) * uv_scanline_step;
+        const float2 uv_scanline_step = uv_scanline_step;
+        const float2 convergence_offsets_r = get_convergence_offsets_r_vector();
+        const float2 convergence_offsets_g = get_convergence_offsets_g_vector();
+        const float2 convergence_offsets_b = get_convergence_offsets_b_vector();
+        tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
+        tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
+        tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
    }
-	//  Get the blur sigma:
-    const float bloom_approx_sigma = get_bloom_approx_sigma(registers.OutputSize.x, estimated_viewport_size_x);
-	
-	//  Sample the resized and blurred texture, and apply convergence offsets if
+    //  Get the blur sigma:
+    const float bloom_approx_sigma = get_bloom_approx_sigma(IN.output_size.x,
+        estimated_viewport_size_x);
+
+    //  Sample the resized and blurred texture, and apply convergence offsets if
    //  necessary.  Applying convergence offsets here triples our samples from
    //  16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
    //  HALATION_BLUR 3 times at full resolution every time they're used.
-    vec3 color_r, color_g, color_b, color;
-	if(bloom_approx_filter > 1.5)
+    float3 color_r, color_g, color_b, color;
+    if(bloom_approx_filter > 1.5)
    {
        //  Use a 4x4 Gaussian resize.  This is slower but technically correct.
-        if(beam_misconvergence == true)
+        if(beam_misconvergence)
        {
            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
-                blur_dxdy, texture_size, texture_size_inv,
+                blur_dxdy, texture_size_, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
-                blur_dxdy, texture_size, texture_size_inv,
+                blur_dxdy, texture_size_, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
-                blur_dxdy, texture_size, texture_size_inv,
+                blur_dxdy, texture_size_, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
        else
        {
            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
-                blur_dxdy, texture_size, texture_size_inv,
+                blur_dxdy, texture_size_, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
    }
@ -311,7 +375,7 @@ void main()
        //  Use a 3x3 resize blur.  This is the softest option, because we're
        //  blurring already blurry bilinear samples.  It doesn't play quite as
        //  nicely with convergence offsets, but it has its charms.
-        if(beam_misconvergence == true)
+        if(beam_misconvergence)
        {
            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, bloom_approx_sigma);
@ -333,7 +397,7 @@ void main()
        //  too sharp above ~400x300, but the blurs break down above that
        //  resolution too, unless min_allowed_viewport_triads is high enough to
        //  keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
-        if(beam_misconvergence == true)
+        if(beam_misconvergence)
        {
            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
@ -344,11 +408,11 @@ void main()
            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
        }
    }
-	//  Pack the colors from the red/green/blue beams into a single vector:
-    if(beam_misconvergence == true)
+    //  Pack the colors from the red/green/blue beams into a single vector:
+    if(beam_misconvergence)
    {
-        color = vec3(color_r.r, color_g.g, color_b.b);
+        color = float3(color_r.r, color_g.g, color_b.b);
    }
    //  Encode and output the blurred image:
-   FragColor = vec4(texture(ORIG_LINEARIZED, tex_uv));//vec4(color, 1.0);//
-}
+		FragColor = encode_output(float4(tex2D_linearize(ORIG_LINEARIZED, tex_uv)));
+}
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang
@ -1,354 +1,2 @@
 #version 450
-
-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-	vec4 ORIG_LINEARIZEDSize;
-} registers;
-
-#include "params.inc"
-
-/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
-
-//  crt-royale: A full-featured CRT shader, with cheese.
-//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
-//
-//  This program is free software; you can redistribute it and/or modify it
-//  under the terms of the GNU General Public License as published by the Free
-//  Software Foundation; either version 2 of the License, or any later version.
-//
-//  This program is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-//  more details.
-//
-//  You should have received a copy of the GNU General Public License along with
-//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-//  Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-//////////////////////////////////  INCLUDES  //////////////////////////////////
-
-#include "../user-settings.h"
-#include "derived-settings-and-constants.h"
-#include "bind-shader-params.h"
-#include "../../../../include/gamma-management.h"
-#include "../../../../include/blur-functions.h"
-#include "scanline-functions.h"
-#include "bloom-functions.h"
-
-///////////////////////////////////  HELPERS  //////////////////////////////////
-
-vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
-    const vec2 dxdy, const vec2 texture_size, const vec2 texture_size_inv,
-    const vec2 tex_uv_to_pixel_scale, const float sigma)
-{
-    //  Requires:   1.) All requirements of gamma-management.h must be satisfied!
-    //              2.) filter_linearN must == "true" in your .cgp preset.
-    //              3.) mipmap_inputN must == "true" in your .cgp preset if
-    //                  IN.output_size << SRC.video_size.
-    //              4.) dxdy should contain the uv pixel spacing:
-    //                      dxdy = max(vec2(1.0),
-    //                          SRC.video_size/IN.output_size)/SRC.texture_size;
-    //              5.) texture_size == SRC.texture_size
-    //              6.) texture_size_inv == vec2(1.0)/SRC.texture_size
-    //              7.) tex_uv_to_pixel_scale == IN.output_size *
-    //                      SRC.texture_size / SRC.video_size;
-    //              8.) sigma is the desired Gaussian standard deviation, in
-    //                  terms of output pixels.  It should be < ~0.66171875 to
-    //                  ensure the first unused sample (outside the 4x4 box) has
-    //                  a weight < 1.0/256.0.
-    //  Returns:    A true 4x4 Gaussian resize of the input.
-    //  Description:
-    //  Given correct inputs, this Gaussian resizer samples 4 pixel locations
-    //  along each downsized dimension and/or 4 texel locations along each
-    //  upsized dimension.  It computes dynamic weights based on the pixel-space
-    //  distance of each sample from the destination pixel.  It is arbitrarily
-    //  resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
-    //  TODO: Move this to a more suitable file once there are others like it.
-    const float denom_inv = 0.5/(sigma*sigma);
-    //  We're taking 4x4 samples, and we're snapping to texels for upsizing.
-    //  Find texture coords for sample 5 (second row, second column):
-    const vec2 curr_texel = tex_uv * texture_size;
-    const vec2 prev_texel =
-        floor(curr_texel - vec2(under_half)) + vec2(0.5);
-    const vec2 prev_texel_uv = prev_texel * texture_size_inv;
-    const bvec2 snap = lessThanEqual(dxdy , texture_size_inv);
-    const vec2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
-    const vec2 sample5_uv = mix(sample5_downsize_uv, prev_texel_uv, snap);
-    //  Compute texture coords for other samples:
-    const vec2 dx = vec2(dxdy.x, 0.0);
-    const vec2 sample0_uv = sample5_uv - dxdy;
-    const vec2 sample10_uv = sample5_uv + dxdy;
-    const vec2 sample15_uv = sample5_uv + 2.0 * dxdy;
-    const vec2 sample1_uv = sample0_uv + dx;
-    const vec2 sample2_uv = sample0_uv + 2.0 * dx;
-    const vec2 sample3_uv = sample0_uv + 3.0 * dx;
-    const vec2 sample4_uv = sample5_uv - dx;
-    const vec2 sample6_uv = sample5_uv + dx;
-    const vec2 sample7_uv = sample5_uv + 2.0 * dx;
-    const vec2 sample8_uv = sample10_uv - 2.0 * dx;
-    const vec2 sample9_uv = sample10_uv - dx;
-    const vec2 sample11_uv = sample10_uv + dx;
-    const vec2 sample12_uv = sample15_uv - 3.0 * dx;
-    const vec2 sample13_uv = sample15_uv - 2.0 * dx;
-    const vec2 sample14_uv = sample15_uv - dx;
-    //  Load each sample:
-    const vec3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
-    const vec3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
-    const vec3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
-    const vec3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
-    const vec3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
-    const vec3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
-    const vec3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
-    const vec3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
-    const vec3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
-    const vec3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
-    const vec3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
-    const vec3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
-    const vec3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
-    const vec3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
-    const vec3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
-    const vec3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
-    //  Compute destination pixel offsets for each sample:
-    const vec2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
-    const vec2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
-    const vec2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
-    //  Compute Gaussian sample weights:
-    const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
-    const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
-    const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
-    const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
-    const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
-    const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
-    const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
-    const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
-    const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
-    const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
-    const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
-    const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
-    const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
-    const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
-    const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
-    const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
-    const float weight_sum_inv = 1.0/(
-        w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
-        w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
-    //  Weight and sum the samples:
-    const vec3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
-        w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
-        w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
-        w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
-    return sum * weight_sum_inv;
-}
-
-#pragma stage vertex
-layout(location = 0) in vec4 Position;
-layout(location = 1) in vec2 TexCoord;
-layout(location = 0) out vec2 tex_uv;
-layout(location = 1) out float estimated_viewport_size_x;
-layout(location = 2) out vec2 blur_dxdy;
-layout(location = 3) out vec2 uv_scanline_step;
-layout(location = 4) out vec2 texture_size_inv;
-layout(location = 5) out vec2 tex_uv_to_pixel_scale;
-
-void main()
-{
-    //  This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h,
-    //  except we're using a different source image.
-   gl_Position = params.MVP * Position;
-   const vec2 video_uv = TexCoord;
-   tex_uv = video_uv;
-    //  The last pass (vertical scanlines) had a viewport y scale, so we can
-    //  use it to calculate a better runtime sigma:
-	estimated_viewport_size_x = registers.SourceSize.y * params.geom_aspect_ratio_x / params.geom_aspect_ratio_y;
-   
-    //  Get the uv sample distance between output pixels.  We're using a resize
-    //  blur, so arbitrary upsizing will be acceptable if filter_linearN =
-    //  "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
-    //  "true" too.  The blur will be much more accurate if a true 4x4 Gaussian
-    //  resize is used instead of tex2Dblur3x3_resize (which samples between
-    //  texels even for upsizing).
-	const vec2 dxdy_min_scale = registers.ORIG_LINEARIZEDSize.xy * registers.OutputSize.zw;
-    texture_size_inv = registers.ORIG_LINEARIZEDSize.zw;
-    if(bloom_approx_filter > 1.5)   //  4x4 true Gaussian resize
-    {
-        //  For upsizing, we'll snap to texels and sample the nearest 4.
-        const vec2 dxdy_scale = max(dxdy_min_scale, vec2(1.0));
-        blur_dxdy = dxdy_scale * texture_size_inv;
-    }
-    else
-    {
-        const vec2 dxdy_scale = dxdy_min_scale;
-        blur_dxdy = dxdy_scale * texture_size_inv;
-	}
-	
-	tex_uv_to_pixel_scale = registers.OutputSize.xy;
-//  texture_size_inv = texture_size_inv; <- commented out because it's pointless in slang
-
-    //  Detecting interlacing again here lets us apply convergence offsets in
-    //  this pass.  il_step_multiple contains the (texel, scanline) step
-    //  multiple: 1 for progressive, 2 for interlaced.
-    const vec2 orig_video_size = registers.ORIG_LINEARIZEDSize.xy;
-	float interlace_check = 0.0;
-	if (is_interlaced(orig_video_size.y) == true) interlace_check = 1.0;
-    const float y_step = 1.0 + interlace_check;
-    const vec2 il_step_multiple = vec2(1.0, y_step);
-    //  Get the uv distance between (texels, same-field scanlines):
-    uv_scanline_step = il_step_multiple * registers.ORIG_LINEARIZEDSize.zw;
-}
-
-#pragma stage fragment
-#pragma format R8G8B8A8_SRGB
-layout(location = 0) in vec2 tex_uv;
-layout(location = 1) in float estimated_viewport_size_x;
-layout(location = 2) in vec2 blur_dxdy;
-layout(location = 3) in vec2 uv_scanline_step;
-layout(location = 4) in vec2 texture_size_inv;
-layout(location = 5) in vec2 tex_uv_to_pixel_scale;
-layout(location = 0) out vec4 FragColor;
-layout(set = 0, binding = 2) uniform sampler2D Source;
-layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
-
-void main()
-{
-    //  Would a viewport-relative size work better for this pass?  (No.)
-    //  PROS:
-    //  1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
-    //      write a viewport scale.  That number could be used to directly scale
-    //      the viewport-resolution bloom sigma and/or triad size to a smaller
-    //      scale.  This way, we could calculate an optimal dynamic sigma no
-    //      matter how the dot pitch is specified.
-    //  CONS:
-    //  1.) Texel smearing would be much worse at small viewport sizes, but
-    //      performance would be much worse at large viewport sizes, so there
-    //      would be no easy way to calculate a decent scale.
-    //  2.) Worse, we could no longer get away with using a constant-size blur!
-    //      Instead, we'd have to face all the same difficulties as the real
-    //      phosphor bloom, which requires static #ifdefs to decide the blur
-    //      size based on the expected triad size...a dynamic value.
-    //  3.) Like the phosphor bloom, we'd have less control over making the blur
-    //      size correct for an optical blur.  That said, we likely overblur (to
-    //      maintain brightness) more than the eye would do by itself: 20/20
-    //      human vision distinguishes ~1 arc minute, or 1/60 of a degree.  The
-    //      highest viewing angle recommendation I know of is THX's 40.04 degree
-    //      recommendation, at which 20/20 vision can distinguish about 2402.4
-    //      lines.  Assuming the "TV lines" definition, that means 1201.2
-    //      distinct light lines and 1201.2 distinct dark lines can be told
-    //      apart, i.e. 1201.2 pairs of lines.  This would correspond to 1201.2
-    //      pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
-    //      (if they're alternately lit).  That's a max of 800.8 triads.  Using
-    //      a more popular 30 degree viewing angle recommendation, 20/20 vision
-    //      can distinguish 1800 lines, or 600 triads of alternately lit
-    //      phosphors.  In contrast, we currently blur phosphors all the way
-    //      down to 341.3 triads to ensure full brightness.
-    //  4.) Realistically speaking, we're usually just going to use bilinear
-    //      filtering in this pass anyway, but it only works well to limit
-    //      bandwidth if it's done at a small constant scale.
-    
-    //  Get the constants we need to sample:
-	const vec2 texture_size = registers.ORIG_LINEARIZEDSize.xy;
-	vec2 tex_uv_r, tex_uv_g, tex_uv_b;
-	
-	if(beam_misconvergence == true)
-    {
-        const vec2 convergence_offsets_r = vec2(params.convergence_offset_x_r, params.convergence_offset_y_r);//get_convergence_offsets_r_vector();
-        const vec2 convergence_offsets_g = vec2(params.convergence_offset_x_g, params.convergence_offset_y_g);//get_convergence_offsets_g_vector();
-        const vec2 convergence_offsets_b = vec2(params.convergence_offset_x_b, params.convergence_offset_y_b);//get_convergence_offsets_b_vector();
-        tex_uv_r = tex_uv - vec2(params.convergence_offset_x_r, params.convergence_offset_y_r) * uv_scanline_step;
-        tex_uv_g = tex_uv - vec2(params.convergence_offset_x_g, params.convergence_offset_y_g) * uv_scanline_step;
-        tex_uv_b = tex_uv - vec2(params.convergence_offset_x_b, params.convergence_offset_y_b) * uv_scanline_step;
-    }
-	//  Get the blur sigma:
-    const float bloom_approx_sigma = get_bloom_approx_sigma(registers.OutputSize.x, estimated_viewport_size_x);
-	
-	//  Sample the resized and blurred texture, and apply convergence offsets if
-    //  necessary.  Applying convergence offsets here triples our samples from
-    //  16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
-    //  HALATION_BLUR 3 times at full resolution every time they're used.
-    vec3 color_r, color_g, color_b, color;
-	if(bloom_approx_filter > 1.5)
-    {
-        //  Use a 4x4 Gaussian resize.  This is slower but technically correct.
-        if(beam_misconvergence == true)
-        {
-            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-        }
-        else
-        {
-            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
-                blur_dxdy, texture_size, texture_size_inv,
-                tex_uv_to_pixel_scale, bloom_approx_sigma);
-        }
-    }
-    else if(bloom_approx_filter > 0.5)
-    {
-        //  Use a 3x3 resize blur.  This is the softest option, because we're
-        //  blurring already blurry bilinear samples.  It doesn't play quite as
-        //  nicely with convergence offsets, but it has its charms.
-        if(beam_misconvergence == true)
-        {
-            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
-                blur_dxdy, bloom_approx_sigma);
-            color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
-                blur_dxdy, bloom_approx_sigma);
-            color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
-                blur_dxdy, bloom_approx_sigma);
-        }
-        else
-        {
-            color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy);
-        }
-    }
-    else
-    {
-        //  Use bilinear sampling.  This approximates a 4x4 Gaussian resize MUCH
-        //  better than tex2Dblur3x3_resize for the very small sigmas we're
-        //  likely to use at small output resolutions.  (This estimate becomes
-        //  too sharp above ~400x300, but the blurs break down above that
-        //  resolution too, unless min_allowed_viewport_triads is high enough to
-        //  keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
-        if(beam_misconvergence == true)
-        {
-            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
-            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
-            color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
-        }
-        else
-        {
-            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
-        }
-    }
-	//  Pack the colors from the red/green/blue beams into a single vector:
-    if(beam_misconvergence == true)
-    {
-        color = vec3(color_r.r, color_g.g, color_b.b);
-    }
-    //  Encode and output the blurred image:
-   FragColor = vec4(color, 1.0);//vec4(texture(ORIG_LINEARIZED, tex_uv));//
-}
+#include "crt-royale-bloom-approx.h"
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang
@ -1,17 +1,5 @@
 #version 450

-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OutputSize;
-	vec4 ORIG_LINEARIZEDSize;
-	vec4 HALATION_BLURSize;
-	vec4 MASKED_SCANLINESSize;
-	vec4 BRIGHTPASSSize;
-} registers;
-
-#include "params.inc"
-
 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

 //  crt-royale: A full-featured CRT shader, with cheese.
@ -30,18 +18,93 @@ layout(push_constant) uniform Push
 //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 //  Place, Suite 330, Boston, MA 02111-1307 USA

+layout(push_constant) uniform Push  //  Read as params.*; replaces the removed `registers` push block.
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO  //  Read as global.*: MVP, the float settings, then three pass sizes.
+{
+	mat4 MVP;
+	float crt_gamma;
+	float lcd_gamma;
+	float levels_contrast;
+	float halation_weight;
+	float diffusion_weight;
+	float bloom_underestimate_levels;
+	float bloom_excess;
+	float beam_min_sigma;
+	float beam_max_sigma;
+	float beam_spot_power;
+	float beam_min_shape;
+	float beam_max_shape;
+	float beam_shape_power;
+	float beam_horiz_filter;
+	float beam_horiz_sigma;
+	float beam_horiz_linear_rgb_weight;
+	float convergence_offset_x_r;
+	float convergence_offset_x_g;
+	float convergence_offset_x_b;
+	float convergence_offset_y_r;
+	float convergence_offset_y_g;
+	float convergence_offset_y_b;
+	float mask_type;
+	float mask_sample_mode_desired;
+	float mask_num_triads_desired;
+	float mask_triad_size_desired;
+	float mask_specify_num_triads;
+	float aa_subpixel_r_offset_x_runtime;
+	float aa_subpixel_r_offset_y_runtime;
+	float aa_cubic_c;
+	float aa_gauss_sigma;
+	float geom_mode_runtime;
+	float geom_radius;
+	float geom_view_dist;
+	float geom_tilt_angle_x;
+	float geom_tilt_angle_y;
+	float geom_aspect_ratio_x;
+	float geom_aspect_ratio_y;
+	float geom_overscan_x;
+	float geom_overscan_y;
+	float border_size;
+	float border_darkness;
+	float border_compress;
+	float interlace_bff;
+	float interlace_1080i;
+	vec4 MASKED_SCANLINESSize;  //  This and the two sizes below moved here from the removed `registers` block.
+	vec4 HALATION_BLURSize;
+	vec4 BRIGHTPASSSize;
+} global;
+
+#define MASKED_SCANLINEStexture MASKED_SCANLINES
+#define MASKED_SCANLINEStexture_size global.MASKED_SCANLINESSize.xy
+#define MASKED_SCANLINESvideo_size global.MASKED_SCANLINESSize.xy
+#define HALATION_BLURtexture HALATION_BLUR
+#define HALATION_BLURtexture_size global.HALATION_BLURSize.xy
+#define HALATION_BLURvideo_size global.HALATION_BLURSize.xy
+#define BRIGHTPASStexture BRIGHTPASS
+#define BRIGHTPASStexture_size global.BRIGHTPASSSize.xy
+#define BRIGHTPASSvideo_size global.BRIGHTPASSSize.xy
+//  Each *texture_size / *video_size pair above expands to the same .xy size.
+float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;  //  NOTE(review): x divided by y -- confirm intended.
+const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
+const float bloom_diff_thresh_ = 1.0/256.0;  //  Passed as `thresh` to get_min_sigma_to_blur_triad() in the vertex main().

 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////

+#include "params.inc"
+#include "../../../../include/compat_macros.inc"
 #include "../user-settings.h"
 #include "derived-settings-and-constants.h"
 #include "bind-shader-params.h"


-//////////////////////////////////  INCLUDES  //////////////////////////////////
+///////////////////////////////  VERTEX INCLUDES  //////////////////////////////

 #include "../../../../include/gamma-management.h"
-#include "bloom-functions.h"
 #include "phosphor-mask-resizing.h"
 #include "scanline-functions.h"

@ -56,34 +119,52 @@ layout(location = 4) out vec2 bloom_tex_uv;
 layout(location = 5) out vec2 bloom_dxdy;
 layout(location = 6) out float bloom_sigma_runtime;

+//  Vertex-stage copy from bloom-functions.h, which is now #included only after `#pragma stage fragment`.
+inline float get_min_sigma_to_blur_triad(const float triad_size,
+    const float thresh)
+{
+    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
+    //              2.) thresh is the max desired pixel difference in the
+    //                  blurred triad (e.g. 1.0/256.0).
+    //  Returns:    The minimum sigma that will fully blur a phosphor
+    //              triad on the screen to an even color, within thresh.
+    //              This closed-form function was found by curve-fitting data.
+    //  Estimate in use: max error = ~0.086036, mean sq. error = ~0.0013387:
+    return -0.05168 + 0.6113*triad_size -
+        1.122*triad_size*sqrt(0.000416 + thresh);
+    //  Unused alternative estimate: max error = ~0.16486, mean sq. error = ~0.0041041:
+    //return 0.5985*triad_size - triad_size*sqrt(thresh)
+}
+
 void main()
 {
-   gl_Position = params.MVP * Position;
-   video_uv = TexCoord;
+   gl_Position = global.MVP * Position;
+   float2 tex_uv = TexCoord;
   
    //  Our various input textures use different coords:
-    scanline_tex_uv = video_uv * registers.MASKED_SCANLINESSize.xy *
-        registers.MASKED_SCANLINESSize.zw;
-    halation_tex_uv = video_uv * registers.HALATION_BLURSize.xy *
-        registers.HALATION_BLURSize.zw;
-    brightpass_tex_uv = video_uv * registers.BRIGHTPASSSize.xy *
-        registers.BRIGHTPASSSize.zw;
-    bloom_tex_uv = TexCoord;
+    const float2 video_uv = tex_uv * IN.texture_size/IN.video_size;
+//  NOTE(review): video_uv above is a local; a stage output of the same name (if declared) stays unwritten -- confirm.
+    scanline_tex_uv = video_uv * MASKED_SCANLINESvideo_size /
+        MASKED_SCANLINEStexture_size;
+    halation_tex_uv = video_uv * HALATION_BLURvideo_size /
+        HALATION_BLURtexture_size;
+    brightpass_tex_uv = video_uv * BRIGHTPASSvideo_size /
+        BRIGHTPASStexture_size;
+    bloom_tex_uv = tex_uv;

    //  We're horizontally blurring the bloom input (vertically blurred
    //  brightpass).  Get the uv distance between output pixels / input texels
    //  in the horizontal direction (this pass must NOT resize):
-    bloom_dxdy = vec2(registers.SourceSize.z, 0.0);
+    bloom_dxdy = float2(1.0/IN.texture_size.x, 0.0);

    //  Calculate a runtime bloom_sigma in case it's needed:
    const float mask_tile_size_x = get_resized_mask_tile_size(
-        registers.OutputSize.xy, registers.OutputSize.xy * mask_resize_viewport_scale, false).x;
+        IN.output_size, IN.output_size * mask_resize_viewport_scale, false).x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
+        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh_);
 }

 #pragma stage fragment
-#pragma format R8G8B8A8_SRGB
 layout(location = 0) in vec2 video_uv;
 layout(location = 1) in vec2 scanline_tex_uv;
 layout(location = 2) in vec2 halation_tex_uv;
@ -93,40 +174,45 @@ layout(location = 5) in vec2 bloom_dxdy;
 layout(location = 6) in float bloom_sigma_runtime;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
-layout(set = 0, binding = 3) uniform sampler2D MASKED_SCANLINES;
-layout(set = 0, binding = 4) uniform sampler2D HALATION_BLUR;
-layout(set = 0, binding = 5) uniform sampler2D BRIGHTPASS;
+layout(set = 0, binding = 3) uniform sampler2D HALATION_BLUR;
+layout(set = 0, binding = 4) uniform sampler2D BRIGHTPASS;
+layout(set = 0, binding = 5) uniform sampler2D MASKED_SCANLINES;
+#define bloom_texture Source
+
+//////////////////////////////  FRAGMENT INCLUDES  //////////////////////////////
+
+#include "bloom-functions.h"

 void main()
 {
-//  Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
+    //  Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
    const float bloom_sigma = get_final_bloom_sigma(bloom_sigma_runtime);
-    const vec3 blurred_brightpass = tex2DblurNfast(Source,
+    const float3 blurred_brightpass = tex2DblurNfast(bloom_texture,
        bloom_tex_uv, bloom_dxdy, bloom_sigma);

-//  Sample the masked scanlines.  Alpha contains the auto-dim factor:
-    const vec3 intensity_dim =
-        tex2D_linearize(MASKED_SCANLINES, scanline_tex_uv).rgb;
+    //  Sample the masked scanlines (only .rgb is read; auto-dim comes from levels_autodim_temp):
+    const float3 intensity_dim =
+        tex2D_linearize(MASKED_SCANLINEStexture, scanline_tex_uv).rgb;
    const float auto_dim_factor = levels_autodim_temp;
    const float undim_factor = 1.0/auto_dim_factor;
-	
-	//  Calculate the mask dimpass, add it to the blurred brightpass, and
+
+    //  Calculate the mask dimpass, add it to the blurred brightpass, and
    //  undim (from scanline auto-dim) and amplify (from mask dim) the result:
    const float mask_amplify = get_mask_amplify();
-    const vec3 brightpass = tex2D_linearize(BRIGHTPASS,
+    const float3 brightpass = tex2D_linearize(BRIGHTPASStexture,
        brightpass_tex_uv).rgb;
-    const vec3 dimpass = intensity_dim - brightpass;
-    const vec3 phosphor_bloom = (dimpass + blurred_brightpass) *
-        mask_amplify * undim_factor * params.levels_contrast;
-		
-	//  Sample the halation texture, and let some light bleed into refractive
+    const float3 dimpass = intensity_dim - brightpass;
+    const float3 phosphor_bloom = (dimpass + blurred_brightpass) *
+        mask_amplify * undim_factor * levels_contrast;
+
+    //  Sample the halation texture, and let some light bleed into refractive
    //  diffusion.  Conceptually this occurs before the phosphor bloom, but
    //  adding it in earlier passes causes black crush in the diffusion colors.
-    const vec3 diffusion_color = params.levels_contrast * tex2D_linearize(
-        HALATION_BLUR, halation_tex_uv).rgb;
-    const vec3 final_bloom = mix(phosphor_bloom,
-        diffusion_color, params.diffusion_weight);
-		
-	//  Encode and output the bloomed image:
-   FragColor = encode_output(vec4(final_bloom, 1.0));
-}
+    const float3 diffusion_color = levels_contrast * tex2D_linearize(
+        HALATION_BLURtexture, halation_tex_uv).rgb;
+    const float3 final_bloom = lerp(phosphor_bloom,
+        diffusion_color, global.diffusion_weight);
+
+    //  Encode and output the bloomed image:
+    FragColor = encode_output(float4(final_bloom, 1.0));
+}
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-vertical.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-vertical.slang
@ -1,15 +1,5 @@
 #version 450

-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-} registers;
-
-#include "params.inc"
-
 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

 //  crt-royale: A full-featured CRT shader, with cheese.
@ -28,20 +18,84 @@ layout(push_constant) uniform Push
 //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 //  Place, Suite 330, Boston, MA 02111-1307 USA

+layout(push_constant) uniform Push  //  Read as params.*; same members as the removed `registers` push block.
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO  //  Read as global.*: MVP, the float settings, then two pass sizes.
+{
+	mat4 MVP;
+	float crt_gamma;
+	float lcd_gamma;
+	float levels_contrast;
+	float halation_weight;
+	float diffusion_weight;
+	float bloom_underestimate_levels;
+	float bloom_excess;
+	float beam_min_sigma;
+	float beam_max_sigma;
+	float beam_spot_power;
+	float beam_min_shape;
+	float beam_max_shape;
+	float beam_shape_power;
+	float beam_horiz_filter;
+	float beam_horiz_sigma;
+	float beam_horiz_linear_rgb_weight;
+	float convergence_offset_x_r;
+	float convergence_offset_x_g;
+	float convergence_offset_x_b;
+	float convergence_offset_y_r;
+	float convergence_offset_y_g;
+	float convergence_offset_y_b;
+	float mask_type;
+	float mask_sample_mode_desired;
+	float mask_num_triads_desired;
+	float mask_triad_size_desired;
+	float mask_specify_num_triads;
+	float aa_subpixel_r_offset_x_runtime;
+	float aa_subpixel_r_offset_y_runtime;
+	float aa_cubic_c;
+	float aa_gauss_sigma;
+	float geom_mode_runtime;
+	float geom_radius;
+	float geom_view_dist;
+	float geom_tilt_angle_x;
+	float geom_tilt_angle_y;
+	float geom_aspect_ratio_x;
+	float geom_aspect_ratio_y;
+	float geom_overscan_x;
+	float geom_overscan_y;
+	float border_size;
+	float border_darkness;
+	float border_compress;
+	float interlace_bff;
+	float interlace_1080i;
+	vec4 MASKED_SCANLINESSize;  //  NOTE(review): neither size here is referenced in the visible part of this pass -- confirm needed.
+	vec4 BLOOM_APPROXSize;
+} global;

 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////

+#include "params.inc"
+#include "../../../../include/compat_macros.inc"
 #include "../user-settings.h"
 #include "derived-settings-and-constants.h"
 #include "bind-shader-params.h"


-//////////////////////////////////  INCLUDES  //////////////////////////////////
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

 #include "../../../../include/gamma-management.h"
-#include "bloom-functions.h"
 #include "phosphor-mask-resizing.h"

+float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;  //  NOTE(review): x divided by y -- confirm intended.
+const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
+const float bloom_diff_thresh_ = 1.0/256.0;  //  Passed as `thresh` to get_min_sigma_to_blur_triad() in the vertex main().
+
 #pragma stage vertex
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
@ -49,23 +103,40 @@ layout(location = 0) out vec2 tex_uv;
 layout(location = 1) out vec2 bloom_dxdy;
 layout(location = 2) out float bloom_sigma_runtime;

+//  Vertex-stage copy from bloom-functions.h, which is now #included only after `#pragma stage fragment`.
+inline float get_min_sigma_to_blur_triad(const float triad_size,
+    const float thresh)
+{
+    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
+    //              2.) thresh is the max desired pixel difference in the
+    //                  blurred triad (e.g. 1.0/256.0).
+    //  Returns:    The minimum sigma that will fully blur a phosphor
+    //              triad on the screen to an even color, within thresh.
+    //              This closed-form function was found by curve-fitting data.
+    //  Estimate in use: max error = ~0.086036, mean sq. error = ~0.0013387:
+    return -0.05168 + 0.6113*triad_size -
+        1.122*triad_size*sqrt(0.000416 + thresh);
+    //  Unused alternative estimate: max error = ~0.16486, mean sq. error = ~0.0041041:
+    //return 0.5985*triad_size - triad_size*sqrt(thresh)
+}
+
 void main()
 {
-   gl_Position = params.MVP * Position;
-   tex_uv = TexCoord;
+   gl_Position = global.MVP * Position;
+   tex_uv = TexCoord * 1.0001;  //  NOTE(review): the 1.0001 scale is unexplained here -- confirm its purpose.
   
-    //  Get the uv sample distance between output pixels.  Calculate dxdy like
+	//  Get the uv sample distance between output pixels.  Calculate dxdy like
    //  blurs/vertex-shader-blur-fast-vertical.h.
-    const vec2 dxdy_scale = registers.SourceSize.xy * registers.OutputSize.zw;
-    const vec2 dxdy = dxdy_scale * registers.SourceSize.zw;
+    const float2 dxdy_scale = IN.video_size/IN.output_size;
+    const float2 dxdy = dxdy_scale/IN.texture_size;
    //  This blur is vertical-only, so zero out the horizontal offset:
-    bloom_dxdy = vec2(0.0, dxdy.y);
+    bloom_dxdy = float2(0.0, dxdy.y);

    //  Calculate a runtime bloom_sigma in case it's needed:
    const float mask_tile_size_x = get_resized_mask_tile_size(
-        registers.OutputSize.xy, registers.OutputSize.xy * mask_resize_viewport_scale, false).x;
+        IN.output_size, IN.output_size * mask_resize_viewport_scale, false).x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
+        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh_);
 }

 #pragma stage fragment
@ -75,13 +146,18 @@ layout(location = 1) in vec2 bloom_dxdy;
 layout(location = 2) in float bloom_sigma_runtime;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+//////////////////////////////  FRAGMENT INCLUDES  //////////////////////////////
+
+#include "bloom-functions.h"

 void main()
 {
    //  Blur the brightpass vertically with a 9/17/25/43x blur:
    const float bloom_sigma = get_final_bloom_sigma(bloom_sigma_runtime);
-    const vec3 color = tex2DblurNfast(Source, tex_uv,
+    const float3 color = tex2DblurNfast(input_texture, tex_uv,
        bloom_dxdy, bloom_sigma);
    //  Encode and output the blurred image:
-   FragColor = encode_output(vec4(color, 1.0));
-}
+    FragColor = encode_output(float4(color, 1.0));
+}
--- a/crt/shaders/crt-royale/src/crt-royale-brightpass.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-brightpass.slang
@ -1,17 +1,5 @@
 #version 450

-layout(push_constant) uniform Push
-{
-	vec4 SourceSize;
-	vec4 OriginalSize;
-	vec4 OutputSize;
-	uint FrameCount;
-	vec4 MASKED_SCANLINESSize;
-	vec4 BLOOM_APPROXSize;
-} registers;
-
-#include "params.inc"
-
 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

 //  crt-royale: A full-featured CRT shader, with cheese.
@ -30,114 +18,198 @@ layout(push_constant) uniform Push
 //  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 //  Place, Suite 330, Boston, MA 02111-1307 USA

+layout(push_constant) uniform Push  //  Read as params.*; replaces the removed `registers` push block.
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO  //  Read as global.*: MVP, the float settings, then two pass sizes.
+{
+	mat4 MVP;
+	float crt_gamma;
+	float lcd_gamma;
+	float levels_contrast;
+	float halation_weight;
+	float diffusion_weight;
+	float bloom_underestimate_levels;
+	float bloom_excess;
+	float beam_min_sigma;
+	float beam_max_sigma;
+	float beam_spot_power;
+	float beam_min_shape;
+	float beam_max_shape;
+	float beam_shape_power;
+	float beam_horiz_filter;
+	float beam_horiz_sigma;
+	float beam_horiz_linear_rgb_weight;
+	float convergence_offset_x_r;
+	float convergence_offset_x_g;
+	float convergence_offset_x_b;
+	float convergence_offset_y_r;
+	float convergence_offset_y_g;
+	float convergence_offset_y_b;
+	float mask_type;
+	float mask_sample_mode_desired;
+	float mask_num_triads_desired;
+	float mask_triad_size_desired;
+	float mask_specify_num_triads;
+	float aa_subpixel_r_offset_x_runtime;
+	float aa_subpixel_r_offset_y_runtime;
+	float aa_cubic_c;
+	float aa_gauss_sigma;
+	float geom_mode_runtime;
+	float geom_radius;
+	float geom_view_dist;
+	float geom_tilt_angle_x;
+	float geom_tilt_angle_y;
+	float geom_aspect_ratio_x;
+	float geom_aspect_ratio_y;
+	float geom_overscan_x;
+	float geom_overscan_y;
+	float border_size;
+	float border_darkness;
+	float border_compress;
+	float interlace_bff;
+	float interlace_1080i;
+	vec4 MASKED_SCANLINESSize;  //  This and BLOOM_APPROXSize moved here from the removed `registers` block.
+	vec4 BLOOM_APPROXSize;
+} global;
+
+#define MASKED_SCANLINEStexture MASKED_SCANLINES
+#define MASKED_SCANLINEStexture_size global.MASKED_SCANLINESSize.xy
+#define MASKED_SCANLINESvideo_size global.MASKED_SCANLINESSize.xy
+#define BLOOM_APPROXtexture BLOOM_APPROX
+#define BLOOM_APPROXtexture_size global.BLOOM_APPROXSize.xy
+#define BLOOM_APPROXvideo_size global.BLOOM_APPROXSize.xy
+//  Each *texture_size / *video_size pair above expands to the same .xy size.
+float bloom_approx_scale_x = params.OutputSize.x / params.SourceSize.y;  //  NOTE(review): x divided by y -- confirm intended.
+const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
+const float bloom_diff_thresh_ = 1.0/256.0;  //  Passed as `thresh` to get_min_sigma_to_blur_triad() in the vertex main().

 /////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////

+#include "params.inc"
+#include "../../../../include/compat_macros.inc"
 #include "../user-settings.h"
 #include "derived-settings-and-constants.h"
 #include "bind-shader-params.h"


-//////////////////////////////////  INCLUDES  //////////////////////////////////
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////

 #include "../../../../include/gamma-management.h"
-#include "../../../../include/blur-functions.h"
 #include "phosphor-mask-resizing.h"
 #include "scanline-functions.h"
-#include "bloom-functions.h"
-

 #pragma stage vertex
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
-layout(location = 0) out vec2 video_uv;
-layout(location = 1) out vec2 scanline_tex_uv;
+layout(location = 0) out vec2 scanline_tex_uv;
+layout(location = 1) out vec2 blur3x3_tex_uv;
 layout(location = 2) out float bloom_sigma_runtime;
-layout(location = 3) out vec2 blur3x3_tex_uv;
+
+//  Vertex-stage copy from bloom-functions.h, which is now #included only after `#pragma stage fragment`.
+inline float get_min_sigma_to_blur_triad(const float triad_size,
+    const float thresh)
+{
+    //  Requires:   1.) triad_size is the final phosphor triad size in pixels
+    //              2.) thresh is the max desired pixel difference in the
+    //                  blurred triad (e.g. 1.0/256.0).
+    //  Returns:    The minimum sigma that will fully blur a phosphor
+    //              triad on the screen to an even color, within thresh.
+    //              This closed-form function was found by curve-fitting data.
+    //  Estimate in use: max error = ~0.086036, mean sq. error = ~0.0013387:
+    return -0.05168 + 0.6113*triad_size -
+        1.122*triad_size*sqrt(0.000416 + thresh);
+    //  Unused alternative estimate: max error = ~0.16486, mean sq. error = ~0.0041041:
+    //return 0.5985*triad_size - triad_size*sqrt(thresh)
+}

 void main()
 {
-   gl_Position = params.MVP * Position;
-   const vec2 tex_uv = TexCoord;
+   gl_Position = global.MVP * Position;
+   float2 tex_uv = TexCoord;
    //  Our various input textures use different coords:
-    video_uv = tex_uv;
-    scanline_tex_uv = video_uv * registers.MASKED_SCANLINESSize.xy *
-        registers.MASKED_SCANLINESSize.zw;
-    blur3x3_tex_uv = video_uv * registers.BLOOM_APPROXSize.xy * registers.BLOOM_APPROXSize.zw;
+    float2 video_uv = tex_uv * IN.texture_size/IN.video_size;
+    //  video_uv is a plain local now; the `video_uv` stage output was removed from this pass.
+    scanline_tex_uv = video_uv * MASKED_SCANLINESvideo_size /
+        MASKED_SCANLINEStexture_size;
+    blur3x3_tex_uv = video_uv * BLOOM_APPROXvideo_size / BLOOM_APPROXtexture_size;

    //  Calculate a runtime bloom_sigma in case it's needed:
    const float mask_tile_size_x = get_resized_mask_tile_size(
-        registers.OutputSize.xy, registers.OutputSize.xy * mask_resize_viewport_scale, false).x;
+        IN.output_size, IN.output_size * mask_resize_viewport_scale, false).x;
    bloom_sigma_runtime = get_min_sigma_to_blur_triad(
-        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
+        mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh_);
 }

 #pragma stage fragment
-#pragma format R8G8B8A8_SRGB
-layout(location = 0) in vec2 video_uv;
-layout(location = 1) in vec2 scanline_tex_uv;
+layout(location = 0) in vec2 scanline_tex_uv;
+layout(location = 1) in vec2 blur3x3_tex_uv;
 layout(location = 2) in float bloom_sigma_runtime;
-layout(location = 3) in vec2 blur3x3_tex_uv;
 layout(location = 0) out vec4 FragColor;
-layout(set = 0, binding = 2) uniform sampler2D Source;
-layout(set = 0, binding = 3) uniform sampler2D MASKED_SCANLINES;
-layout(set = 0, binding = 4) uniform sampler2D BLOOM_APPROX;
+layout(set = 0, binding = 2) uniform sampler2D MASKED_SCANLINES;
+layout(set = 0, binding = 3) uniform sampler2D BLOOM_APPROX;
+
+//////////////////////////////  FRAGMENT INCLUDES  //////////////////////////////
+
+#include "bloom-functions.h"
+#include "../../../../include/blur-functions.h"

 void main()
 {
-	//  Sample the masked scanlines:
-    const vec3 intensity_dim =
-        tex2D_linearize(MASKED_SCANLINES, scanline_tex_uv).rgb;
+    //  Sample the masked scanlines:
+    const float3 intensity_dim =
+        tex2D_linearize(MASKED_SCANLINEStexture, scanline_tex_uv).rgb;
    //  Get the full intensity, including auto-undimming, and mask compensation:
    const float auto_dim_factor = levels_autodim_temp;
    const float undim_factor = 1.0/auto_dim_factor;
    const float mask_amplify = get_mask_amplify();
-    const vec3 intensity = intensity_dim * undim_factor * mask_amplify *
-        params.levels_contrast;
-		
-	//  Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines
+    const float3 intensity = intensity_dim * undim_factor * mask_amplify *
+        levels_contrast;
+
+    //  Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines
    //  would look like, so we can estimate how much energy we'll receive from
    //  blooming neighbors:
-    const vec3 phosphor_blur_approx = params.levels_contrast * tex2D_linearize(
-        BLOOM_APPROX, blur3x3_tex_uv).rgb;
-		
-	//  Compute the blur weight for the center texel and the maximum energy we
+    const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(
+        BLOOM_APPROXtexture, blur3x3_tex_uv).rgb;
+
+    //  Compute the blur weight for the center texel and the maximum energy we
    //  expect to receive from neighbors:
    const float bloom_sigma = get_final_bloom_sigma(bloom_sigma_runtime);
    const float center_weight = get_center_weight(bloom_sigma);
-    const vec3 max_area_contribution_approx =
-        max(vec3(0.0), phosphor_blur_approx - center_weight * intensity);
-		
-	//  Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0),
+    const float3 max_area_contribution_approx =
+        max(float3(0.0, 0.0, 0.0), phosphor_blur_approx - center_weight * intensity);
+    //  Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0),
    //  because it actually gets better results (on top of being very simple),
    //  but adjust all intensities for the user's desired underestimate factor:
-    const vec3 area_contrib_underestimate =
-        params.bloom_underestimate_levels * max_area_contribution_approx;
-    const vec3 intensity_underestimate =
-        params.bloom_underestimate_levels * intensity;
-		
-	//  Calculate the blur_ratio, the ratio of intensity we want to blur:
+    const float3 area_contrib_underestimate =
+        bloom_underestimate_levels * max_area_contribution_approx;
+    const float3 intensity_underestimate =
+        bloom_underestimate_levels * intensity;
+    //  Calculate the blur_ratio, the ratio of intensity we want to blur:
    #ifdef BRIGHTPASS_AREA_BASED
        //  This area-based version changes blur_ratio more smoothly and blurs
        //  more, clipping less but offering less phosphor differentiation:
-        const vec3 phosphor_blur_underestimate = params.bloom_underestimate_levels *
+        const float3 phosphor_blur_underestimate = bloom_underestimate_levels *
            phosphor_blur_approx;
-        const vec3 soft_intensity = max(intensity_underestimate,
+        const float3 soft_intensity = max(intensity_underestimate,
            phosphor_blur_underestimate * mask_amplify);
-        const vec3 blur_ratio_temp =
-            ((vec3(1.0) - area_contrib_underestimate) /
-            soft_intensity - vec3(1.0)) / (center_weight - 1.0);
+        const float3 blur_ratio_temp =
+            ((float3(1.0, 1.0, 1.0) - area_contrib_underestimate) /
+            soft_intensity - float3(1.0, 1.0, 1.0)) / (center_weight - 1.0);
    #else
-        const vec3 blur_ratio_temp =
-            ((vec3(1.0) - area_contrib_underestimate) /
-            intensity_underestimate - vec3(1.0)) / (center_weight - 1.0);
+        const float3 blur_ratio_temp =
+            ((float3(1.0, 1.0, 1.0) - area_contrib_underestimate) /
+            intensity_underestimate - float3(1.0, 1.0, 1.0)) / (center_weight - 1.0);
    #endif
-	
-	const vec3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);
+    const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);
    //  Calculate the brightpass based on the auto-dimmed, unamplified, masked
    //  scanlines, encode if necessary, and return!
-    const vec3 brightpass = intensity_dim *
-        mix(blur_ratio, vec3(1.0), params.bloom_excess);
-		
-   FragColor = encode_output(vec4(brightpass, 1.0));
-}
+    const float3 brightpass = intensity_dim *
+        lerp(blur_ratio, float3(1.0, 1.0, 1.0), global.bloom_excess);  //  NOTE(review): global.* here, but levels_contrast / bloom_underestimate_levels are unqualified -- confirm both resolve.
+    FragColor = encode_output(float4(brightpass, 1.0));
+}
--- a/Show more
+++ b/Show more