diff --git a/blurs/README.md b/blurs/README.md
new file mode 100644
index 0000000..6c00263
--- /dev/null
+++ b/blurs/README.md
@@ -0,0 +1,129 @@
+This info pertains to the Blurs by TroggleMonkey:
+
+DESCRIPTION:
+Gaussian blurs are common building blocks in multi-pass shaders, and this
+library of optimized and tested blurs should make it easier to use whatever size
+blur you need.  All of these shaders are based on the tex2Dblur* functions in
+include/blur-functions.h, so you can use those directly if you ever need to
+add more processing to the same pass as a Gaussian blur.
+
+PICK THE RIGHT BLUR FOR YOUR USE CASE:
+There are several different types of blurs, ranging in size from 3-12 texels:
+a.) "Resize" separable blurs use vertical and horizontal passes and require N
+    taps for an Nx blur.  These are arbitrarily resizable.
+b.) "Fast" separable blurs use vertical and horizontal passes and require N taps
+    for an (N*2 - 1)x blur.  They exploit bilinear filtering to reduce the
+    required taps from e.g. 9 to 5.  These are always faster, but they have
+    strict image scale requirements.
+c.) "Resize" one-pass blurs combine the vertical/horizontal passes of the
+    "resize" separable blurs, and they require NxN taps for an NxN blur.  These
+    perform slowly enough that only tex2Dblur3x3resize is useful/included.
+d.) Other one-pass blurs combine the vertical/horizontal passes of the "fast"
+    separable blurs, and they exploit bilinear filtering the same way.  They're
+    faster than separable blurs at 3x3, competitive at 5x5 depending on options,
+    and slower at 7x7 and above...but larger blurs may still be useful if you're
+    hurting for passes.
+e.) "Shared" one-pass blurs go a step further: They also use quad-pixel
+    communication with fine-grained derivatives to distribute texture samples
+    across a 2x2 pixel quad.  (ddx() and ddy() are required, as well as a GPU
+    that uses fine-grained derivatives).  These blurs are faster than the other
+    one-pass blurs, but they have some artifacts from combining sample-sharing
+    with bilinear sampling, so they're best reserved for reblurring an already-
+    blurred input.
+
+Every blur expects linear filtering.  Except for resize separable blurs, all
+require a pass scale of (1/(2^M)) for some M >= 0.  That is, the output image
+has to have a 1:1 pixel:texel ratio with some mipmap of the input image, so use
+e.g. scaleN = "1.0" or scaleN = "0.25", not scaleN = "0.33" or scaleN = "2.0".
+Note: mipmap_inputN must = "true" in your .slangp file for scales other than 1.0.
+
+There are two suffixes on the .slang files relating to gamma correction:
+* Blurs with no suffix assume linear RGB input and output.
+* Blurs with a "-last-pass" suffix use pow() to gamma-correct their output.
+* Blurs with a "-gamma-encode-every-fbo" suffix use pow() to linearize each
+  input sample and again to gamma-correct the output.  These blurs are MUCH
+  slower than blurs without this suffix, but they're provided in case you want
+  to be [almost] gamma-correct on platforms without sRGB FBO's.  (The "almost"
+  is because bilinear filtering still won't be gamma-correct without sRGB.)
+* There are also blurs with both suffixes.  This may seem redundant, but they
+  make it easier to use a different output gamma for the last pass than for
+  the rest of the pipeline (such as when simulating another display device like
+  a Game Boy Advance or CRT).  See srgb-helpers/README.txt for more information.
+
+BENCHMARK RESULTS:
+Blurs have different performance characteristics depending on whether the input
+is mipmapped and depending on whether they're gamma-encoding every FBO.  Here's
+an excerpt from the blur-functions.h description with a comparison.  Note that
+benchmarks without an sRGB heading use "-gamma-encode-every-fbo" suffixes, and
+you can just look at the sRGB performance figures if you don't care about gamma:
+//  Here are some framerates from a GeForce 8800GTS.  The first pass resizes to
+//  viewport size (4x in this test) and linearizes for sRGB codepaths, and the
+//  remaining passes perform 6 full blurs.  Mipmapped tests are performed at the
+//  same scale, so they just measure the cost of mipmapping each FBO (only every
+//  other FBO is mipmapped for separable blurs, to mimic realistic usage).
+//  Mipmap      Neither     sRGB+Mipmap sRGB        Function
+//  76.0        92.3        131.3       193.7       tex2Dblur3fast
+//  63.2        74.4        122.4       175.5       tex2Dblur3resize
+//  93.7        121.2       159.3       263.2       tex2Dblur3x3
+//  59.7        68.7        115.4       162.1       tex2Dblur3x3resize
+//  63.2        74.4        122.4       175.5       tex2Dblur5fast
+//  49.3        54.8        100.0       132.7       tex2Dblur5resize
+//  59.7        68.7        115.4       162.1       tex2Dblur5x5
+//  64.9        77.2        99.1        137.2       tex2Dblur6x6shared
+//  55.8        63.7        110.4       151.8       tex2Dblur7fast
+//  39.8        43.9        83.9        105.8       tex2Dblur7resize
+//  40.0        44.2        83.2        104.9       tex2Dblur7x7
+//  56.4        65.5        71.9        87.9        tex2Dblur8x8shared
+//  49.3        55.1        99.9        132.5       tex2Dblur9fast
+//  33.3        36.2        72.4        88.0        tex2Dblur9resize
+//  27.8        29.7        61.3        72.2        tex2Dblur9x9
+//  37.2        41.1        52.6        60.2        tex2Dblur10x10shared
+//  44.4        49.5        91.3        117.8       tex2Dblur11fast
+//  28.8        30.8        63.6        75.4        tex2Dblur11resize
+//  33.6        36.5        40.9        45.5        tex2Dblur12x12shared
+
+BASIC USAGE:
+The .slangp presets in the quality-test-presets folder provide usage examples
+for basically every .slang blur shader.  The "-srgb" suffix on some .slangp
+presets is an explicit notice that they use sRGB FBO's.  Note how and when the
+"-last-pass" suffix is used for each .slang file, etc.
+
+The provided .slangp files with the "-mipmap" suffix are used to test quality and
+benchmarking with mipmapping enabled, but none of them actually use mipmapping
+as a feature in and of itself.  The following contrived .slangp would do that:
+    shaders = "5"
+
+    # Pass0: Linearize RGB:
+    shader0 = ../../srgb-helpers/first-pass-linearize.slang
+    filter_linear0 = "true"
+    scale_type0 = "source"
+    scale0 = "1.0"
+    srgb_framebuffer0 = "true"
+    
+    # Pass1: Upsize to 4x.  Pretend this pass does significant processing at 4x.
+    shader1 = ../../stock.slang
+    filter_linear1 = "true"
+    scale_type1 = "source"
+    scale1 = "4.0"
+    srgb_framebuffer1 = "true"
+    
+    # Pass2: Blur a source-sized mipmap 9x vertically; just shrink horizontally.
+    shader2 = ../blur9fast-vertical.slang
+    filter_linear2 = "true"
+    scale_type2 = "source"
+    scale2 = "0.25"
+    srgb_framebuffer2 = "true"
+    mipmap_input2 = "true"
+
+    # Pass3: Blur 9x horizontally
+    shader3 = ../blur9fast-horizontal.slang
+    filter_linear3 = "true"
+    scale_type3 = "source"
+    scale3 = "1.0"
+    srgb_framebuffer3 = "true"
+
+    # Pass4: Scale to the screen size and gamma-correct the output:
+    shader4 = ../../srgb-helpers/last-pass-gamma-correct.slang
+    filter_linear4 = "true"
+    scale_type4 = "viewport"
+    scale4 = "1.0"
diff --git a/blurs/blur10x10shared-gamma-encode-every-fbo.slang b/blurs/blur10x10shared-gamma-encode-every-fbo.slang
new file mode 100644
index 0000000..e562409
--- /dev/null
+++ b/blurs/blur10x10shared-gamma-encode-every-fbo.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur10x10shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang b/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang
new file mode 100644
index 0000000..16b02c1
--- /dev/null
+++ b/blurs/blur10x10shared-last-pass-gamma-encode-every-fbo.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur10x10shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur10x10shared-last-pass.slang b/blurs/blur10x10shared-last-pass.slang
new file mode 100644
index 0000000..ccd7372
--- /dev/null
+++ b/blurs/blur10x10shared-last-pass.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur10x10shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur10x10shared.slang b/blurs/blur10x10shared.slang
new file mode 100644
index 0000000..f7b0891
--- /dev/null
+++ b/blurs/blur10x10shared.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur10x10shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur12x12shared-gamma-encode-every-fbo.slang b/blurs/blur12x12shared-gamma-encode-every-fbo.slang
new file mode 100644
index 0000000..33bf942
--- /dev/null
+++ b/blurs/blur12x12shared-gamma-encode-every-fbo.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur12x12shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang b/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang
new file mode 100644
index 0000000..f5d9b2b
--- /dev/null
+++ b/blurs/blur12x12shared-last-pass-gamma-encode-every-fbo.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur12x12shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur12x12shared-last-pass.slang b/blurs/blur12x12shared-last-pass.slang
new file mode 100644
index 0000000..e92f659
--- /dev/null
+++ b/blurs/blur12x12shared-last-pass.slang
@@ -0,0 +1,87 @@
+#version 450
+
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+} global;
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+//  blur-functions.h needs to know our profile's capabilities:
+//  1.) DRIVERS_ALLOW_DERIVATIVES is mandatory for one-pass shared-sample blurs.
+//  2.) DRIVERS_ALLOW_TEX2DLOD is optional, but mipmapped blurs will have awful
+//      artifacts without it due to funky texture sampling derivatives.
+#define DRIVERS_ALLOW_DERIVATIVES
+#define DRIVERS_ALLOW_TEX2DLOD
+
+///////////////////////////////  VERTEX INCLUDES  ///////////////////////////////
+
+#include "../include/compat_macros.inc"
+#pragma stage vertex
+#include "vertex-shader-blur-one-pass-shared-sample.h"
+
+#pragma stage fragment
+layout(location = 0) in vec4 tex_uv;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+#define input_texture Source
+
+/////////////////////////////  FRAGMENT INCLUDES  /////////////////////////////
+#include "../include/gamma-management.h"
+#include "../include/blur-functions.h"
+
+void main()
+{
+    //  Floor the incoming pixel number (uv and screen origins) to integers:
+    float4 quad_pixel_id = floor(output_pixel_num);
+    //  Find this fragment's place in its pixel quad, then run the shared blur:
+    float4 quad_info = get_quad_vector(quad_pixel_id);
+    float3 blur_rgb = tex2Dblur12x12shared(input_texture, tex_uv, blur_dxdy,
+        quad_info);
+    //  Encode the blurred color and write it out:
+    FragColor = encode_output(float4(blur_rgb, 1.0));
+}
diff --git a/blurs/blur12x12shared.slang b/blurs/blur12x12shared.slang
index cca3ed4..f5944a1 100644
--- a/blurs/blur12x12shared.slang
+++ b/blurs/blur12x12shared.slang
@@ -40,7 +40,7 @@ layout(std140, set = 0, binding = 0) uniform UBO
 //  PASS SETTINGS:
 //  gamma-management.h needs to know what kind of pipeline we're using and
 //  what pass this is in that pipeline.  This will become obsolete if/when we
-//  can #define things like this in the .cgp preset file.
+//  can #define things like this in the preset file.
 //#define GAMMA_ENCODE_EVERY_FBO
 //#define FIRST_PASS
 //#define LAST_PASS
@@ -64,8 +64,8 @@ layout(std140, set = 0, binding = 0) uniform UBO
 
 #pragma stage fragment
 layout(location = 0) in vec4 tex_uv;
-layout(location = 1) in vec2 blur_dxdy;
-layout(location = 2) in vec4 output_pixel_num;
+layout(location = 1) in vec4 output_pixel_num;
+layout(location = 2) in vec2 blur_dxdy;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
 #define input_texture Source
@@ -84,4 +84,4 @@ void main()
         blur_dxdy, quad_vector);
     //  Encode and output the blurred image:
     FragColor = encode_output(float4(color, 1.0));
-}
\ No newline at end of file
+}
diff --git a/handheld/shaders/color/nds-color.slang b/handheld/shaders/color/nds-color.slang
index 4402900..325db58 100644
--- a/handheld/shaders/color/nds-color.slang
+++ b/handheld/shaders/color/nds-color.slang
@@ -1,13 +1,5 @@
 #version 450
 
-layout(std140, set = 0, binding = 0) uniform UBO
-{
-   mat4 MVP;
-   vec4 OutputSize;
-   vec4 OriginalSize;
-   vec4 SourceSize;
-} global;
-
 /*
    Shader Modified: Pokefan531
    Color Mangler
@@ -16,74 +8,105 @@ layout(std140, set = 0, binding = 0) uniform UBO
 */
 // Shader that replicates the LCD dynamics from a Nintendo DS Phat --
 
-#define target_gamma 1.91
-#define display_gamma 1.91
-#define sat 1.0
-#define lum 0.89
-#define contrast 1.0
-#define blr 0.0
-#define blg 0.0
-#define blb 0.0
-#define r 0.87
-#define g 0.645
-#define b 0.73
-#define rg 0.10
-#define rb 0.10
-#define gr 0.255
-#define gb 0.17
-#define br -0.125
-#define bg 0.255
-#define overscan_percent_x 0.0
-#define overscan_percent_y 0.0
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;
+	vec4 OutputSize;
+	vec4 OriginalSize;
+	vec4 SourceSize;
+	float mode, white_toggle; // storage for the two "#pragma parameter" values declared below
+} global;
+
+#pragma parameter mode "Color Profile (1=sRGB, 2=DCI, 3=Rec2020)" 1.0 1.0 3.0 1.0
+int color_mode = int(global.mode); // float parameter converted to int; the vertex stage branches on it
+
+#pragma parameter white_toggle "Toggle White Balance" 0.0 0.0 1.0 1.0
+bool white = bool(global.white_toggle); // true makes the vertex stage pick the *_white matrices
+
+#define target_gamma 2.2
+#define display_gamma 2.2
 
 /*
-White-NDS
-#define lum 0.98
-#define r 0.83
-#define g 0.625
-#define b 0.785
-#define rg 0.07
-#define rb 0.075
-#define gr 0.22
-#define gb 0.155
-#define br -0.13
-#define bg 0.22
+We'll define our color weights in this pattern:
+	r,   rg,  rb,  0.0,  //red channel
+	gr,  g,   gb,  0.0,  //green channel
+	br,  bg,  b,   0.0,  //blue channel
+	blr, blg, blb, lum   //alpha channel; its last slot doubles as storage for the profile's luma scale
 */
 
+const mat4 NDS_Rec2020 = mat4( // chosen for mode 3 with white balance off
+	0.58, 0.13,  0.09, 0.0, // each line of four values fills one column of the mat4
+	0.38, 0.645, 0.20, 0.0,
+	0.04, 0.225, 0.71, 0.0,
+	0.0,  0.0,   0.0,  1.0 // last value is [3].w, read back as "lum" in the fragment stage
+); 
+
+const mat4 NDS_Rec2020_white = mat4( // chosen for mode 3 with white balance on
+	0.535, 0.12,  0.09, 0.0,
+	0.345, 0.585, 0.20, 0.0,
+	0.04,  0.215, 0.71, 0.0,
+	0.0,   0.0,   0.0,  1.0
+); 
+
+const mat4 NDS_DCI = mat4( // chosen for mode 2 with white balance off
+	0.745, 0.10,  0.09,  0.0,
+	0.315, 0.665, 0.195, 0.0,
+	-0.06, 0.235, 0.715, 0.0,
+	0.0,   0.0,   0.0,   0.95
+); 
+
+const mat4 NDS_DCI_white = mat4( // chosen for mode 2 with white balance on
+	0.685, 0.095, 0.09,  0.0,
+	0.29,  0.605, 0.19,  0.0,
+	-0.06, 0.215, 0.715, 0.0,
+	0.0,   0.0,   0.0,   1.0
+); 
+
+const mat4 NDS_sRGB = mat4( // chosen for mode 1 with white balance off
+	0.815, 0.07, 0.075, 0.0,
+	0.215, 0.62, 0.155, 0.0,
+	-0.12, 0.22, 0.77,  0.0,
+	0.0,   0.0,  0.0,   0.89
+); 
+
+const mat4 NDS_sRGB_white = mat4( // mode 1, white balance on; same color weights as NDS_sRGB
+	0.815, 0.07, 0.075, 0.0,
+	0.215, 0.62, 0.155, 0.0,
+	-0.12, 0.22, 0.77,  0.0,
+	0.0,   0.0,  0.0,   0.97 // only this stored luma value differs from NDS_sRGB (0.89)
+); 
+
 #pragma stage vertex
 layout(location = 0) in vec4 Position;
 layout(location = 1) in vec2 TexCoord;
 layout(location = 0) out vec2 vTexCoord;
+layout(location = 1) out mat4 profile;
 
 void main()
 {
-   gl_Position = global.MVP * Position;
-   vTexCoord = TexCoord;
+	gl_Position = global.MVP * Position;
+	vTexCoord = TexCoord;
+	// choose the color matrix here; any mode other than 2 or 3 falls back to sRGB so profile is always written
+	if (color_mode == 3) profile = (!white) ? NDS_Rec2020 : NDS_Rec2020_white;
+	else if (color_mode == 2) profile = (!white) ? NDS_DCI : NDS_DCI_white;
+	else profile = (!white) ? NDS_sRGB : NDS_sRGB_white;
 }
 
 #pragma stage fragment
 layout(location = 0) in vec2 vTexCoord;
+layout(location = 1) in mat4 profile;
 layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
 
 void main()
 {
-   vec4 screen = pow(texture(Source, vTexCoord), vec4(target_gamma)).rgba;
-   vec4 avglum = vec4(0.5);
-   screen = mix(screen, avglum, (1.0 - contrast));
-   
- //				r   g    b   black
-mat4 color = mat4(r,  rg,  rb, 0.0,  //red channel
-			   gr,  g,   gb, 0.0,  //green channel
-			   br,  bg,  b,  0.0,  //blue channel
-			  blr, blg, blb,    0.0); //alpha channel; these numbers do nothing for our purposes.
-			  
-mat4 adjust = mat4((1.0 - sat) * 0.3086 + sat, (1.0 - sat) * 0.3086, (1.0 - sat) * 0.3086, 1.0,
-(1.0 - sat) * 0.6094, (1.0 - sat) * 0.6094 + sat, (1.0 - sat) * 0.6094, 1.0,
-(1.0 - sat) * 0.0820, (1.0 - sat) * 0.0820, (1.0 - sat) * 0.0820 + sat, 1.0,
-0.0, 0.0, 0.0, 1.0);
-	color *= adjust;
+	// the luminance scale is stored in the last element of the profile matrix
+	float lum = profile[3].w;
+
+	// decode to linear light first; the channel mixing below assumes linear values
+	vec4 screen = pow(texture(Source, vTexCoord), vec4(target_gamma)).rgba;
+	// scale by lum, then mix channels; negative sums are floored at 0 because pow() of a negative base is undefined
 	screen = clamp(screen * lum, 0.0, 1.0);
-	screen = color * screen;
+	screen = max(profile * screen, vec4(0.0));
 	FragColor = pow(screen, vec4(1.0 / display_gamma));
-}
\ No newline at end of file
+}
diff --git a/misc/ss-gamma-ramp.slang b/misc/ss-gamma-ramp.slang
new file mode 100644
index 0000000..4873152
--- /dev/null
+++ b/misc/ss-gamma-ramp.slang
@@ -0,0 +1,72 @@
+#version 450
+
+// Super Sleuth Gamma Ramp
+// based on Overload's ramp as implemented in bsnes v073
+// ported by hunterk
+// license: GPLv2
+
+layout(push_constant) uniform Push
+{
+	vec4 SourceSize;
+	vec4 OriginalSize;
+	vec4 OutputSize;
+	uint FrameCount;
+	float mixer; // "Gamma Boost (%)" parameter; the fragment stage reads it as params.mixer
+} params;
+
+#pragma parameter mixer "Gamma Boost (%)" 150.0 100.0 200.0 1.0
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP; // multiplied with Position in the vertex stage to produce gl_Position
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   vTexCoord = TexCoord;                 // forward the texture coordinate unchanged
+   gl_Position = global.MVP * Position;  // transform the vertex by the UBO's MVP matrix
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Overload's gamma ramp from Super Sleuth (apparently not based on any standard curve).
+// Entries 0-16 are the triangular numbers n*(n+1)/2; after that +8 per entry, ending at 0xff.
+const uint gammaRamp[32] = {
+      0x00, 0x01, 0x03, 0x06, 0x0a, 0x0f, 0x15, 0x1c,
+      0x24, 0x2d, 0x37, 0x42, 0x4e, 0x5b, 0x69, 0x78,
+      0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0,
+      0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0xff,
+    };
+// conv(f): map a float to an int code in 0..255; the argument is parenthesized so expressions expand safely
+#define conv(f) (((f) >= 1.0) ? 255 : ((f) <= 0.0 ? 0 : int(floor((f) * 256.0))))
+
+void main()
+{
+	vec4 img = texture(Source, vTexCoord);
+
+	// quantize each channel to an integer code; conv() limits the result to 0..255
+	uvec4 int_img = uvec4(conv(img.r), conv(img.g), conv(img.b), conv(img.a));
+
+	uint r = (int_img.r) & 0xff;
+	uint g = (int_img.g) & 0xff;
+	uint b = (int_img.b) & 0xff;
+
+	// the ramp has 32 entries, so it is indexed with the top 5 bits of each code
+	uint R = gammaRamp[r >> 3];
+	uint G = gammaRamp[g >> 3];
+	uint B = gammaRamp[b >> 3];
+
+	uvec3 output_i = uvec3(R, G, B);
+	vec3  output_f = vec3(output_i) * vec3(1./255.);
+	// mixer is a percentage: 100 keeps the source image, 200 applies the full ramp;
+	// source alpha is passed through so every component of FragColor is written
+	FragColor = vec4(mix(img.rgb, output_f, (params.mixer / 100.0) - 1.0), img.a);
+}