From a0608ed481f47135014ce3afcb3c2fbbd4e7e434 Mon Sep 17 00:00:00 2001
From: rz5 <rz5@users.noreply.github.com>
Date: Tue, 19 Jul 2016 14:20:30 +0100
Subject: [PATCH] Update aann.slang

Notable changes:
* As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications.
* All the color space conversion functions now accept (and operate on) vectors instead of single floats.

This part is subjective, but I also reformatted the code throughout based on what I find most legible.

@wareya, @Monroe88: Hi, could you both take a quick glance at my multiply-add optimizations? The wiki says they should make the shader faster; I don't know by how much, but the change was easy to make and it didn't hinder legibility too much.
---
 retro/shaders/aann.slang | 134 +++++++++++++++++++++++----------------
 1 file changed, 80 insertions(+), 54 deletions(-)

diff --git a/retro/shaders/aann.slang b/retro/shaders/aann.slang
index 1c39e0d..4df82fa 100644
--- a/retro/shaders/aann.slang
+++ b/retro/shaders/aann.slang
@@ -2,10 +2,10 @@
 
 layout(std140, set = 0, binding = 0) uniform UBO
 {
-   mat4 MVP;
-   vec4 OutputSize;
-   vec4 OriginalSize;
-   vec4 SourceSize;
+    mat4 MVP;
+    vec4 OutputSize;
+    vec4 OriginalSize;
+    vec4 SourceSize;
 } global;
 
 // AntiAliased Nearest Neighbor
@@ -14,9 +14,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
 
 // set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace
 #define NOGAMMA false
+
 // set to true to compensate for 8px overscan masking
 // Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1
 #define MASKING false
+
 // Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color)
 #define BILINEAR false
 
@@ -32,8 +34,8 @@ layout(location = 0) out vec2 vTexCoord;
 
 void main()
 {
-   gl_Position = global.MVP * Position;
-   vTexCoord = TexCoord;
+    gl_Position = global.MVP * Position;
+    vTexCoord   = TexCoord;
 }
 
 #pragma stage fragment
@@ -42,19 +44,23 @@ layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
 
 
+
+
 // http://entropymine.com/imageworsener/srgbformula/
-float srgb2linear(float srgb) {
-    if(srgb > 0.0404482362771082)
-        return pow(((srgb+0.055)/1.055), 2.4);
-    else
-        return srgb/12.92;
+vec3 srgb2linear(vec3 srgb) {
+    return vec3(
+        srgb.r > 0.0404482362771082 ? pow(srgb.r*0.947867298578199 + 0.052132701421801, 2.4) : srgb.r*0.0773993808049536,
+        srgb.g > 0.0404482362771082 ? pow(srgb.g*0.947867298578199 + 0.052132701421801, 2.4) : srgb.g*0.0773993808049536,
+        srgb.b > 0.0404482362771082 ? pow(srgb.b*0.947867298578199 + 0.052132701421801, 2.4) : srgb.b*0.0773993808049536 
+    );
 }
 
-float linear2srgb(float linear) {
-    if(linear > 0.00313066844250063)
-        return pow(linear,1/2.4)*1.055-0.055;
-    else
-        return linear*12.92;
+vec3 linear2srgb(vec3 linear) {
+    return vec3(
+        linear.x > 0.00313066844250063 ? pow(linear.x, 0.416666666666667)*1.055 - 0.055 : linear.x*12.92,
+        linear.y > 0.00313066844250063 ? pow(linear.y, 0.416666666666667)*1.055 - 0.055 : linear.y*12.92,
+        linear.z > 0.00313066844250063 ? pow(linear.z, 0.416666666666667)*1.055 - 0.055 : linear.z*12.92
+    );
 }
 
 // https://www.w3.org/Graphics/Color/srgb22
@@ -62,75 +68,95 @@ float linear2srgb(float linear) {
 #define GS 0.7152
 #define BS 0.0722
 
-
 vec3 rgb2vry(vec3 rgb) {
-    if(NOGAMMA) return rgb;
+    if (NOGAMMA) 
+        return rgb;
+
     // https://en.wikipedia.org/wiki/Opponent_process
-    float r = srgb2linear(rgb.r);
-    float g = srgb2linear(rgb.g);
-    float b = srgb2linear(rgb.b);
+    vec3 linear = srgb2linear(rgb);
+
     // https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance
     // "scientists eventually converged on a roughly cube-root curve"
     // CIE does the same thing.
-    float V = pow(r*RS + g*GS + b*BS, 1.0/3);
-    float R = r-g;
-    float Y = (r+g)/2-b;
-    return vec3(V,R,Y);
+    vec3 vry = vec3(
+        pow(linear.x*RS + linear.y*GS + linear.z*BS, 0.333333333333333),
+        linear.x - linear.y,
+        (linear.x + linear.y) * 0.5 - linear.z
+    );
+
+    return vry;
 }
 vec3 vry2rgb(vec3 vry) {
-    if(NOGAMMA) return vry;
+    if (NOGAMMA)
+        return vry;
+
     // Magic.
-    float r, g, b;
-        float t = pow(vry.x, 3);
-    r = linear2srgb(t + vry.y*(GS + BS/2) + vry.z*BS);
-    g = linear2srgb(t - vry.y*(RS + BS/2) + vry.z*BS);
-    b = linear2srgb(t + vry.y*(GS/2-RS/2) - vry.z*(RS+GS));
-    return vec3(r,g,b);
+    float t = pow(vry.x, 3);
+    
+    vec3 rgb = vec3(
+        t + vry.y*(GS       + BS * 0.5) + vry.z*BS,
+        t - vry.y*(RS       + BS * 0.5) + vry.z*BS,
+        t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS)
+    );
+    
+    return linear2srgb(rgb);
 }
 
 vec3 vry_interp(vec3 first, vec3 second, float frac) {
-    if(NOGAMMA) return first*NOT(frac) + second*YES(frac);
+    if (NOGAMMA) 
+        return first*NOT(frac) + second*YES(frac);
+    
     // Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3)
-    //   it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change.
+    // it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change.
     // To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved.
     float new_luma = first.x*NOT(frac) + second.x*YES(frac);
     float linear_span = pow(second.x, 3) - pow(first.x, 3);
-    if(linear_span == 0) linear_span = 1;
+    
+    if (linear_span == 0) 
+        linear_span = 1;
+
     float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span;
-    return vec3(new_luma,
+    
+    return  vec3(new_luma,
                 first.y*NOT(luma_fraction) + second.y*YES(luma_fraction),
-                first.z*NOT(luma_fraction) + second.z*YES(luma_fraction));
+                first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)
+            );
 }
 
 vec3 percent(float ssize, float tsize, float coord) {
-    if(BILINEAR) tsize = ssize;
+    if (BILINEAR) 
+        tsize = ssize;
+    
     float minfull = (coord*tsize - 0.5)/tsize*ssize;
     float maxfull = (coord*tsize + 0.5)/tsize*ssize;
 
     float realfull = floor(maxfull);
 
     if (minfull > realfull) {
-        return vec3(1, (realfull+0.5)/ssize, (realfull+0.5)/ssize);
+        return vec3(1, (realfull + 0.5)/ssize, (realfull + 0.5)/ssize);
     }
 
-    return vec3(
-            (maxfull - realfull) / (maxfull - minfull),
-            (realfull-0.5) / ssize,
-            (realfull+0.5) / ssize
-        );
+    return  vec3(
+                (maxfull - realfull) / (maxfull - minfull),
+                (realfull - 0.5) / ssize,
+                (realfull + 0.5) / ssize
+            );
 }
 
 void main() {
     vec2 viewportSize = global.OutputSize.xy;
     vec2 gameCoord = vTexCoord;
-    if(MASKING) {
+    if (MASKING) {
         float hscale = viewportSize.x/global.SourceSize.x;
         float vscale = viewportSize.y/global.SourceSize.y;
+
         viewportSize.x += hscale*16;
         viewportSize.y += vscale*16;
-        gameCoord.x = (8+gameCoord.x*(global.SourceSize.x))/(global.SourceSize.x+16);
-        gameCoord.y = (8+gameCoord.y*(global.SourceSize.y))/(global.SourceSize.y+16);
+
+        gameCoord.x = (8 + gameCoord.x*global.SourceSize.x)/(global.SourceSize.x + 16);
+        gameCoord.y = (8 + gameCoord.y*global.SourceSize.y)/(global.SourceSize.y + 16);
     }
+
     vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x);
     vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y);
 
@@ -138,16 +164,16 @@ void main() {
     float ykeep = ystuff[0];
 
     // get points to interpoflate across in pseudo-perceptual colorspace
-    vec3 a = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[1])).rgb);
-    vec3 b = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[1])).rgb);
-    vec3 c = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[2])).rgb);
-    vec3 d = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[2])).rgb);
+    vec3 a = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[1])).rgb);
+    vec3 b = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[1])).rgb);
+    vec3 c = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[2])).rgb);
+    vec3 d = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[2])).rgb);
 
     // interpolate
-    vec3 x1 = vry_interp(a, b, xkeep);
-    vec3 x2 = vry_interp(c, d, xkeep);
+    vec3 x1     = vry_interp(a,  b,  xkeep);
+    vec3 x2     = vry_interp(c,  d,  xkeep);
     vec3 result = vry_interp(x1, x2, ykeep);
 
     // convert back to sRGB and return
     FragColor = vec4(vry2rgb(result), 1);
-}
\ No newline at end of file
+}