Update aann.slang

Notable changes:
* As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications.
* All the color space conversion functions now accept (and operate on) vectors instead of single floats.

This part is subjective, but I reformatted the code throughout based on my idea of what looks most legible.

@wareya, @Monroe88: Hi, could you both take a quick look at my multiply-add optimizations? The wiki says this should make the shader faster. I don't know by how much, but it was easy to change and it didn't hurt legibility too much.
2024-11-29 19:01:31 +11:00 · 2016-07-19 14:20:30 +01:00 · 2016-07-19 14:20:30 +01:00 · a0608ed481
parent 27efe97300
commit a0608ed481
1 changed files with 80 additions and 54 deletions
--- a/retro/shaders/aann.slang
+++ b/retro/shaders/aann.slang
@ -14,9 +14,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
 // set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace
 #define NOGAMMA false
 // set to true to compensate for 8px overscan masking
 // Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1
 #define MASKING false
 // Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color)
 #define BILINEAR false
@ -42,19 +44,23 @@ layout(location = 0) out vec4 FragColor;
 layout(set = 0, binding = 2) uniform sampler2D Source;
 // http://entropymine.com/imageworsener/srgbformula/
-float srgb2linear(float srgb) {
+vec3 srgb2linear(vec3 srgb) {
-    if(srgb > 0.0404482362771082)
+    return vec3(
-        return pow(((srgb+0.055)/1.055), 2.4);
+        srgb.r > 0.0404482362771082 ? pow(srgb.r*0.947867298578199 + 0.052132701421801, 2.4) : srgb.r*0.0773993808049536,
-    else
+        srgb.g > 0.0404482362771082 ? pow(srgb.g*0.947867298578199 + 0.052132701421801, 2.4) : srgb.g*0.0773993808049536,
-        return srgb/12.92;
+        srgb.b > 0.0404482362771082 ? pow(srgb.b*0.947867298578199 + 0.052132701421801, 2.4) : srgb.b*0.0773993808049536 
    );
 }
-float linear2srgb(float linear) {
+vec3 linear2srgb(vec3 linear) {
-    if(linear > 0.00313066844250063)
+    return vec3(
-        return pow(linear,1/2.4)*1.055-0.055;
+        linear.x > 0.00313066844250063 ? pow(linear.x, 0.416666666666667)*1.055 - 0.055 : linear.x*12.92,
-    else
+        linear.y > 0.00313066844250063 ? pow(linear.y, 0.416666666666667)*1.055 - 0.055 : linear.y*12.92,
-        return linear*12.92;
+        linear.z > 0.00313066844250063 ? pow(linear.z, 0.416666666666667)*1.055 - 0.055 : linear.z*12.92
    );
 }
 // https://www.w3.org/Graphics/Color/srgb22
@ -62,48 +68,65 @@ float linear2srgb(float linear) {
 #define GS 0.7152
 #define BS 0.0722
 vec3 rgb2vry(vec3 rgb) {
-    if(NOGAMMA) return rgb;
+    if (NOGAMMA) 
        return rgb;
    // https://en.wikipedia.org/wiki/Opponent_process
-    float r = srgb2linear(rgb.r);
+    vec3 linear = srgb2linear(rgb);
-    float g = srgb2linear(rgb.g);
+
    float b = srgb2linear(rgb.b);
    // https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance
    // "scientists eventually converged on a roughly cube-root curve"
    // CIE does the same thing.
-    float V = pow(r*RS + g*GS + b*BS, 1.0/3);
+    vec3 vry = vec3(
-    float R = r-g;
+        pow(linear.x*RS + linear.y*GS + linear.z*BS, 0.333333333333333),
-    float Y = (r+g)/2-b;
+        linear.x - linear.y,
-    return vec3(V,R,Y);
+        (linear.x + linear.y) * 0.5 - linear.z
    );
    return vry;
 }
 vec3 vry2rgb(vec3 vry) {
-    if(NOGAMMA) return vry;
+    if (NOGAMMA)
        return vry;
    // Magic.
    float r, g, b;
    float t = pow(vry.x, 3);
-    r = linear2srgb(t + vry.y*(GS + BS/2) + vry.z*BS);
+    
-    g = linear2srgb(t - vry.y*(RS + BS/2) + vry.z*BS);
+    vec3 rgb = vec3(
-    b = linear2srgb(t + vry.y*(GS/2-RS/2) - vry.z*(RS+GS));
+        t + vry.y*(GS       + BS * 0.5) + vry.z*BS,
-    return vec3(r,g,b);
+        t - vry.y*(RS       + BS * 0.5) + vry.z*BS,
        t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS)
    );
    return linear2srgb(rgb);
 }
 vec3 vry_interp(vec3 first, vec3 second, float frac) {
-    if(NOGAMMA) return first*NOT(frac) + second*YES(frac);
+    if (NOGAMMA) 
        return first*NOT(frac) + second*YES(frac);
    // Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3)
    // it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change.
    // To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved.
    float new_luma = first.x*NOT(frac) + second.x*YES(frac);
    float linear_span = pow(second.x, 3) - pow(first.x, 3);
-    if(linear_span == 0) linear_span = 1;
+    
    if (linear_span == 0) 
        linear_span = 1;
    float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span;
    return  vec3(new_luma,
                first.y*NOT(luma_fraction) + second.y*YES(luma_fraction),
-                first.z*NOT(luma_fraction) + second.z*YES(luma_fraction));
+                first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)
            );
 }
 vec3 percent(float ssize, float tsize, float coord) {
-    if(BILINEAR) tsize = ssize;
+    if (BILINEAR) 
        tsize = ssize;
    float minfull = (coord*tsize - 0.5)/tsize*ssize;
    float maxfull = (coord*tsize + 0.5)/tsize*ssize;
@ -126,11 +149,14 @@ void main() {
    if (MASKING) {
        float hscale = viewportSize.x/global.SourceSize.x;
        float vscale = viewportSize.y/global.SourceSize.y;
        viewportSize.x += hscale*16;
        viewportSize.y += vscale*16;
-        gameCoord.x = (8+gameCoord.x*(global.SourceSize.x))/(global.SourceSize.x+16);
+
-        gameCoord.y = (8+gameCoord.y*(global.SourceSize.y))/(global.SourceSize.y+16);
+        gameCoord.x = (8 + gameCoord.x*global.SourceSize.x)/(global.SourceSize.x + 16);
        gameCoord.y = (8 + gameCoord.y*global.SourceSize.y)/(global.SourceSize.y + 16);
    }
    vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x);
    vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y);