From a0608ed481f47135014ce3afcb3c2fbbd4e7e434 Mon Sep 17 00:00:00 2001 From: rz5 Date: Tue, 19 Jul 2016 14:20:30 +0100 Subject: [PATCH] Update aann.slang Notable changes: * As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications. * All the color space conversion functions accept (and work on) vectors instead of single floats. This part is subjective, but I formatted the code all around based on my idea of what looks most legible. @wareya, @Monroe88: Hi, could you both take a quick glance at my multiply-add optimizations? The wiki says it would make it faster, dunno by how much but it was easy to change and it didn't hinder legibility too much. --- retro/shaders/aann.slang | 134 +++++++++++++++++++++++---------------- 1 file changed, 80 insertions(+), 54 deletions(-) diff --git a/retro/shaders/aann.slang b/retro/shaders/aann.slang index 1c39e0d..4df82fa 100644 --- a/retro/shaders/aann.slang +++ b/retro/shaders/aann.slang @@ -2,10 +2,10 @@ layout(std140, set = 0, binding = 0) uniform UBO { - mat4 MVP; - vec4 OutputSize; - vec4 OriginalSize; - vec4 SourceSize; + mat4 MVP; + vec4 OutputSize; + vec4 OriginalSize; + vec4 SourceSize; } global; // AntiAliased Nearest Neighbor @@ -14,9 +14,11 @@ layout(std140, set = 0, binding = 0) uniform UBO // set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace #define NOGAMMA false + // set to true to compensate for 8px overscan masking // Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1 #define MASKING false + // Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color) #define BILINEAR false @@ -32,8 +34,8 @@ layout(location = 0) out vec2 vTexCoord; void main() { - gl_Position = global.MVP * Position; - vTexCoord = TexCoord; + gl_Position = global.MVP * Position; + vTexCoord = TexCoord; } #pragma stage fragment @@ -42,19 +44,23 @@ 
layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Source; + + // http://entropymine.com/imageworsener/srgbformula/ -float srgb2linear(float srgb) { - if(srgb > 0.0404482362771082) - return pow(((srgb+0.055)/1.055), 2.4); - else - return srgb/12.92; +vec3 srgb2linear(vec3 srgb) { + return vec3( + srgb.r > 0.0404482362771082 ? pow(srgb.r*0.947867298578199 + 0.052132701421801, 2.4) : srgb.r*0.0773993808049536, + srgb.g > 0.0404482362771082 ? pow(srgb.g*0.947867298578199 + 0.052132701421801, 2.4) : srgb.g*0.0773993808049536, + srgb.b > 0.0404482362771082 ? pow(srgb.b*0.947867298578199 + 0.052132701421801, 2.4) : srgb.b*0.0773993808049536 + ); } -float linear2srgb(float linear) { - if(linear > 0.00313066844250063) - return pow(linear,1/2.4)*1.055-0.055; - else - return linear*12.92; +vec3 linear2srgb(vec3 linear) { + return vec3( + linear.x > 0.00313066844250063 ? pow(linear.x, 0.416666666666667)*1.055 - 0.055 : linear.x*12.92, + linear.y > 0.00313066844250063 ? pow(linear.y, 0.416666666666667)*1.055 - 0.055 : linear.y*12.92, + linear.z > 0.00313066844250063 ? pow(linear.z, 0.416666666666667)*1.055 - 0.055 : linear.z*12.92 + ); } // https://www.w3.org/Graphics/Color/srgb22 @@ -62,75 +68,95 @@ float linear2srgb(float linear) { #define GS 0.7152 #define BS 0.0722 - vec3 rgb2vry(vec3 rgb) { - if(NOGAMMA) return rgb; + if (NOGAMMA) + return rgb; + // https://en.wikipedia.org/wiki/Opponent_process - float r = srgb2linear(rgb.r); - float g = srgb2linear(rgb.g); - float b = srgb2linear(rgb.b); + vec3 linear = srgb2linear(rgb); + // https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance // "scientists eventually converged on a roughly cube-root curve" // CIE does the same thing. 
- float V = pow(r*RS + g*GS + b*BS, 1.0/3); - float R = r-g; - float Y = (r+g)/2-b; - return vec3(V,R,Y); + vec3 vry = vec3( + pow(linear.x*RS + linear.y*GS + linear.z*BS, 0.333333333333333), + linear.x - linear.y, + (linear.x + linear.y) * 0.5 - linear.z + ); + + return vry; } vec3 vry2rgb(vec3 vry) { - if(NOGAMMA) return vry; + if (NOGAMMA) + return vry; + // Magic. - float r, g, b; - float t = pow(vry.x, 3); - r = linear2srgb(t + vry.y*(GS + BS/2) + vry.z*BS); - g = linear2srgb(t - vry.y*(RS + BS/2) + vry.z*BS); - b = linear2srgb(t + vry.y*(GS/2-RS/2) - vry.z*(RS+GS)); - return vec3(r,g,b); + float t = pow(vry.x, 3); + + vec3 rgb = vec3( + t + vry.y*(GS + BS * 0.5) + vry.z*BS, + t - vry.y*(RS + BS * 0.5) + vry.z*BS, + t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS) + ); + + return linear2srgb(rgb); } vec3 vry_interp(vec3 first, vec3 second, float frac) { - if(NOGAMMA) return first*NOT(frac) + second*YES(frac); + if (NOGAMMA) + return first*NOT(frac) + second*YES(frac); + // Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3) - // it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change. + // it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change. // To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved. 
float new_luma = first.x*NOT(frac) + second.x*YES(frac); float linear_span = pow(second.x, 3) - pow(first.x, 3); - if(linear_span == 0) linear_span = 1; + + if (linear_span == 0) + linear_span = 1; + float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span; - return vec3(new_luma, + + return vec3(new_luma, first.y*NOT(luma_fraction) + second.y*YES(luma_fraction), - first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)); + first.z*NOT(luma_fraction) + second.z*YES(luma_fraction) + ); } vec3 percent(float ssize, float tsize, float coord) { - if(BILINEAR) tsize = ssize; + if (BILINEAR) + tsize = ssize; + float minfull = (coord*tsize - 0.5)/tsize*ssize; float maxfull = (coord*tsize + 0.5)/tsize*ssize; float realfull = floor(maxfull); if (minfull > realfull) { - return vec3(1, (realfull+0.5)/ssize, (realfull+0.5)/ssize); + return vec3(1, (realfull + 0.5)/ssize, (realfull + 0.5)/ssize); } - return vec3( - (maxfull - realfull) / (maxfull - minfull), - (realfull-0.5) / ssize, - (realfull+0.5) / ssize - ); + return vec3( + (maxfull - realfull) / (maxfull - minfull), + (realfull - 0.5) / ssize, + (realfull + 0.5) / ssize + ); } void main() { vec2 viewportSize = global.OutputSize.xy; vec2 gameCoord = vTexCoord; - if(MASKING) { + if (MASKING) { float hscale = viewportSize.x/global.SourceSize.x; float vscale = viewportSize.y/global.SourceSize.y; + viewportSize.x += hscale*16; viewportSize.y += vscale*16; - gameCoord.x = (8+gameCoord.x*(global.SourceSize.x))/(global.SourceSize.x+16); - gameCoord.y = (8+gameCoord.y*(global.SourceSize.y))/(global.SourceSize.y+16); + + gameCoord.x = (8 + gameCoord.x*global.SourceSize.x)/(global.SourceSize.x + 16); + gameCoord.y = (8 + gameCoord.y*global.SourceSize.y)/(global.SourceSize.y + 16); } + vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x); vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y); @@ -138,16 +164,16 @@ void main() { float ykeep = ystuff[0]; // get points to 
interpolate across in pseudo-perceptual colorspace - vec3 a = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[1])).rgb); - vec3 b = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[1])).rgb); - vec3 c = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[2])).rgb); - vec3 d = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[2])).rgb); + vec3 a = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[1])).rgb); + vec3 b = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[1])).rgb); + vec3 c = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[2])).rgb); + vec3 d = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[2])).rgb); // interpolate - vec3 x1 = vry_interp(a, b, xkeep); - vec3 x2 = vry_interp(c, d, xkeep); + vec3 x1 = vry_interp(a, b, xkeep); + vec3 x2 = vry_interp(c, d, xkeep); vec3 result = vry_interp(x1, x2, ykeep); // convert back to sRGB and return FragColor = vec4(vry2rgb(result), 1); -} \ No newline at end of file +}