Update aann.slang

Notable changes:
* As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications.
* All the color space conversion functions now accept (and work on) vectors instead of single floats.

This part is subjective, but I reformatted the code throughout based on my idea of what looks most legible.

@wareya, @Monroe88: Hi, could you both take a quick look at my multiply-add optimizations? The wiki says they should make the shader faster. I don't know by how much, but the change was easy to make and didn't hurt legibility too much.
This commit is contained in:
rz5 2016-07-19 14:20:30 +01:00 committed by GitHub
parent 27efe97300
commit a0608ed481

View file

@ -14,9 +14,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
// set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace // set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace
#define NOGAMMA false #define NOGAMMA false
// set to true to compensate for 8px overscan masking // set to true to compensate for 8px overscan masking
// Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1 // Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1
#define MASKING false #define MASKING false
// Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color) // Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color)
#define BILINEAR false #define BILINEAR false
@ -42,19 +44,23 @@ layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source; layout(set = 0, binding = 2) uniform sampler2D Source;
// http://entropymine.com/imageworsener/srgbformula/ // http://entropymine.com/imageworsener/srgbformula/
float srgb2linear(float srgb) { vec3 srgb2linear(vec3 srgb) {
if(srgb > 0.0404482362771082) return vec3(
return pow(((srgb+0.055)/1.055), 2.4); srgb.r > 0.0404482362771082 ? pow(srgb.r*0.947867298578199 + 0.052132701421801, 2.4) : srgb.r*0.0773993808049536,
else srgb.g > 0.0404482362771082 ? pow(srgb.g*0.947867298578199 + 0.052132701421801, 2.4) : srgb.g*0.0773993808049536,
return srgb/12.92; srgb.b > 0.0404482362771082 ? pow(srgb.b*0.947867298578199 + 0.052132701421801, 2.4) : srgb.b*0.0773993808049536
);
} }
float linear2srgb(float linear) { vec3 linear2srgb(vec3 linear) {
if(linear > 0.00313066844250063) return vec3(
return pow(linear,1/2.4)*1.055-0.055; linear.x > 0.00313066844250063 ? pow(linear.x, 0.416666666666667)*1.055 - 0.055 : linear.x*12.92,
else linear.y > 0.00313066844250063 ? pow(linear.y, 0.416666666666667)*1.055 - 0.055 : linear.y*12.92,
return linear*12.92; linear.z > 0.00313066844250063 ? pow(linear.z, 0.416666666666667)*1.055 - 0.055 : linear.z*12.92
);
} }
// https://www.w3.org/Graphics/Color/srgb22 // https://www.w3.org/Graphics/Color/srgb22
@ -62,75 +68,95 @@ float linear2srgb(float linear) {
#define GS 0.7152 #define GS 0.7152
#define BS 0.0722 #define BS 0.0722
vec3 rgb2vry(vec3 rgb) { vec3 rgb2vry(vec3 rgb) {
if(NOGAMMA) return rgb; if (NOGAMMA)
return rgb;
// https://en.wikipedia.org/wiki/Opponent_process // https://en.wikipedia.org/wiki/Opponent_process
float r = srgb2linear(rgb.r); vec3 linear = srgb2linear(rgb);
float g = srgb2linear(rgb.g);
float b = srgb2linear(rgb.b);
// https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance // https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance
// "scientists eventually converged on a roughly cube-root curve" // "scientists eventually converged on a roughly cube-root curve"
// CIE does the same thing. // CIE does the same thing.
float V = pow(r*RS + g*GS + b*BS, 1.0/3); vec3 vry = vec3(
float R = r-g; pow(linear.x*RS + linear.y*GS + linear.z*BS, 0.333333333333333),
float Y = (r+g)/2-b; linear.x - linear.y,
return vec3(V,R,Y); (linear.x + linear.y) * 0.5 - linear.z
);
return vry;
} }
vec3 vry2rgb(vec3 vry) { vec3 vry2rgb(vec3 vry) {
if(NOGAMMA) return vry; if (NOGAMMA)
return vry;
// Magic. // Magic.
float r, g, b;
float t = pow(vry.x, 3); float t = pow(vry.x, 3);
r = linear2srgb(t + vry.y*(GS + BS/2) + vry.z*BS);
g = linear2srgb(t - vry.y*(RS + BS/2) + vry.z*BS); vec3 rgb = vec3(
b = linear2srgb(t + vry.y*(GS/2-RS/2) - vry.z*(RS+GS)); t + vry.y*(GS + BS * 0.5) + vry.z*BS,
return vec3(r,g,b); t - vry.y*(RS + BS * 0.5) + vry.z*BS,
t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS)
);
return linear2srgb(rgb);
} }
vec3 vry_interp(vec3 first, vec3 second, float frac) { vec3 vry_interp(vec3 first, vec3 second, float frac) {
if(NOGAMMA) return first*NOT(frac) + second*YES(frac); if (NOGAMMA)
return first*NOT(frac) + second*YES(frac);
// Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3) // Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3)
// it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change. // it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change.
// To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved. // To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved.
float new_luma = first.x*NOT(frac) + second.x*YES(frac); float new_luma = first.x*NOT(frac) + second.x*YES(frac);
float linear_span = pow(second.x, 3) - pow(first.x, 3); float linear_span = pow(second.x, 3) - pow(first.x, 3);
if(linear_span == 0) linear_span = 1;
if (linear_span == 0)
linear_span = 1;
float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span; float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span;
return vec3(new_luma, return vec3(new_luma,
first.y*NOT(luma_fraction) + second.y*YES(luma_fraction), first.y*NOT(luma_fraction) + second.y*YES(luma_fraction),
first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)); first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)
);
} }
vec3 percent(float ssize, float tsize, float coord) { vec3 percent(float ssize, float tsize, float coord) {
if(BILINEAR) tsize = ssize; if (BILINEAR)
tsize = ssize;
float minfull = (coord*tsize - 0.5)/tsize*ssize; float minfull = (coord*tsize - 0.5)/tsize*ssize;
float maxfull = (coord*tsize + 0.5)/tsize*ssize; float maxfull = (coord*tsize + 0.5)/tsize*ssize;
float realfull = floor(maxfull); float realfull = floor(maxfull);
if (minfull > realfull) { if (minfull > realfull) {
return vec3(1, (realfull+0.5)/ssize, (realfull+0.5)/ssize); return vec3(1, (realfull + 0.5)/ssize, (realfull + 0.5)/ssize);
} }
return vec3( return vec3(
(maxfull - realfull) / (maxfull - minfull), (maxfull - realfull) / (maxfull - minfull),
(realfull-0.5) / ssize, (realfull - 0.5) / ssize,
(realfull+0.5) / ssize (realfull + 0.5) / ssize
); );
} }
void main() { void main() {
vec2 viewportSize = global.OutputSize.xy; vec2 viewportSize = global.OutputSize.xy;
vec2 gameCoord = vTexCoord; vec2 gameCoord = vTexCoord;
if(MASKING) { if (MASKING) {
float hscale = viewportSize.x/global.SourceSize.x; float hscale = viewportSize.x/global.SourceSize.x;
float vscale = viewportSize.y/global.SourceSize.y; float vscale = viewportSize.y/global.SourceSize.y;
viewportSize.x += hscale*16; viewportSize.x += hscale*16;
viewportSize.y += vscale*16; viewportSize.y += vscale*16;
gameCoord.x = (8+gameCoord.x*(global.SourceSize.x))/(global.SourceSize.x+16);
gameCoord.y = (8+gameCoord.y*(global.SourceSize.y))/(global.SourceSize.y+16); gameCoord.x = (8 + gameCoord.x*global.SourceSize.x)/(global.SourceSize.x + 16);
gameCoord.y = (8 + gameCoord.y*global.SourceSize.y)/(global.SourceSize.y + 16);
} }
vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x); vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x);
vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y); vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y);
@ -138,10 +164,10 @@ void main() {
float ykeep = ystuff[0]; float ykeep = ystuff[0];
// get points to interpolate across in pseudo-perceptual colorspace // get points to interpolate across in pseudo-perceptual colorspace
vec3 a = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[1])).rgb); vec3 a = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[1])).rgb);
vec3 b = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[1])).rgb); vec3 b = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[1])).rgb);
vec3 c = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[2])).rgb); vec3 c = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[2])).rgb);
vec3 d = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[2])).rgb); vec3 d = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[2])).rgb);
// interpolate // interpolate
vec3 x1 = vry_interp(a, b, xkeep); vec3 x1 = vry_interp(a, b, xkeep);