slang-shaders/retro/shaders/aann.slang
rz5 a0608ed481 Update aann.slang
Notable changes:
* As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications.
* All the color space conversion functions accept (and work on) vectors instead of single components.

This part is subjective, but I formatted the code all around based on my idea of what looks most legible.

@wareya, @Monroe88: Hi, could you both take a quick glance at my multiply-add optimizations? The wiki says it would make it faster, dunno by how much but it was easy to change and it didn't hinder legibility too much.
2016-07-19 14:20:30 +01:00

180 lines
5.6 KiB
Plaintext

#version 450
// Uniform block shared by the vertex and fragment stages (std140 layout, set 0, binding 0).
layout(std140, set = 0, binding = 0) uniform UBO
{
// Matrix multiplied with the incoming vertex position in the vertex stage.
mat4 MVP;
// .xy is read by the fragment stage as the size of the output viewport.
vec4 OutputSize;
// Not referenced by the code visible in this file.
vec4 OriginalSize;
// .xy is read by the fragment stage as the size of the source grid being resampled
// (presumably the texel dimensions of the Source texture; confirm against the host).
vec4 SourceSize;
} global;
// AntiAliased Nearest Neighbor
// by jimbo1qaz and wareya
// Licensed MIT
// Compile-time switches; each one is tested with a plain `if (...)` in the fragment stage.
// Set to true to interpolate directly on the sampled sRGB values instead of in a pseudo-perceptual colorspace.
#define NOGAMMA false
// Set to true to compensate for 8px overscan masking.
// Note: overscan compensation slightly exaggerates the pixel aspect ratio of the game if that ratio is not exactly 1:1.
#define MASKING false
// Set to true to do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color).
#define BILINEAR false
// http://i.imgur.com/kzwZkVf.png
// Interpolation weights: NOT(fl) is the weight given to the first endpoint,
// YES(fl) the weight given to the second endpoint.
// The argument and the whole expansion are parenthesized so that compound
// arguments such as NOT(a + b) expand to (1 - (a + b)) rather than (1 - a + b).
#define NOT(fl) (1 - (fl))
#define YES(fl) (fl)
#pragma stage vertex
// Vertex inputs.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
// Texture coordinate handed on to the fragment stage.
layout(location = 0) out vec2 vTexCoord;

// Pass-through vertex stage: forwards the texture coordinate untouched and
// transforms the vertex position by the MVP matrix from the uniform block.
void main()
{
    vTexCoord = TexCoord;
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
// Texture coordinate written by the vertex stage.
layout(location = 0) in vec2 vTexCoord;
// Color produced by the fragment stage.
layout(location = 0) out vec4 FragColor;
// Texture sampled by the fragment stage (set 0, binding 2).
layout(set = 0, binding = 2) uniform sampler2D Source;
// http://entropymine.com/imageworsener/srgbformula/
// Converts an sRGB-encoded color to linear light, one channel at a time.
// Channels above the threshold go through the power curve
// (c*0.9478... + 0.0521...)^2.4; the remaining channels use the straight-line
// segment c*0.0773... .
vec3 srgb2linear(vec3 srgb) {
    vec3 curved = pow(srgb*0.947867298578199 + 0.052132701421801, vec3(2.4));
    vec3 straight = srgb*0.0773993808049536;
    // mix() with a boolean selector picks `curved` where the test is true and
    // `straight` elsewhere; the component that is not picked has no effect.
    return mix(straight, curved, greaterThan(srgb, vec3(0.0404482362771082)));
}
// Converts a linear-light color to sRGB encoding, one channel at a time.
// Channels above the threshold go through the power curve
// c^(1/2.4)*1.055 - 0.055; the remaining channels use the straight-line
// segment c*12.92.
vec3 linear2srgb(vec3 linear) {
    vec3 curved = pow(linear, vec3(0.416666666666667))*1.055 - 0.055;
    vec3 straight = linear*12.92;
    // mix() with a boolean selector picks `curved` where the test is true and
    // `straight` elsewhere; the component that is not picked has no effect.
    return mix(straight, curved, greaterThan(linear, vec3(0.00313066844250063)));
}
// https://www.w3.org/Graphics/Color/srgb22
// Weights applied to the linear red, green and blue channels when computing luma
// in rgb2vry(). They add up to 1.0, which the inverse transform in vry2rgb() relies on.
#define RS 0.2126
#define GS 0.7152
#define BS 0.0722
// Converts an sRGB color into the pseudo-perceptual "vry" space used for interpolation:
//   x = cube root of the weighted linear luma,
//   y = linear red minus linear green,
//   z = mean of linear red and green, minus linear blue.
// With NOGAMMA set the color is returned unchanged.
vec3 rgb2vry(vec3 rgb) {
    if (NOGAMMA)
        return rgb;

    // Opponent-style channels: https://en.wikipedia.org/wiki/Opponent_process
    vec3 lin = srgb2linear(rgb);
    float luma = lin.r*RS + lin.g*GS + lin.b*BS;
    float redGreen = lin.r - lin.g;
    float yellowBlue = (lin.r + lin.g) * 0.5 - lin.b;

    // https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance
    // "scientists eventually converged on a roughly cube-root curve"; CIE does the same thing.
    return vec3(pow(luma, 0.333333333333333), redGreen, yellowBlue);
}
// Inverse of rgb2vry(): converts a "vry" triple back to an sRGB color.
// With NOGAMMA set the input is returned unchanged.
//
// Derivation: with t = RS*r + GS*g + BS*b, y = r - g, z = (r + g)/2 - b and
// RS + GS + BS = 1, solving for r, g, b gives the three expressions below.
vec3 vry2rgb(vec3 vry) {
    if (NOGAMMA)
        return vry;

    // Undo the cube root applied to luma. Plain multiplication is used instead
    // of pow(): it is cheaper, and it stays well defined for any sign of the
    // input, whereas pow() has undefined results for a negative base.
    float t = vry.x*vry.x*vry.x;

    vec3 rgb = vec3(
        t + vry.y*(GS + BS * 0.5) + vry.z*BS,
        t - vry.y*(RS + BS * 0.5) + vry.z*BS,
        t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS)
    );
    return linear2srgb(rgb);
}
// Interpolates between two "vry" colors.
//   first, second: endpoints as produced by rgb2vry().
//   frac:          weight of `second`; `first` gets the remaining weight.
// With NOGAMMA set this is a plain linear blend of all three components.
vec3 vry_interp(vec3 first, vec3 second, float frac) {
    if (NOGAMMA)
        return first*NOT(frac) + second*YES(frac);

    // Because the chroma values were generated on linear light, but the luma must be
    // interpolated in perceptual gamma (3), it can cause out-of-gamut oversaturated
    // values, since the chroma field is not a fixed size as luma values change.
    // To compensate, we can "pull" the chroma interpolation path in the opposite way
    // the luma path is curved.
    float new_luma = first.x*NOT(frac) + second.x*YES(frac);

    // Cube each luma once, by multiplication, to get back to linear light.
    float first_linear = first.x*first.x*first.x;
    float second_linear = second.x*second.x*second.x;
    float new_linear = new_luma*new_luma*new_luma;
    float linear_span = second_linear - first_linear;

    // Position of the new luma between the endpoints, measured in linear light.
    // When both endpoints have the same luma there is no luma curve to compensate
    // for, so the chroma is blended with the requested fraction. Forcing the
    // divisor to 1 instead would make the fraction 0 and silently discard
    // `second`'s chroma.
    float luma_fraction = (linear_span == 0.0)
        ? frac
        : (new_linear - first_linear) / linear_span;

    return vec3(new_luma,
        first.y*NOT(luma_fraction) + second.y*YES(luma_fraction),
        first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)
    );
}
// Works out, along one axis, which two source samples an output pixel blends
// and with what weight.
//   ssize: source size along this axis.
//   tsize: target (output) size along this axis; replaced by ssize when BILINEAR is set.
//   coord: normalized coordinate of the output pixel along this axis.
// Returns vec3(weight of the upper sample, lower sample coordinate, upper sample coordinate).
vec3 percent(float ssize, float tsize, float coord) {
    float target = BILINEAR ? ssize : tsize;

    // Extent of the output pixel along this axis, expressed in source units.
    float lowEdge = (coord*target - 0.5)/target*ssize;
    float highEdge = (coord*target + 0.5)/target*ssize;

    // Highest whole source boundary at or below the pixel's upper edge.
    float boundary = floor(highEdge);
    float upperCenter = (boundary + 0.5)/ssize;

    // The pixel starts above that boundary: both sample coordinates are the
    // same and the weight is 1.
    if (lowEdge > boundary)
        return vec3(1.0, upperCenter, upperCenter);

    // Otherwise the weight of the upper sample is the share of the pixel that
    // lies above the boundary.
    float lowerCenter = (boundary - 0.5)/ssize;
    float upperShare = (highEdge - boundary) / (highEdge - lowEdge);
    return vec3(upperShare, lowerCenter, upperCenter);
}
// Fragment stage: anti-aliased nearest neighbor resampling of Source.
// Per axis, percent() yields two sample coordinates and a blend weight; the four
// resulting samples are blended in the "vry" colorspace and converted back to sRGB.
void main() {
    vec2 sourceSize = global.SourceSize.xy;
    vec2 viewportSize = global.OutputSize.xy;
    vec2 gameCoord = vTexCoord;

    if (MASKING) {
        // Grow the viewport by 16 source pixels per axis (8 on each side) and
        // remap the coordinate into the enlarged area.
        vec2 scale = viewportSize/sourceSize;
        viewportSize += scale*16.0;
        gameCoord = (8.0 + gameCoord*sourceSize)/(sourceSize + 16.0);
    }

    // Per axis: .x = weight of the second sample, .y / .z = first / second sample coordinate.
    vec3 horiz = percent(sourceSize.x, viewportSize.x, gameCoord.x);
    vec3 vert = percent(sourceSize.y, viewportSize.y, gameCoord.y);

    // get points to interpolate across in pseudo-perceptual colorspace
    vec3 topLeft = rgb2vry(texture(Source, vec2(horiz.y, vert.y)).rgb);
    vec3 topRight = rgb2vry(texture(Source, vec2(horiz.z, vert.y)).rgb);
    vec3 bottomLeft = rgb2vry(texture(Source, vec2(horiz.y, vert.z)).rgb);
    vec3 bottomRight = rgb2vry(texture(Source, vec2(horiz.z, vert.z)).rgb);

    // interpolate horizontally within each row, then vertically between the rows
    vec3 topRow = vry_interp(topLeft, topRight, horiz.x);
    vec3 bottomRow = vry_interp(bottomLeft, bottomRight, horiz.x);
    vec3 blended = vry_interp(topRow, bottomRow, vert.x);

    // convert back to sRGB and return
    FragColor = vec4(vry2rgb(blended), 1.0);
}