mirror of
https://github.com/italicsjenga/slang-shaders.git
synced 2024-11-26 01:11:32 +11:00
Update aann.slang
Notable changes: * As per https://www.opengl.org/wiki/GLSL_Optimizations, I tried replacing divisions with multiplications. * All the color space conversion functions now accept (and work on) vectors instead of single floats. This part is subjective, but I also reformatted the code throughout based on my idea of what looks most legible. @wareya, @Monroe88: Hi, could you both take a quick glance at my multiply-add optimizations? The wiki says it would make the shader faster; I don't know by how much, but it was easy to change and it didn't hinder legibility too much.
This commit is contained in:
parent
27efe97300
commit
a0608ed481
|
@ -14,9 +14,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
|
|||
|
||||
// set to true to interpolate in sRGB instead of a pseudo-perceptual colorspace
|
||||
#define NOGAMMA false
|
||||
|
||||
// set to true to compensate for 8px overscan masking
|
||||
// Note: overscan compensation slightly alters (extremifies) the pixel aspect ratio of the game if said pixel aspect ratio is not exactly 1:1
|
||||
#define MASKING false
|
||||
|
||||
// Do bilinear filtering instead of anti-aliased nearest neighbor filtering (used for debugging color)
|
||||
#define BILINEAR false
|
||||
|
||||
|
@ -42,19 +44,23 @@ layout(location = 0) out vec4 FragColor;
|
|||
layout(set = 0, binding = 2) uniform sampler2D Source;
|
||||
|
||||
|
||||
|
||||
|
||||
// http://entropymine.com/imageworsener/srgbformula/
|
||||
float srgb2linear(float srgb) {
|
||||
if(srgb > 0.0404482362771082)
|
||||
return pow(((srgb+0.055)/1.055), 2.4);
|
||||
else
|
||||
return srgb/12.92;
|
||||
vec3 srgb2linear(vec3 srgb) {
|
||||
return vec3(
|
||||
srgb.r > 0.0404482362771082 ? pow(srgb.r*0.947867298578199 + 0.052132701421801, 2.4) : srgb.r*0.0773993808049536,
|
||||
srgb.g > 0.0404482362771082 ? pow(srgb.g*0.947867298578199 + 0.052132701421801, 2.4) : srgb.g*0.0773993808049536,
|
||||
srgb.b > 0.0404482362771082 ? pow(srgb.b*0.947867298578199 + 0.052132701421801, 2.4) : srgb.b*0.0773993808049536
|
||||
);
|
||||
}
|
||||
|
||||
float linear2srgb(float linear) {
|
||||
if(linear > 0.00313066844250063)
|
||||
return pow(linear,1/2.4)*1.055-0.055;
|
||||
else
|
||||
return linear*12.92;
|
||||
vec3 linear2srgb(vec3 linear) {
|
||||
return vec3(
|
||||
linear.x > 0.00313066844250063 ? pow(linear.x, 0.416666666666667)*1.055 - 0.055 : linear.x*12.92,
|
||||
linear.y > 0.00313066844250063 ? pow(linear.y, 0.416666666666667)*1.055 - 0.055 : linear.y*12.92,
|
||||
linear.z > 0.00313066844250063 ? pow(linear.z, 0.416666666666667)*1.055 - 0.055 : linear.z*12.92
|
||||
);
|
||||
}
|
||||
|
||||
// https://www.w3.org/Graphics/Color/srgb22
|
||||
|
@ -62,75 +68,95 @@ float linear2srgb(float linear) {
|
|||
#define GS 0.7152
|
||||
#define BS 0.0722
|
||||
|
||||
|
||||
vec3 rgb2vry(vec3 rgb) {
|
||||
if(NOGAMMA) return rgb;
|
||||
if (NOGAMMA)
|
||||
return rgb;
|
||||
|
||||
// https://en.wikipedia.org/wiki/Opponent_process
|
||||
float r = srgb2linear(rgb.r);
|
||||
float g = srgb2linear(rgb.g);
|
||||
float b = srgb2linear(rgb.b);
|
||||
vec3 linear = srgb2linear(rgb);
|
||||
|
||||
// https://en.wikipedia.org/wiki/Lightness#Relationship_between_lightness.2C_value.2C_and_relative_luminance
|
||||
// "scientists eventually converged on a roughly cube-root curve"
|
||||
// CIE does the same thing.
|
||||
float V = pow(r*RS + g*GS + b*BS, 1.0/3);
|
||||
float R = r-g;
|
||||
float Y = (r+g)/2-b;
|
||||
return vec3(V,R,Y);
|
||||
vec3 vry = vec3(
|
||||
pow(linear.x*RS + linear.y*GS + linear.z*BS, 0.333333333333333),
|
||||
linear.x - linear.y,
|
||||
(linear.x + linear.y) * 0.5 - linear.z
|
||||
);
|
||||
|
||||
return vry;
|
||||
}
|
||||
vec3 vry2rgb(vec3 vry) {
|
||||
if(NOGAMMA) return vry;
|
||||
if (NOGAMMA)
|
||||
return vry;
|
||||
|
||||
// Magic.
|
||||
float r, g, b;
|
||||
float t = pow(vry.x, 3);
|
||||
r = linear2srgb(t + vry.y*(GS + BS/2) + vry.z*BS);
|
||||
g = linear2srgb(t - vry.y*(RS + BS/2) + vry.z*BS);
|
||||
b = linear2srgb(t + vry.y*(GS/2-RS/2) - vry.z*(RS+GS));
|
||||
return vec3(r,g,b);
|
||||
|
||||
vec3 rgb = vec3(
|
||||
t + vry.y*(GS + BS * 0.5) + vry.z*BS,
|
||||
t - vry.y*(RS + BS * 0.5) + vry.z*BS,
|
||||
t + vry.y*(GS * 0.5 - RS * 0.5) - vry.z*(RS+GS)
|
||||
);
|
||||
|
||||
return linear2srgb(rgb);
|
||||
}
|
||||
|
||||
vec3 vry_interp(vec3 first, vec3 second, float frac) {
|
||||
if(NOGAMMA) return first*NOT(frac) + second*YES(frac);
|
||||
if (NOGAMMA)
|
||||
return first*NOT(frac) + second*YES(frac);
|
||||
|
||||
// Because the chroma values were generated on linear light, but the luma must be interpolated in perceptual gamma (3)
|
||||
// it can cause out-of-gamut oversaturated values, since the chroma field is not a fixed size as luma values change.
|
||||
// To compensate, we can "pull" the chroma interpolation path in the opposite way the luma path is curved.
|
||||
float new_luma = first.x*NOT(frac) + second.x*YES(frac);
|
||||
float linear_span = pow(second.x, 3) - pow(first.x, 3);
|
||||
if(linear_span == 0) linear_span = 1;
|
||||
|
||||
if (linear_span == 0)
|
||||
linear_span = 1;
|
||||
|
||||
float luma_fraction = (pow(new_luma, 3) - pow(first.x, 3)) / linear_span;
|
||||
|
||||
return vec3(new_luma,
|
||||
first.y*NOT(luma_fraction) + second.y*YES(luma_fraction),
|
||||
first.z*NOT(luma_fraction) + second.z*YES(luma_fraction));
|
||||
first.z*NOT(luma_fraction) + second.z*YES(luma_fraction)
|
||||
);
|
||||
}
|
||||
|
||||
vec3 percent(float ssize, float tsize, float coord) {
|
||||
if(BILINEAR) tsize = ssize;
|
||||
if (BILINEAR)
|
||||
tsize = ssize;
|
||||
|
||||
float minfull = (coord*tsize - 0.5)/tsize*ssize;
|
||||
float maxfull = (coord*tsize + 0.5)/tsize*ssize;
|
||||
|
||||
float realfull = floor(maxfull);
|
||||
|
||||
if (minfull > realfull) {
|
||||
return vec3(1, (realfull+0.5)/ssize, (realfull+0.5)/ssize);
|
||||
return vec3(1, (realfull + 0.5)/ssize, (realfull + 0.5)/ssize);
|
||||
}
|
||||
|
||||
return vec3(
|
||||
(maxfull - realfull) / (maxfull - minfull),
|
||||
(realfull-0.5) / ssize,
|
||||
(realfull+0.5) / ssize
|
||||
(realfull - 0.5) / ssize,
|
||||
(realfull + 0.5) / ssize
|
||||
);
|
||||
}
|
||||
|
||||
void main() {
|
||||
vec2 viewportSize = global.OutputSize.xy;
|
||||
vec2 gameCoord = vTexCoord;
|
||||
if(MASKING) {
|
||||
if (MASKING) {
|
||||
float hscale = viewportSize.x/global.SourceSize.x;
|
||||
float vscale = viewportSize.y/global.SourceSize.y;
|
||||
|
||||
viewportSize.x += hscale*16;
|
||||
viewportSize.y += vscale*16;
|
||||
gameCoord.x = (8+gameCoord.x*(global.SourceSize.x))/(global.SourceSize.x+16);
|
||||
gameCoord.y = (8+gameCoord.y*(global.SourceSize.y))/(global.SourceSize.y+16);
|
||||
|
||||
gameCoord.x = (8 + gameCoord.x*global.SourceSize.x)/(global.SourceSize.x + 16);
|
||||
gameCoord.y = (8 + gameCoord.y*global.SourceSize.y)/(global.SourceSize.y + 16);
|
||||
}
|
||||
|
||||
vec3 xstuff = percent(global.SourceSize.x, viewportSize.x, gameCoord.x);
|
||||
vec3 ystuff = percent(global.SourceSize.y, viewportSize.y, gameCoord.y);
|
||||
|
||||
|
@ -138,10 +164,10 @@ void main() {
|
|||
float ykeep = ystuff[0];
|
||||
|
||||
// get points to interpolate across in pseudo-perceptual colorspace
|
||||
vec3 a = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[1])).rgb);
|
||||
vec3 b = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[1])).rgb);
|
||||
vec3 c = rgb2vry(texture(Source,vec2(xstuff[1],ystuff[2])).rgb);
|
||||
vec3 d = rgb2vry(texture(Source,vec2(xstuff[2],ystuff[2])).rgb);
|
||||
vec3 a = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[1])).rgb);
|
||||
vec3 b = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[1])).rgb);
|
||||
vec3 c = rgb2vry(texture(Source, vec2(xstuff[1], ystuff[2])).rgb);
|
||||
vec3 d = rgb2vry(texture(Source, vec2(xstuff[2], ystuff[2])).rgb);
|
||||
|
||||
// interpolate
|
||||
vec3 x1 = vry_interp(a, b, xkeep);
|
||||
|
|
Loading…
Reference in a new issue