base sabr on native GLSL version instead of Cg port

This commit is contained in:
hunterk 2017-03-27 10:00:29 -05:00
parent 9f4fdeb75a
commit fa95194a25

View file

@ -36,68 +36,70 @@ layout(std140, set = 0, binding = 0) uniform UBO
mat4 MVP; mat4 MVP;
} global; } global;
#define saturate(c) clamp(c, 0.0, 1.0) /*
#define lerp(a,b,c) mix(a,b,c) Constants
#define mul(a,b) (b*a) */
#define fmod(a,b) mod(a,b) /*
#define frac(c) fract(c) Inequation coefficients for interpolation
#define tex2D(c,d) texture(c,d) Equations are in the form: Ay + Bx = C
#define float2 vec2 45, 30, and 60 denote the angle from x each line the cooeficient variable set builds
#define float3 vec3 */
#define float4 vec4 const vec4 Ai = vec4( 1.0, -1.0, -1.0, 1.0);
#define int2 ivec2 const vec4 B45 = vec4( 1.0, 1.0, -1.0, -1.0);
#define int3 ivec3 const vec4 C45 = vec4( 1.5, 0.5, -0.5, 0.5);
#define int4 ivec4 const vec4 B30 = vec4( 0.5, 2.0, -0.5, -2.0);
#define bool2 bvec2 const vec4 C30 = vec4( 1.0, 1.0, -0.5, 0.0);
#define bool3 bvec3 const vec4 B60 = vec4( 2.0, 0.5, -2.0, -0.5);
#define bool4 bvec4 const vec4 C60 = vec4( 2.0, 0.0, -1.0, 0.5);
#define float2x2 mat2x2
#define float3x3 mat3x3
#define float4x4 mat4x4
#define decal Source const vec4 M45 = vec4(0.4, 0.4, 0.4, 0.4);
const vec4 M30 = vec4(0.2, 0.4, 0.2, 0.4);
const vec4 M60 = M30.yxwz;
const vec4 Mshift = vec4(0.2);
const float4 Ai = float4( 1.0, -1.0, -1.0, 1.0); // Coefficient for weighted edge detection
const float4 B45 = float4( 1.0, 1.0, -1.0, -1.0); const float coef = 2.0;
const float4 C45 = float4( 1.5, 0.5, -0.5, 0.5); // Threshold for if luminance values are "equal"
const float4 B30 = float4( 0.5, 2.0, -0.5, -2.0); const vec4 threshold = vec4(0.32);
const float4 C30 = float4( 1.0, 1.0, -0.5, 0.0);
const float4 B60 = float4( 2.0, 0.5, -2.0, -0.5);
const float4 C60 = float4( 2.0, 0.0, -1.0, 0.5);
const float4 M45 = float4(0.4, 0.4, 0.4, 0.4); // Conversion from RGB to Luminance (from GIMP)
const float4 M30 = float4(0.2, 0.4, 0.2, 0.4); const vec3 lum = vec3(0.21, 0.72, 0.07);
const float4 M60 = M30.yxwz;
const float4 Mshift = float4(0.2, 0.2, 0.2, 0.2);
const float coef = 2.0; // Performs same logic operation as && for vectors
bvec4 _and_(bvec4 A, bvec4 B) {
const float4 threshold = float4(0.32, 0.32, 0.32, 0.32); return bvec4(A.x && B.x, A.y && B.y, A.z && B.z, A.w && B.w);
const float3 lum = float3(0.21, 0.72, 0.07);
float4 lum_to(float3 v0, float3 v1, float3 v2, float3 v3) {
return float4(dot(lum, v0), dot(lum, v1), dot(lum, v2), dot(lum, v3));
} }
float4 lum_df(float4 A, float4 B) { // Performs same logic operation as || for vectors
bvec4 _or_(bvec4 A, bvec4 B) {
return bvec4(A.x || B.x, A.y || B.y, A.z || B.z, A.w || B.w);
}
// Converts 4 3-color vectors into 1 4-value luminance vector
vec4 lum_to(vec3 v0, vec3 v1, vec3 v2, vec3 v3) {
return vec4(dot(lum, v0), dot(lum, v1), dot(lum, v2), dot(lum, v3));
}
// Gets the difference between 2 4-value luminance vectors
vec4 lum_df(vec4 A, vec4 B) {
return abs(A - B); return abs(A - B);
} }
bool4 lum_eq(float4 A, float4 B) { // Determines if 2 4-value luminance vectors are "equal" based on threshold
return lessThan(lum_df(A, B) , float4(threshold)); bvec4 lum_eq(vec4 A, vec4 B) {
return lessThan(lum_df(A, B), threshold);
} }
float4 lum_wd(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h) { vec4 lum_wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h) {
return lum_df(a, b) + lum_df(a, c) + lum_df(d, e) + lum_df(d, f) + 4.0 * lum_df(g, h); return lum_df(a, b) + lum_df(a, c) + lum_df(d, e) + lum_df(d, f) + 4.0 * lum_df(g, h);
} }
float c_df(float3 c1, float3 c2) { // Gets the difference between 2 3-value rgb colors
float3 df = abs(c1 - c2); float c_df(vec3 c1, vec3 c2) {
vec3 df = abs(c1 - c2);
return df.r + df.g + df.b; return df.r + df.g + df.b;
} }
#pragma stage vertex #pragma stage vertex
layout(location = 0) in vec4 Position; layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord; layout(location = 1) in vec2 TexCoord;
@ -114,17 +116,17 @@ void main()
{ {
gl_Position = global.MVP * Position; gl_Position = global.MVP * Position;
float x = params.SourceSize.z;//1.0 / IN.texture_size.x; float x = params.SourceSize.z;
float y = params.SourceSize.w;//1.0 / IN.texture_size.y; float y = params.SourceSize.w;
tc = TexCoord * 1.00001; tc = TexCoord * vec2(1.0004, 1.0);
xyp_1_2_3 = tc.xxxy + float4( -x, 0.0, x, -2.0 * y); xyp_1_2_3 = tc.xxxy + vec4( -x, 0.0, x, -2.0 * y);
xyp_6_7_8 = tc.xxxy + float4( -x, 0.0, x, -y); xyp_6_7_8 = tc.xxxy + vec4( -x, 0.0, x, -y);
xyp_11_12_13 = tc.xxxy + float4( -x, 0.0, x, 0.0); xyp_11_12_13 = tc.xxxy + vec4( -x, 0.0, x, 0.0);
xyp_16_17_18 = tc.xxxy + float4( -x, 0.0, x, y); xyp_16_17_18 = tc.xxxy + vec4( -x, 0.0, x, y);
xyp_21_22_23 = tc.xxxy + float4( -x, 0.0, x, 2.0 * y); xyp_21_22_23 = tc.xxxy + vec4( -x, 0.0, x, 2.0 * y);
xyp_5_10_15 = tc.xyyy + float4(-2.0 * x, -y, 0.0, y); xyp_5_10_15 = tc.xyyy + vec4(-2.0 * x, -y, 0.0, y);
xyp_9_14_9 = tc.xyyy + float4( 2.0 * x, -y, 0.0, y); xyp_9_14_9 = tc.xyyy + vec4( 2.0 * x, -y, 0.0, y);
} }
#pragma stage fragment #pragma stage fragment
@ -141,116 +143,129 @@ layout(set = 0, binding = 2) uniform sampler2D Source;
void main() void main()
{ {
/* /*
Mask for algorithm Mask for algorhithm
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
| | 1 | 2 | 3 | | | | 1 | 2 | 3 | |
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
| 5 | 6 | 7 | 8 | 9 | | 5 | 6 | 7 | 8 | 9 |
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
| 10 | 11 | 12 | 13 | 14 | | 10 | 11 | 12 | 13 | 14 |
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
| 15 | 16 | 17 | 18 | 19 | | 15 | 16 | 17 | 18 | 19 |
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
| | 21 | 22 | 23 | | | | 21 | 22 | 23 | |
+-----+-----+-----+-----+-----+ +-----+-----+-----+-----+-----+
*/ */
// Store mask values // Get mask values by performing texture lookup with the uniform sampler
float3 P1 = tex2D(decal, xyp_1_2_3.xw ).rgb; vec3 P1 = texture(Source, xyp_1_2_3.xw ).rgb;
float3 P2 = tex2D(decal, xyp_1_2_3.yw ).rgb; vec3 P2 = texture(Source, xyp_1_2_3.yw ).rgb;
float3 P3 = tex2D(decal, xyp_1_2_3.zw ).rgb; vec3 P3 = texture(Source, xyp_1_2_3.zw ).rgb;
float3 P6 = tex2D(decal, xyp_6_7_8.xw ).rgb; vec3 P6 = texture(Source, xyp_6_7_8.xw ).rgb;
float3 P7 = tex2D(decal, xyp_6_7_8.yw ).rgb; vec3 P7 = texture(Source, xyp_6_7_8.yw ).rgb;
float3 P8 = tex2D(decal, xyp_6_7_8.zw ).rgb; vec3 P8 = texture(Source, xyp_6_7_8.zw ).rgb;
float3 P11 = tex2D(decal, xyp_11_12_13.xw).rgb; vec3 P11 = texture(Source, xyp_11_12_13.xw).rgb;
float3 P12 = tex2D(decal, xyp_11_12_13.yw).rgb; vec3 P12 = texture(Source, xyp_11_12_13.yw).rgb;
float3 P13 = tex2D(decal, xyp_11_12_13.zw).rgb; vec3 P13 = texture(Source, xyp_11_12_13.zw).rgb;
float3 P16 = tex2D(decal, xyp_16_17_18.xw).rgb; vec3 P16 = texture(Source, xyp_16_17_18.xw).rgb;
float3 P17 = tex2D(decal, xyp_16_17_18.yw).rgb; vec3 P17 = texture(Source, xyp_16_17_18.yw).rgb;
float3 P18 = tex2D(decal, xyp_16_17_18.zw).rgb; vec3 P18 = texture(Source, xyp_16_17_18.zw).rgb;
float3 P21 = tex2D(decal, xyp_21_22_23.xw).rgb; vec3 P21 = texture(Source, xyp_21_22_23.xw).rgb;
float3 P22 = tex2D(decal, xyp_21_22_23.yw).rgb; vec3 P22 = texture(Source, xyp_21_22_23.yw).rgb;
float3 P23 = tex2D(decal, xyp_21_22_23.zw).rgb; vec3 P23 = texture(Source, xyp_21_22_23.zw).rgb;
float3 P5 = tex2D(decal, xyp_5_10_15.xy ).rgb; vec3 P5 = texture(Source, xyp_5_10_15.xy ).rgb;
float3 P10 = tex2D(decal, xyp_5_10_15.xz ).rgb; vec3 P10 = texture(Source, xyp_5_10_15.xz ).rgb;
float3 P15 = tex2D(decal, xyp_5_10_15.xw ).rgb; vec3 P15 = texture(Source, xyp_5_10_15.xw ).rgb;
float3 P9 = tex2D(decal, xyp_9_14_9.xy ).rgb; vec3 P9 = texture(Source, xyp_9_14_9.xy ).rgb;
float3 P14 = tex2D(decal, xyp_9_14_9.xz ).rgb; vec3 P14 = texture(Source, xyp_9_14_9.xz ).rgb;
float3 P19 = tex2D(decal, xyp_9_14_9.xw ).rgb; vec3 P19 = texture(Source, xyp_9_14_9.xw ).rgb;
// Store luminance values of each point // Store luminance values of each point in groups of 4
float4 p7 = lum_to(P7, P11, P17, P13); // so that we may operate on all four corners at once
float4 p8 = lum_to(P8, P6, P16, P18); vec4 p7 = lum_to(P7, P11, P17, P13);
float4 p11 = p7.yzwx; // P11, P17, P13, P7 vec4 p8 = lum_to(P8, P6, P16, P18);
float4 p12 = lum_to(P12, P12, P12, P12); vec4 p11 = p7.yzwx; // P11, P17, P13, P7
float4 p13 = p7.wxyz; // P13, P7, P11, P17 vec4 p12 = lum_to(P12, P12, P12, P12);
float4 p14 = lum_to(P14, P2, P10, P22); vec4 p13 = p7.wxyz; // P13, P7, P11, P17
float4 p16 = p8.zwxy; // P16, P18, P8, P6 vec4 p14 = lum_to(P14, P2, P10, P22);
float4 p17 = p7.zwxy; // P11, P17, P13, P7 vec4 p16 = p8.zwxy; // P16, P18, P8, P6
float4 p18 = p8.wxyz; // P18, P8, P6, P16 vec4 p17 = p7.zwxy; // P17, P13, P7, P11
float4 p19 = lum_to(P19, P3, P5, P21); vec4 p18 = p8.wxyz; // P18, P8, P6, P16
float4 p22 = p14.wxyz; // P22, P14, P2, P10 vec4 p19 = lum_to(P19, P3, P5, P21);
float4 p23 = lum_to(P23, P9, P1, P15); vec4 p22 = p14.wxyz; // P22, P14, P2, P10
vec4 p23 = lum_to(P23, P9, P1, P15);
float2 fp = frac(tc * params.SourceSize.xy); // Scale current texel coordinate to [0..1]
vec2 fp = fract(tc * params.SourceSize.xy);
float4 ma45 = smoothstep(C45 - M45, C45 + M45, Ai * fp.y + B45 * fp.x); // Determine amount of "smoothing" or mixing that could be done on texel corners
float4 ma30 = smoothstep(C30 - M30, C30 + M30, Ai * fp.y + B30 * fp.x); vec4 ma45 = smoothstep(C45 - M45, C45 + M45, Ai * fp.y + B45 * fp.x);
float4 ma60 = smoothstep(C60 - M60, C60 + M60, Ai * fp.y + B60 * fp.x); vec4 ma30 = smoothstep(C30 - M30, C30 + M30, Ai * fp.y + B30 * fp.x);
float4 marn = smoothstep(C45 - M45 + Mshift, C45 + M45 + Mshift, Ai * fp.y + B45 * fp.x); vec4 ma60 = smoothstep(C60 - M60, C60 + M60, Ai * fp.y + B60 * fp.x);
vec4 marn = smoothstep(C45 - M45 + Mshift, C45 + M45 + Mshift, Ai * fp.y + B45 * fp.x);
float4 e45 = lum_wd(p12, p8, p16, p18, p22, p14, p17, p13); // Perform edge weight calculations
float4 econt = lum_wd(p17, p11, p23, p13, p7, p19, p12, p18); vec4 e45 = lum_wd(p12, p8, p16, p18, p22, p14, p17, p13);
float4 e30 = lum_df(p13, p16); vec4 econt = lum_wd(p17, p11, p23, p13, p7, p19, p12, p18);
float4 e60 = lum_df(p8, p17); vec4 e30 = lum_df(p13, p16);
vec4 e60 = lum_df(p8, p17);
// Calculate rule results for interpolation
bvec4 r45_1 = _and_(notEqual(p12, p13), notEqual(p12, p17));
bvec4 r45_2 = _and_(not(lum_eq(p13, p7)), not(lum_eq(p13, p8)));
bvec4 r45_3 = _and_(not(lum_eq(p17, p11)), not(lum_eq(p17, p16)));
bvec4 r45_4_1 = _and_(not(lum_eq(p13, p14)), not(lum_eq(p13, p19)));
bvec4 r45_4_2 = _and_(not(lum_eq(p17, p22)), not(lum_eq(p17, p23)));
bvec4 r45_4 = _and_(lum_eq(p12, p18), _or_(r45_4_1, r45_4_2));
bvec4 r45_5 = _or_(lum_eq(p12, p16), lum_eq(p12, p8));
bvec4 r45 = _and_(r45_1, _or_(_or_(_or_(r45_2, r45_3), r45_4), r45_5));
bvec4 r30 = _and_(notEqual(p12, p16), notEqual(p11, p16));
bvec4 r60 = _and_(notEqual(p12, p8), notEqual(p7, p8));
// Combine rules with edge weights
bvec4 edr45 = _and_(lessThan(e45, econt), r45);
bvec4 edrrn = lessThanEqual(e45, econt);
bvec4 edr30 = _and_(lessThanEqual(coef * e30, e60), r30);
bvec4 edr60 = _and_(lessThanEqual(coef * e60, e30), r60);
// Finalize interpolation rules and cast to float (0.0 for false, 1.0 for true)
vec4 final45 = vec4(_and_(_and_(not(edr30), not(edr60)), edr45));
vec4 final30 = vec4(_and_(_and_(edr45, not(edr60)), edr30));
vec4 final60 = vec4(_and_(_and_(edr45, not(edr30)), edr60));
vec4 final36 = vec4(_and_(_and_(edr60, edr30), edr45));
vec4 finalrn = vec4(_and_(not(edr45), edrrn));
// Determine the color to mix with for each corner
vec4 px = step(lum_df(p12, p17), lum_df(p12, p13));
// Determine the mix amounts by combining the final rule result and corresponding
// mix amount for the rule in each corner
vec4 mac = final36 * max(ma30, ma60) + final30 * ma30 + final60 * ma60 + final45 * ma45 + finalrn * marn;
/*
Calculate the resulting color by traversing clockwise and counter-clockwise around
the corners of the texel
/* The whole edge detection thing seems broken here, so may as well comment it out to save cycles Finally choose the result that has the largest difference from the texel's original
bool4 r45 = (notEqual(p12 , p13) == bool4(true) && notEqual(p12 , p17) == bool4(true) && ( color
lum_eq(p13, p7) == bool4(false) && lum_eq(p13, p8) == bool4(false) ||
lum_eq(p17, p11) == bool4(false) && lum_eq(p17, p16) == bool4(false) ||
lum_eq(p12, p18) == bool4(true) && (
lum_eq(p13, p14) == bool4(false) && lum_eq(p13, p19) == bool4(false) ||
lum_eq(p17, p22) == bool4(false) && lum_eq(p17, p23) == bool4(false)) ||
lum_eq(p12, p16) == bool4(true) ||
lum_eq(p12, p8) == bool4(true))) ? bool4(true) : bool4(false);
bool4 r30 = (notEqual(p12 , p16) == bool4(true) && notEqual(p11 , p16) == bool4(true)) ? bool4(true) : bool4(false);
bool4 r60 = (notEqual(p12 , p8) == bool4(true) && notEqual(p7 , p8) == bool4(true)) ? bool4(true) : bool4(false);
bool4 edr45 = (lessThan(e45 , econt) == bool4(true) && r45 == bool4(true)) ? bool4(true) : bool4(false);
bool4 edrrn = (lessThanEqual(e45 , econt) == bool4(true)) ? bool4(true) : bool4(false);
bool4 edr30 = (lessThanEqual(e30 * coef , e60) == bool4(true) && (r30 == bool4(true))) ? bool4(true) : bool4(false);
bool4 edr60 = (lessThanEqual(e60 * coef , e30) == bool4(true) && (r60 == bool4(true))) ? bool4(true) : bool4(false);
*/ */
vec3 res1 = P12;
res1 = mix(res1, mix(P13, P17, px.x), mac.x);
res1 = mix(res1, mix(P7, P13, px.y), mac.y);
res1 = mix(res1, mix(P11, P7, px.z), mac.z);
res1 = mix(res1, mix(P17, P11, px.w), mac.w);
//FIXME: dunno what's up here. final45 is the only one that seems to matter and it's either all on or all off vec3 res2 = P12;
// again, may as well comment it out to save cycles :/ res2 = mix(res2, mix(P17, P11, px.w), mac.w);
float4 final45 = float4(1.0);//(edr30 == bool4(false) && edr60 == bool4(false) && edr45 == bool4(true)) ? float4(1.0) : float4(1.0); res2 = mix(res2, mix(P11, P7, px.z), mac.z);
float4 final30 = float4(0.0);//(edr45 == bool4(true) && edr30 == bool4(true) && edr60 == bool4(false)) ? float4(1.0) : float4(0.0); res2 = mix(res2, mix(P7, P13, px.y), mac.y);
float4 final60 = float4(0.0);//(edr45 == bool4(true) && edr60 == bool4(true) && edr30 == bool4(false)) ? float4(1.0) : float4(0.0); res2 = mix(res2, mix(P13, P17, px.x), mac.x);
float4 final36 = float4(0.0);//(edr45 == bool4(true) && edr30 == bool4(true) && edr60 == bool4(true)) ? float4(1.0) : float4(0.0);
float4 finalrn = float4(0.0);//(edr45 == bool4(false) && edrrn == bool4(true)) ? float4(1.0) : float4(0.0);
float4 px = step(lum_df(p12, p17), lum_df(p12, p13)); FragColor = vec4(mix(res1, res2, step(c_df(P12, res1), c_df(P12, res2))), 1.0);
float4 mac = final36 * max(ma30, ma60) + final30 * ma30 + final60 * ma60 + final45 * ma45 + finalrn * marn;
float3 res1 = P12;
res1 = lerp(res1, lerp(P13, P17, px.x), mac.x);
res1 = lerp(res1, lerp(P7 , P13, px.y), mac.y);
res1 = lerp(res1, lerp(P11, P7 , px.z), mac.z);
res1 = lerp(res1, lerp(P17, P11, px.w), mac.w);
float3 res2 = P12;
res2 = lerp(res2, lerp(P17, P11, px.w), mac.w);
res2 = lerp(res2, lerp(P11, P7 , px.z), mac.z);
res2 = lerp(res2, lerp(P7 , P13, px.y), mac.y);
res2 = lerp(res2, lerp(P13, P17, px.x), mac.x);
FragColor = float4(lerp(res1, res2, step(c_df(P12, res1), c_df(P12, res2))), 1.0);
} }