initial commit for cgwg's famicom decode shader

This commit is contained in:
hunterk 2019-05-12 22:24:26 -05:00
parent 932fe31f55
commit 2730bc6b31
6 changed files with 751 additions and 0 deletions

View file

@ -0,0 +1,25 @@
# Five passes, run in order shader0 .. shader4.
shaders = 5
# Pass 0: rendered at twice the source width; its output is also exposed to
# later passes under the alias "encode_pass".
shader0 = shaders/famicom-raw-cgwg/composite-encode.slang
filter_linear0 = false
scale_type0 = source
scale_x0 = 2.0
frame_count_mod0 = 2
alias0 = encode_pass
float_framebuffer0 = true
# Pass 1: reads the encode pass as Source.
shader1 = shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang
frame_count_mod1 = 2
filter_linear1 = false
float_framebuffer1 = true
# Pass 2: reads both pass 1 (Source) and the aliased encode_pass texture.
shader2 = shaders/famicom-raw-cgwg/adaptive-comb-decode.slang
filter_linear2 = false
frame_count_mod2 = 2
float_framebuffer2 = true
# Pass 3: filters chroma and converts YIQ to RGB.
shader3 = shaders/famicom-raw-cgwg/lowpass-chroma.slang
filter_linear3 = false
float_framebuffer3 = true
# Pass 4: final CRT geometry / scanline pass.
shader4 = shaders/famicom-raw-cgwg/crt-geom-famicom.slang

View file

@ -0,0 +1,118 @@
#version 450
// Per-pass values delivered through the push-constant block.
// In this file SourceSize.xy converts texCoord to texel indices and
// SourceSize.zw is used as the per-texel offset.
layout(push_constant) uniform Push
{
vec4 SourceSize;
vec4 OriginalSize;
vec4 OutputSize;
uint FrameCount;
// Size vector of the texture bound as encode_pass; .zw is the texel step
// used by decode_sample().
vec4 encode_passSize;
} params;
// The frame counter feeds the phase index that selects the sin/cos tables in main().
#define phase params.FrameCount
// Matrix applied to the vertex position in the vertex stage.
layout(std140, set = 0, binding = 0) uniform UBO
{
mat4 MVP;
} global;
#pragma stage vertex
// Vertex stage: transforms the quad and forwards its texture coordinate.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 texCoord;

void main()
{
    // Hand the texture coordinate to the fragment stage unchanged.
    texCoord    = TexCoord;
    // Apply the MVP matrix from the UBO.
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
layout(location = 0) in vec2 texCoord;
layout(location = 0) out vec4 FragColor;
// Sampled by filtsample(); its .yz channels carry a +0.5 bias that filtsample() removes.
layout(set = 0, binding = 2) uniform sampler2D Source;
// Sampled by decode_sample(); each texel is treated as four consecutive signal samples.
layout(set = 0, binding = 3) uniform sampler2D encode_pass;
// Fetch one texel (four signal samples) from encode_pass at an offset of
// `shift` texels from the current fragment and rescale it so that the
// black level maps to 0 and the top level maps to 1.
// Returns vec4(0.0) when the offset coordinate lies outside [0,1] on either axis.
vec4 decode_sample(vec2 shift) {
    const float level_lo    = 0.350*0.746;
    const float level_hi    = 1.962;
    const float level_black = 0.518;

    vec2 uv = texCoord + params.encode_passSize.zw*shift;
    if (any(lessThan(uv, vec2(0.0))) || any(greaterThan(uv, vec2(1.0))))
        return vec4(0.0);

    vec4 stored = texture(encode_pass, uv);
    return (stored * vec4(level_hi-level_lo) + vec4(level_lo-level_black)) / vec4(level_hi-level_black);
}
// Fetch the .xyz of Source at an offset of `shift` texels from the current
// fragment and remove the 0.5 bias from the second and third channels.
// Returns vec3(0.0) when the offset coordinate lies outside [0,1] on either axis.
vec3 filtsample(vec2 shift) {
    vec2 uv = texCoord + params.SourceSize.zw*shift;
    if (any(lessThan(uv, vec2(0.0))) || any(greaterThan(uv, vec2(1.0))))
        return vec3(0.0);
    return texture(Source, uv).xyz - vec3(0,0.5,0.5);
}
// Fragment entry point: blends a three-line comb decode of encode_pass with the
// already-filtered YIQ value from Source, weighting the neighbouring lines by
// how similar their filtered colour is to the current line.
// Writes YIQ with the chroma channels biased by +0.5 and alpha 0.
void main()
{
// Three horizontally adjacent texels of the current line (4 samples each, 12 in total).
vec4 samps[] = vec4[](decode_sample(vec2(-1,0)),
decode_sample(vec2( 0,0)),
decode_sample(vec2( 1,0)));
// quarter-pixel shift between scanlines needed for comb filter
// Previous line: four texels are fetched and re-spliced (zw of one texel with
// xy of the next) so each window starts two samples earlier than samps[i].
vec4 s1, s2, s3, s4;
s1 = decode_sample(vec2(-2,-1));
s2 = decode_sample(vec2(-1,-1));
s3 = decode_sample(vec2( 0,-1));
s4 = decode_sample(vec2( 1,-1));
vec4 samps_prev[] = vec4[](vec4(s1.zw,s2.xy),
vec4(s2.zw,s3.xy),
vec4(s3.zw,s4.xy));
// Next line: same splice, but each window starts two samples later than samps[i].
s1 = decode_sample(vec2(-1,1));
s2 = decode_sample(vec2(-0,1));
s3 = decode_sample(vec2( 1,1));
s4 = decode_sample(vec2( 2,1));
vec4 samps_next[] = vec4[](vec4(s1.zw,s2.xy),
vec4(s2.zw,s3.xy),
vec4(s3.zw,s4.xy));
// Integer texel position and the resulting phase index in 0..2.
uint x = uint(floor(texCoord.x*params.SourceSize.x));
uint y = uint(floor(texCoord.y*params.SourceSize.y));
uint p = (x+y+uint(phase)) % 3u;
const vec4 one = vec4(1.0);
const vec4 PI_6 = vec4(3.14159265359/6.0);
const vec4 offs = vec4(-0.0);
// sin/cos of k*PI/6 for k = 0..11, split into three vec4 groups of four samples.
const vec4 sins[] = vec4[](sin((vec4(0,1, 2, 3)+offs)*PI_6),
sin((vec4(4,5, 6, 7)+offs)*PI_6),
sin((vec4(8,9,10,11)+offs)*PI_6));
const vec4 coss[] = vec4[](cos((vec4(0,1, 2, 3)+offs)*PI_6),
cos((vec4(4,5, 6, 7)+offs)*PI_6),
cos((vec4(8,9,10,11)+offs)*PI_6));
// Filtered value at this texel, plus 3-tap horizontal averages on the
// current, previous and next lines.
vec3 filt = filtsample(vec2(0, 0));
vec3 filt_cur = (filt + filtsample(vec2(-1,0))+filtsample(vec2(1,0)))/3.0;
vec3 filt_prev = (filtsample(vec2(0,-1))+filtsample(vec2(-1,-1))+filtsample(vec2(1,-1)))/3.0;
vec3 filt_next = (filtsample(vec2(0, 1))+filtsample(vec2(-1, 1))+filtsample(vec2(1, 1)))/3.0;
vec3 dif1 = filt_cur-filt_prev;
vec3 dif2 = filt_cur-filt_next;
// this formula can be tweaked
// Line weights in [0, 0.5]: they shrink as the neighbouring line's average
// moves away from the current line's average.
float xprev = clamp(0.5-sqrt(dot(dif1,dif1)),0,0.5);
float xnext = clamp(0.5-sqrt(dot(dif2,dif2)),0,0.5);
// Weight of the current line is capped at 0.5; xprev/xnext are rescaled so
// that their sum matches xcur (the 0.0001 terms avoid 0/0).
float xcur = clamp(xprev+xnext,0,0.5);
float scale = (xcur+0.0001)/(xprev+xnext+0.0001);
xprev *= scale;
xnext *= scale;
// Whatever weight the comb does not use goes to the pre-filtered value.
float xfilt = 1.0 - 2.0*xcur;
vec3 yiq = vec3(0.0);
// Luma: weighted sum of the four centre samples of all three lines.
yiq.x = xcur*dot(samps[1],one) + xprev*dot(samps_prev[1],one) + xnext*dot(samps_next[1],one);
// Chroma: neighbouring lines are subtracted, then the 12 samples are
// multiplied by the cos/sin tables selected by the phase index.
for (uint i = 0u; i < 3u; i++) {
samps[i] = xcur*samps[i] - xprev*samps_prev[i] - xnext*samps_next[i];
yiq.y += dot(samps[i],coss[(i+p+2u)%3u]);
yiq.z += dot(samps[i],sins[(i+p+2u)%3u]);
}
// Normalise: luma summed 4 samples, chroma summed 12 products.
yiq *= vec3(1.0/4.0, 1.0/6.0, 1.0/6.0);
yiq += xfilt*filt;
//FragColor = vec4(dot(yiq, vec3(1.0, 0.946882, 0.623557)),
// dot(yiq, vec3(1.0,-0.274788,-0.635691)),
// dot(yiq, vec3(1.0,-1.108545, 1.709007)),
// 0.0);
// Re-apply the +0.5 chroma bias that filtsample() removes on read.
FragColor = vec4(yiq+vec3(0,0.5,0.5),0.0);
}

View file

@ -0,0 +1,64 @@
#version 450
// NOTE(review): the fragment stage below writes four float samples into a vec4
// FragColor, and the decode passes read all four channels of encode_pass.
// Confirm that R32_UINT is really the intended render-target format.
#pragma format R32_UINT
#pragma name encode_pass
// Per-pass values delivered through the push-constant block.
// SourceSize.xy converts vTexCoord to texel indices; FrameCount shifts the
// carrier phase from frame to frame.
layout(push_constant) uniform Push
{
vec4 SourceSize;
vec4 OriginalSize;
vec4 OutputSize;
uint FrameCount;
} params;
// Matrix applied to the vertex position in the vertex stage.
layout(std140, set = 0, binding = 0) uniform UBO
{
mat4 MVP;
} global;
#pragma stage vertex
// Vertex stage: transforms the quad and forwards its texture coordinate.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 vTexCoord;

void main()
{
    // Pass the texture coordinate through untouched.
    vTexCoord   = TexCoord;
    // Apply the MVP matrix from the UBO.
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
layout(location = 0) in vec2 vTexCoord;
// Receives four consecutive signal samples, one per channel.
layout(location = 0) out vec4 FragColor;
// Only the red channel is read; main() converts it to an unsigned integer.
layout(set = 0, binding = 2) uniform sampler2D Source;
// Fragment entry point: synthesises four consecutive signal samples for this
// output texel from the integer value stored in Source's red channel.
//
// The integer is split into: bits 0-3 colour, bits 4-5 level, bits 6-8
// emphasis. Each sample is either the low or the high voltage of the selected
// level, chosen by a 12-step square wave whose phase depends on the colour,
// and is attenuated when an enabled emphasis bit's own wave is active.
// The result is normalised to 0..1 using the lowest and highest possible
// levels, matching the constants the decode passes use to undo it.
//
// NOTE(review): uint(texture(...).r) assumes Source holds the raw value
// un-normalised — confirm against the frontend/core that feeds this pass.
void main()
{
    uint n = uint(texture(Source, vTexCoord).r);
    uint color = n & 0xfu;
    // Colours 0xe and 0xf are forced to level 1.
    uint level = color < 0xeu ? (n>>4u)&3u : 1u;
    uint emphasis = n >> 6u;

    // Entries 0..3: low voltage per level, entries 4..7: high voltage per level.
    const float levels[] = float[](0.350, 0.518, 0.962, 1.550,
                                   1.094, 1.506, 1.962, 1.962);
    const float attenuation = 0.746;
    const float sig_min = levels[0]*attenuation;
    const float sig_max = levels[7];

    // lo_hi[0] is used while the colour wave is low, lo_hi[1] while it is high.
    // Colour 0 uses the high voltage for both halves; colours >= 0xd use the
    // low voltage for both halves.
    float lo_hi[] = float[](levels[level + 4u*uint(color == 0x0u)],
                            levels[level + 4u*uint(color <  0xdu)]);

    // The output is twice as wide as Source, hence the factor 2 on x.
    uint x = uint(floor(vTexCoord.x*params.SourceSize.x*2.0));
    uint y = uint(floor(vTexCoord.y*params.SourceSize.y));

    // True for 6 out of every 12 phase steps, offset by `co`.
#define wave(ph,co) (((co)+(ph)+8u)%12u<6u)

    float frag[4];
    for (uint i = 0u; i < 4u; i++) {
        // Phase step (0..11) of this sample: advances by 4 per texel, per
        // line and per frame.
        uint p = (x*4u+i + y*4u + uint(params.FrameCount*4)) % 12u;
        float spot = lo_hi[uint(wave(p,color))];

        // Each emphasis bit must be tested against zero: the masked value is
        // 0/1, 0/2 or 0/4, so comparing it with 1u only ever matched bit 0.
        bool attenuate = (((emphasis&1u) != 0u) && wave(p,12u))
                      || (((emphasis&2u) != 0u) && wave(p,4u))
                      || (((emphasis&4u) != 0u) && wave(p,8u));
        if (attenuate)
            spot *= attenuation;

        frag[i] = spot;
    }

    FragColor = (vec4(frag[0], frag[1], frag[2], frag[3]) - vec4(sig_min))/vec4(sig_max-sig_min);
}

View file

@ -0,0 +1,329 @@
#version 450
// Per-pass values and user parameters delivered through the push-constant block.
layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;
    uint FrameCount;
    float CURVATURE_toggle, CRTgamma, overscan_x, overscan_y, distance, radius, tiltangle_x, tiltangle_y, cornersize, cornersmooth;
} params;
// The macros below let the shader body refer to each parameter by a short name.
#pragma parameter CURVATURE_toggle "Curvature Toggle" 1.0 0.0 1.0 1.0
#define CURVATURE bool(params.CURVATURE_toggle)
// gamma of simulated CRT
#pragma parameter CRTgamma "CRT Gamma" 2.4 1.0 4.0 0.05
#define CRTgamma params.CRTgamma
// overscan (e.g. 1.02 for 2% overscan)
#pragma parameter overscan_x "Overscan X" 1.0 0.0 2.0 0.01
#pragma parameter overscan_y "Overscan Y" 1.0 0.0 2.0 0.01
#define overscan vec2(params.overscan_x, params.overscan_y)
#pragma parameter distance "Viewing Distance" 2.0 0.1 5.0 0.1
#define distance params.distance
// radius of curvature
#pragma parameter radius "Curvature Radius" 2.0 0.1 5.0 0.1
#define radius params.radius
// tilt angle in radians
// (behavior might be a bit wrong if both components are nonzero)
#pragma parameter tiltangle_x "Tilt Angle X" 0.0 -1.0 1.0 0.05
#pragma parameter tiltangle_y "Tilt Angle Y" 0.0 -1.0 1.0 0.05
// A constant 0.001 is added to both components. The expansion is
// parenthesised so the macro stays a single operand wherever it is used.
#define tiltangle (vec2(params.tiltangle_x, params.tiltangle_y) + 0.001)
// size of curved corners
#pragma parameter cornersize "Corner Size" 0.02 0.0001 0.1 0.01
#define cornersize params.cornersize
// border smoothness parameter
// decrease if borders are too aliased
#pragma parameter cornersmooth "Corner Smoothness" 800.0 0.8 2000.0 50.0
#define cornersmooth params.cornersmooth
// Matrix applied to the vertex position in the vertex stage.
layout(std140, set = 0, binding = 0) uniform UBO
{
    mat4 MVP;
} global;
#pragma stage vertex
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 texCoord;
// xy: offset, z: scale factor; computed by maxscale() and consumed by the
// fragment stage's transform().
layout(location = 1) out vec3 stretch;
// aspect ratio
const vec2 aspect = vec2(1.0, 0.75);
// Short names for the curvature parameters used by the geometry helpers.
float d = distance;
float R = radius;
vec2 sinangle = sin(tiltangle);
vec2 cosangle = cos(tiltangle);
// NOTE(review): the next three globals are not referenced anywhere in the
// vertex stage shown in this file.
vec2 one;
float mod_factor;
vec2 ilfac;
// Magnitude of c, clamped away from zero. No trailing semicolon in the macro
// body, so it behaves as a plain expression at every use site.
#define FIX(c) max(abs(c), 1e-5)
// Solves a quadratic built from xy, the viewing distance d, the curvature
// radius R and the tilt sines/cosines, and returns the root taken with the
// negative square root.
float intersect(vec2 xy)
{
    float tilt_cos = cosangle.x*cosangle.y;
    float qa = dot(xy,xy)+d*d;
    float qb = 2.0*(R*(dot(xy,sinangle)-d*tilt_cos)-d*d);
    float qc = d*d + 2.0*R*d*tilt_cos;
    float disc = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(disc))/(2.0*qa);
}
// Backward transform used by maxscale(): scales xy by the root returned from
// intersect(), shifts and normalises by R, solves a second quadratic for `a`,
// and converts the result to coordinates scaled by r/sin(r/R).
vec2 bkwtrans(vec2 xy)
{
    float c = intersect(xy);
    vec2 point = vec2(c)*xy;
    point -= vec2(-R)*sinangle;
    point /= vec2(R);
    vec2 tang = sinangle/cosangle;
    vec2 poc = point/cosangle;
    float A = dot(tang,tang)+1.0;
    float B = -2.0*dot(poc,tang);
    float C = dot(poc,poc)-1.0;
    float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
    vec2 uv = (point-a*sinangle)/cosangle;
    // Keep r away from zero so r/sin(r/R) below cannot become 0/0. The
    // fragment-stage copy of this function applies the same guard.
    float r = FIX(R*acos(a));
    return uv*r/sin(r/R);
}
// Forward transform used by maxscale(); works on the length of uv (clamped
// away from zero), R, d and the tilt sines/cosines.
vec2 fwtrans(vec2 uv)
{
    float arc = FIX(sqrt(dot(uv,uv)));
    vec2 scaled = uv * (sin(arc/R)/arc);
    float drop = 1.0-cos(arc/R);
    float denom = d/R + drop*cosangle.x*cosangle.y+dot(scaled,sinangle);
    return d*(scaled*cosangle-drop*sinangle)/denom;
}
// Computes the `stretch` varying: xy is the midpoint of the forward-transformed
// extent of the +/-0.5*aspect rectangle, z is the larger of its two
// aspect-normalised spans.
vec3 maxscale()
{
    vec2 pivot = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y));
    vec2 half_size = vec2(0.5,0.5)*aspect;

    float lo_x = fwtrans(vec2(-half_size.x,pivot.y)).x;
    float lo_y = fwtrans(vec2(pivot.x,-half_size.y)).y;
    float hi_x = fwtrans(vec2(+half_size.x,pivot.y)).x;
    float hi_y = fwtrans(vec2(pivot.x,+half_size.y)).y;

    vec2 lo = vec2(lo_x, lo_y)/aspect;
    vec2 hi = vec2(hi_x, hi_y)/aspect;

    vec2 midpoint = (hi+lo)*aspect*0.5;
    float span = max(hi.x-lo.x,hi.y-lo.y);
    return vec3(midpoint, span);
}
// Vertex entry point: evaluates the stretch factors once per vertex and
// forwards them with the texture coordinate.
void main()
{
    stretch     = maxscale();
    texCoord    = TexCoord;
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
layout(location = 0) in vec2 texCoord;
layout(location = 1) in vec3 stretch;
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source;
// Uncomment the next line to interpolate in linear gamma: TEX2D() then applies
// CRTgamma to every fetch, otherwise the gamma is applied once after filtering.
//#define LINEAR_PROCESSING
// Enable 3x oversampling of the beam profile
#define OVERSAMPLE
// Use the older, purely gaussian beam profile
//#define USEGAUSSIAN
// gamma of display monitor (typically 2.2 is correct)
#define monitorgamma 2.2
// aspect ratio
const vec2 aspect = vec2(1.0, 0.75);
// lengths are measured in units of (approximately) the width of the monitor
// simulated distance from viewer to monitor
float d = distance;
float R = radius;
vec2 sinangle = sin(tiltangle);
vec2 cosangle = cos(tiltangle);
// Size of one Source texel in texture coordinates.
#define one (params.SourceSize.zw)
// Macros.
// Magnitude of c, clamped away from zero. Defined once, with no trailing
// semicolon in the body, so it behaves as a plain expression.
#define FIX(c) max(abs(c), 1e-5)
#define PI 3.141592653589
#ifdef LINEAR_PROCESSING
# define TEX2D(c) pow(texture(Source, (c)), vec4(CRTgamma))
#else
# define TEX2D(c) texture(Source, (c))
#endif
// Solves a quadratic built from xy, the viewing distance d, the curvature
// radius R and the tilt sines/cosines, and returns the root taken with the
// negative square root.
float intersect(vec2 xy)
{
    float tilt_cos = cosangle.x*cosangle.y;
    float qa = dot(xy,xy)+d*d;
    float qb = 2.0*(R*(dot(xy,sinangle)-d*tilt_cos)-d*d);
    float qc = d*d + 2.0*R*d*tilt_cos;
    float disc = qb*qb-4.0*qa*qc;
    return (-qb-sqrt(disc))/(2.0*qa);
}
// Backward transform used by transform(): scales xy by the root returned from
// intersect(), shifts and normalises by R, solves a second quadratic, and
// converts the result to coordinates scaled by r/sin(r/R), with r clamped
// away from zero.
vec2 bkwtrans(vec2 xy)
{
    float hit = intersect(xy);
    vec2 pt = vec2(hit)*xy;
    pt -= vec2(-R)*sinangle;
    pt /= vec2(R);

    vec2 tilt_tan = sinangle/cosangle;
    vec2 pt_over_cos = pt/cosangle;

    float qa = dot(tilt_tan,tilt_tan)+1.0;
    float qb = -2.0*dot(pt_over_cos,tilt_tan);
    float qc = dot(pt_over_cos,pt_over_cos)-1.0;
    float root = (-qb+sqrt(qb*qb-4.0*qa*qc))/(2.0*qa);

    vec2 uv = (pt-root*sinangle)/cosangle;
    float r = FIX(R*acos(root));
    return uv*r/sin(r/R);
}
// Maps a screen texture coordinate to the curved-screen texture coordinate:
// centre, apply aspect and the stretch varying, run bkwtrans(), then undo
// overscan and aspect and re-centre on 0.5.
vec2 transform(vec2 coord)
{
    vec2 centred = (coord-vec2(0.5))*aspect*stretch.z+stretch.xy;
    vec2 warped = bkwtrans(centred);
    return (warped/overscan/aspect+vec2(0.5));
}
// Returns a 0..1 mask value derived from the distance of coord to the nearest
// corner circle of radius `cornersize`, scaled by `cornersmooth`.
float corner(vec2 coord)
{
    vec2 scaled = (coord - vec2(0.5)) * overscan + vec2(0.5);
    // Distance to the nearest edge on each axis, in aspect-corrected units.
    vec2 edge = min(scaled, vec2(1.0)-scaled) * aspect;
    vec2 rad = vec2(cornersize);
    vec2 inset = (rad - min(edge,rad));
    float dist = sqrt(dot(inset,inset));
    return clamp((rad.x-dist)*cornersmooth,0.0, 1.0);
}
// Per-channel contribution of one scanline to the current pixel.
//
// 'dist' is the distance from the current pixel to the scanline, in units
// of the scanline spacing (callers pass the fractional row position).
// 'color' is the scanline's colour at the pixel's horizontal position; it
// controls the beam profile separately for each channel.
//
// With USEGAUSSIAN the profile is a gaussian whose width grows with the
// cube of the colour. Otherwise the exponent of the profile grows with the
// fourth power of the colour (2 gives a gaussian, larger values a flatter
// top), and the result is divided by a width-dependent factor. Ideally the
// integral across the beam would be independent of its width.
vec4 scanlineWeights(float dist, vec4 color)
{
#ifdef USEGAUSSIAN
    vec4 beam = 0.3 + 0.1 * pow(color, vec4(3.0));
    vec4 rel = vec4(dist / beam);
    return 0.4 * exp(-rel * rel) / beam;
#else
    vec4 shape = 2.0 + 2.0 * pow(color, vec4(4.0));
    vec4 rel = vec4(dist / 0.3);
    return 1.4 * exp(-pow(rel * inversesqrt(0.5 * shape), shape)) / (0.6 + 0.2 * shape);
#endif
}
// Fragment entry point: optionally warps the coordinate for screen curvature,
// filters Source horizontally with a 4-tap Lanczos2 kernel on two adjacent
// rows, blends the rows with the scanline beam profile, applies the corner
// mask and a two-colour dot mask, and converts to the monitor gamma.
void main()
{
// Here's a helpful diagram to keep in mind while trying to
// understand the code:
//
// | | | | |
// -------------------------------
// | | | | |
// | 01 | 11 | 21 | 31 | <-- current scanline
// | | @ | | |
// -------------------------------
// | | | | |
// | 02 | 12 | 22 | 32 | <-- next scanline
// | | | | |
// -------------------------------
// | | | | |
//
// Each character-cell represents a pixel on the output
// surface, "@" represents the current pixel (always somewhere
// in the bottom half of the current scan-line, or the top-half
// of the next scanline). The grid of lines represents the
// edges of the texels of the underlying texture.
// Texture coordinates of the texel containing the active pixel.
vec2 xy = CURVATURE ? transform(texCoord) : texCoord;
// Corner mask, evaluated before xy is snapped to a texel centre.
float cval = corner(xy);
// Of all the pixels that are mapped onto the texel we are
// currently rendering, which pixel are we currently rendering?
vec2 ratio_scale = xy * params.SourceSize.xy - vec2(0.5);
#ifdef OVERSAMPLE
// Change of the row position between neighbouring output pixels; used as
// the spacing of the extra beam-profile samples below.
float filter_ = fwidth(ratio_scale.y);
#endif
vec2 uv_ratio = fract(ratio_scale);
// Snap to the center of the underlying texel.
xy = (floor(ratio_scale) + vec2(0.5)) / params.SourceSize.xy;
// Calculate Lanczos scaling coefficients describing the effect
// of various neighbour texels in a scanline on the current
// pixel.
vec4 coeffs = PI * vec4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
// Prevent division by zero.
coeffs = FIX(coeffs);
// Lanczos2 kernel.
coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);
// Normalize.
coeffs /= dot(coeffs, vec4(1.0));
// Calculate the effective colour of the current and next
// scanlines at the horizontal location of the current pixel,
// using the Lanczos coefficients above.
vec4 col = clamp(mat4(
TEX2D(xy + vec2(-one.x, 0.0)),
TEX2D(xy),
TEX2D(xy + vec2(one.x, 0.0)),
TEX2D(xy + vec2(2.0 * one.x, 0.0))) * coeffs,
0.0, 1.0);
vec4 col2 = clamp(mat4(
TEX2D(xy + vec2(-one.x, one.y)),
TEX2D(xy + vec2(0.0, one.y)),
TEX2D(xy + one),
TEX2D(xy + vec2(2.0 * one.x, one.y))) * coeffs,
0.0, 1.0);
#ifndef LINEAR_PROCESSING
// Without LINEAR_PROCESSING the CRT gamma is applied after filtering.
col = pow(col , vec4(CRTgamma));
col2 = pow(col2, vec4(CRTgamma));
#endif
// Calculate the influence of the current and next scanlines on
// the current pixel.
vec4 weights = scanlineWeights(uv_ratio.y, col);
vec4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
#ifdef OVERSAMPLE
// Average the beam profile over three vertical positions: the pixel
// centre, one third of filter_ below it and one third above it.
uv_ratio.y =uv_ratio.y+1.0/3.0*filter_;
weights = (weights+scanlineWeights(uv_ratio.y, col))/3.0;
weights2=(weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2))/3.0;
uv_ratio.y =uv_ratio.y-2.0/3.0*filter_;
weights=weights+scanlineWeights(abs(uv_ratio.y), col)/3.0;
weights2=weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2)/3.0;
#endif
vec3 mul_res = (col * weights + col2 * weights2).rgb * vec3(cval);
// dot-mask emulation:
// Output pixels are alternately tinted green and magenta.
vec3 dotMaskWeights = mix(
vec3(1.0, 0.7, 1.0),
vec3(0.7, 1.0, 0.7),
floor(mod(texCoord.x*params.OutputSize.x, 2.0))
);
mul_res *= dotMaskWeights;
// Convert the image gamma for display on our output device.
mul_res = pow(mul_res, vec3(1.0 / monitorgamma));
// Color the texel.
FragColor = vec4(mul_res, 1.0);
}

View file

@ -0,0 +1,82 @@
#version 450
// Per-pass values and user parameters delivered through the push-constant block.
// SourceSize.zw is used as the per-texel offset by chroma_sample().
layout(push_constant) uniform Push
{
vec4 SourceSize;
vec4 OriginalSize;
vec4 OutputSize;
uint FrameCount;
float postfilter_taps, postfilter_chromacutoff;
} params;
// after mixing with the adaptive comb filter
// Tap count is truncated to an integer; main() visits offsets -taps/2 .. +taps/2.
#pragma parameter postfilter_taps "Post-filter Taps" 15.0 1.0 30.0 1.0
#define postfiltertaps int(params.postfilter_taps)
// main() divides this cutoff by 3 before using it as the filter bandwidth.
#pragma parameter postfilter_chromacutoff "Post-filter Chroma Cutoff" 0.3 0.1 1.0 0.05
#define postfilterchromacutoff params.postfilter_chromacutoff
// Matrix applied to the vertex position in the vertex stage.
layout(std140, set = 0, binding = 0) uniform UBO
{
mat4 MVP;
} global;
#pragma stage vertex
// Vertex stage: transforms the quad and forwards its texture coordinate.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 texCoord;

void main()
{
    // Texture coordinate is forwarded as-is.
    texCoord    = TexCoord;
    // Apply the MVP matrix from the UBO.
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
layout(location = 0) in vec2 texCoord;
// Receives RGB (converted from YIQ in main()) with alpha 0.
layout(location = 0) out vec4 FragColor;
// Sampled by chroma_sample(); its .yz channels carry a +0.5 bias that is removed on read.
layout(set = 0, binding = 2) uniform sampler2D Source;
// Fetch the .xyz of Source at an offset of `shift` texels from the current
// fragment and remove the 0.5 bias from the second and third channels.
// Returns vec3(0.0) when the offset x coordinate lies outside [0,1];
// the y coordinate is not range-checked.
vec3 chroma_sample(vec2 shift) {
    vec2 uv = texCoord + params.SourceSize.zw*shift;
    bool outside = uv.x < 0.0 || uv.x > 1.0;
    if (outside)
        return vec3(0.0);
    return texture(Source, uv).xyz - vec3(0,0.5,0.5);
}
// sin(x)/x evaluated on |x|, with |x| clamped to at least 0.0001 so the
// division is always defined.
float sinc(float x) {
    float ax = max(abs(x), 0.0001);
    return sin(ax)/ax;
}
// Fragment entry point: low-pass filters the two chroma channels of Source
// horizontally with a Hamming-windowed sinc kernel, leaves luma untouched,
// and writes the result converted from YIQ to RGB (alpha 0).
void main()
{
    const int width = postfiltertaps;
    const float bw_c = postfilterchromacutoff/3.0;
    const float PI = 3.14159265359;

    // Denominator of the window argument. The tap parameter may be set to 1,
    // which would make (width-1) zero and turn the window into inf*0 = NaN.
    // For every width > 1 this is exactly the original (width-1).
    float window_span = float(max(width - 1, 1));

    float norm_c = 0.0;
    vec3 yiq = vec3(0.0);
    // Luma is taken from the centre texel only.
    yiq.x = chroma_sample(vec2(0,0)).x;

    for (int i = -width/2; i <= width/2; i++) {
        // Hamming window
        const float alpha = 0.54;
        const float beta = 0.46;
        float window = alpha - beta * cos(2.0*PI/window_span*float(width/2+i));
        float chromafilt = 2.0*bw_c*sinc(2.0*PI*bw_c*float(i));
        vec3 samp = chroma_sample(vec2(i,0));
        float filt = window*chromafilt;
        yiq.yz += samp.yz*vec2(filt);
        // Accumulate the kernel sum so the result can be normalised below.
        norm_c += filt;
    }

    yiq *= vec3(1.0, 1.0/norm_c, 1.0/norm_c);

    // YIQ -> RGB.
    FragColor = vec4(dot(yiq, vec3(1.0, 0.946882, 0.623557)),
                     dot(yiq, vec3(1.0,-0.274788,-0.635691)),
                     dot(yiq, vec3(1.0,-1.108545, 1.709007)),
                     0.0);
}

View file

@ -0,0 +1,133 @@
#version 450
// Per-pass values and user parameters delivered through the push-constant block.
// SourceSize.xy converts texCoord to texel indices; SourceSize.z is the
// horizontal texel step used when walking the filter taps.
layout(push_constant) uniform Push
{
vec4 SourceSize;
vec4 OriginalSize;
vec4 OutputSize;
uint FrameCount;
float filter_taps, lumacutoff, chromacutoff;
} params;
// frequencies are in units of the chroma subcarrier
// these are for the lowpass+notch filter
// Tap count is truncated to an integer before use.
#pragma parameter filter_taps "Filter Taps" 15.0 1.0 30.0 1.0
#define filtertaps int(params.filter_taps)
// main() divides both cutoffs by 12 to express them per signal sample.
#pragma parameter lumacutoff "Luma Cutoff" 0.9 0.1 1.0 0.05
#define lumacutoff params.lumacutoff
#pragma parameter chromacutoff "Chroma Cutoff" 0.3 0.1 1.0 0.05
#define chromacutoff params.chromacutoff
// The frame counter, as a signed int, feeds the phase index in main().
#define phase int(params.FrameCount)
// Matrix applied to the vertex position in the vertex stage.
layout(std140, set = 0, binding = 0) uniform UBO
{
mat4 MVP;
} global;
#pragma stage vertex
// Vertex stage: transforms the quad and forwards its texture coordinate.
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 texCoord;

void main()
{
    // Forward the texture coordinate unchanged.
    texCoord    = TexCoord;
    // Apply the MVP matrix from the UBO.
    gl_Position = global.MVP * Position;
}
#pragma stage fragment
layout(location = 0) in vec2 texCoord;
// Receives YIQ with the chroma channels biased by +0.5 and alpha 0.
layout(location = 0) out vec4 FragColor;
// Sampled by decode_sample(); each texel is treated as four consecutive signal samples.
layout(set = 0, binding = 2) uniform sampler2D Source;
// Fetch one texel (four signal samples) from Source at `coord` and rescale it
// so that the black level maps to 0 and the top level maps to 1.
// Returns vec4(0.0) when coord.x lies outside [0,1]; coord.y is not checked.
vec4 decode_sample(vec2 coord) {
    const float level_lo    = 0.350*0.746;
    const float level_hi    = 1.962;
    const float level_black = 0.518;

    bool off_left  = coord.x < 0.0;
    bool off_right = coord.x > 1.0;
    if (off_left || off_right)
        return vec4(0.0);

    vec4 stored = texture(Source, coord);
    return (stored * vec4(level_hi-level_lo) + vec4(level_lo-level_black)) / vec4(level_hi-level_black);
}
// Component-wise sin(x)/x on |x|, with |x| clamped to at least 0.0001 so the
// division is always defined (four-component overload).
vec4 sinc(vec4 x) {
    vec4 ax = max(abs(x), vec4(0.0001));
    return sin(ax)/ax;
}
// Component-wise sin(x)/x on |x|, with |x| clamped to at least 0.0001 so the
// division is always defined (two-component overload).
vec2 sinc(vec2 x) {
    vec2 ax = max(abs(x), vec2(0.0001));
    return sin(ax)/ax;
}
// Fragment entry point: decodes the sampled signal in Source to YIQ with a
// Hamming-windowed sinc FIR. Luma is a low-pass of the sum of two windows six
// samples apart; chroma multiplies the same windows by cos/sin at PI/6 per
// sample before low-passing. Writes YIQ with chroma biased by +0.5, alpha 0.
void main()
{
const int width = filtertaps;
// Cutoffs are given in units of the subcarrier; the carrier advances PI/6
// per sample below, i.e. 12 samples per cycle, hence the division by 12.
const float bw_y = lumacutoff/12.0;
const float bw_c = chromacutoff/12.0;
// Integer texel position and the resulting phase index.
int x = int(floor(texCoord.x*params.SourceSize.x));
int y = int(floor(texCoord.y*params.SourceSize.y));
int p = (x+y+phase) % 3;
const vec4 one = vec4(1.0);
const float PI = 3.14159265359;
const vec4 PI_6 = vec4(PI/6.0);
// Running sums of the kernel weights, used to normalise at the end.
float norm_y = 0.0;
float norm_c = 0.0;
vec3 yiq = vec3(0.0);
// Prime a sliding window of texels: samp is the texel at offset i,
// samp2 the texel at offset i+1.
vec4 samp = decode_sample(texCoord + vec2((-width/2 )*params.SourceSize.z,0));
vec4 samp2 = decode_sample(texCoord + vec2((-width/2+1)*params.SourceSize.z,0));
// Hamming window
const float alpha = 0.54;
const float beta = 0.46;
// i is declared outside the loop because the tail section below reuses its final value.
int i;
for (i = -width/2; i <= width/2-2; i++) {
// Window and kernel values for the four samples of texel i.
vec4 window = vec4(alpha) - vec4(beta) * cos(vec4(2.0*PI/(4*width-7))*(vec4(4*(width/2+i))+vec4(0,1,2,3)));
vec4 t = vec4(i*4)+vec4(-1.5,-0.5,+0.5,+1.5);
#define lowpass(freq,x) vec4(2.0*(freq))*sinc(vec4(2.0*PI*(freq))*(x))
vec4 lumafilt = lowpass(bw_y, t);
vec4 chromafilt = lowpass(bw_c, t);
#undef lowpass
vec4 samp3 = decode_sample(texCoord + vec2((i+2)*params.SourceSize.z,0));
// Second window: starts six samples after samp (last half of texel i+1
// followed by the first half of texel i+2).
vec4 rsamp = vec4(samp2.zw, samp3.xy);
vec4 filt = window*lumafilt;
// Luma: both windows share the same weights.
yiq.x += dot(samp, filt) + dot(rsamp,filt);
norm_y += dot(one, filt);
filt = window*chromafilt;
// Chroma: each window is multiplied by the carrier at its own sample
// indices (0..3 and 6..9 relative to the start of texel i).
yiq.y += dot(samp, filt*cos((vec4(0,1,2,3)+vec4(4*(i+p)))*PI_6));
yiq.y += dot(rsamp,filt*cos((vec4(6,7,8,9)+vec4(4*(i+p)))*PI_6));
yiq.z += dot(samp, filt*sin((vec4(0,1,2,3)+vec4(4*(i+p)))*PI_6));
yiq.z += dot(rsamp,filt*sin((vec4(6,7,8,9)+vec4(4*(i+p)))*PI_6));
norm_c += dot(one,filt);
// Slide the texel window one step to the right.
samp = samp2;
samp2 = samp3;
}
// Tail: one more half-texel (two samples per window) using the value i has
// after the loop and the texels already held in samp / samp2.
vec2 window = vec2(alpha) - vec2(beta) * cos(vec2(2.0*PI/(4*width-7))*(vec2(4*(width/2+i))+vec2(0,1)));
vec2 t = vec2(i*4)+vec2(-1.5,-0.5);
#define lowpass(freq,x) vec2(2.0*(freq))*sinc(vec2(2.0*PI*(freq))*(x))
vec2 lumafilt = lowpass(bw_y, t);
vec2 chromafilt = lowpass(bw_c, t);
#undef lowpass
vec2 filt = window*lumafilt;
yiq.x += dot(samp.xy, filt) + dot(samp2.zw, filt);
norm_y += dot(one.xy, filt);
filt = window*chromafilt;
yiq.y += dot(samp.xy, filt*cos((vec2(0,1)+vec2(4*(i+p)))*vec2(PI/6.0)));
yiq.y += dot(samp2.zw,filt*cos((vec2(6,7)+vec2(4*(i+p)))*vec2(PI/6.0)));
yiq.z += dot(samp.xy, filt*sin((vec2(0,1)+vec2(4*(i+p)))*vec2(PI/6.0)));
yiq.z += dot(samp2.zw,filt*sin((vec2(6,7)+vec2(4*(i+p)))*vec2(PI/6.0)));
norm_c += dot(one.xy, filt);
// Luma summed two windows per weight, hence 0.5/norm_y.
yiq *= vec3(0.5/norm_y, 1.0/norm_c, 1.0/norm_c);
//FragColor = vec4(dot(yiq, vec3(1.0, 0.946882, 0.623557)),
// dot(yiq, vec3(1.0,-0.274788,-0.635691)),
// dot(yiq, vec3(1.0,-1.108545, 1.709007)),
// 0.0);
// Bias chroma by +0.5 before writing it out.
FragColor = vec4(yiq+vec3(0,0.5,0.5),0.0);
}