From 2730bc6b31ceff6369070bb8df87bfcf4b194558 Mon Sep 17 00:00:00 2001 From: hunterk Date: Sun, 12 May 2019 22:24:26 -0500 Subject: [PATCH] initial commit for cgwg's famicom decode shader --- test/famicom-raw-cgwg.slangp | 25 ++ .../adaptive-comb-decode.slang | 118 +++++++ .../famicom-raw-cgwg/composite-encode.slang | 64 ++++ .../famicom-raw-cgwg/crt-geom-famicom.slang | 329 ++++++++++++++++++ .../famicom-raw-cgwg/lowpass-chroma.slang | 82 +++++ .../lowpass-notch-decode-yiq.slang | 133 +++++++ 6 files changed, 751 insertions(+) create mode 100644 test/famicom-raw-cgwg.slangp create mode 100644 test/shaders/famicom-raw-cgwg/adaptive-comb-decode.slang create mode 100644 test/shaders/famicom-raw-cgwg/composite-encode.slang create mode 100644 test/shaders/famicom-raw-cgwg/crt-geom-famicom.slang create mode 100644 test/shaders/famicom-raw-cgwg/lowpass-chroma.slang create mode 100644 test/shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang diff --git a/test/famicom-raw-cgwg.slangp b/test/famicom-raw-cgwg.slangp new file mode 100644 index 0000000..6f33ba1 --- /dev/null +++ b/test/famicom-raw-cgwg.slangp @@ -0,0 +1,25 @@ +shaders = 5 + +shader0 = shaders/famicom-raw-cgwg/composite-encode.slang +filter_linear0 = false +scale_type0 = source +scale_x0 = 2.0 +frame_count_mod0 = 2 +alias0 = encode_pass +float_framebuffer0 = true + +shader1 = shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang +frame_count_mod1 = 2 +filter_linear1 = false +float_framebuffer1 = true + +shader2 = shaders/famicom-raw-cgwg/adaptive-comb-decode.slang +filter_linear2 = false +frame_count_mod2 = 2 +float_framebuffer2 = true + +shader3 = shaders/famicom-raw-cgwg/lowpass-chroma.slang +filter_linear3 = false +float_framebuffer3 = true + +shader4 = shaders/famicom-raw-cgwg/crt-geom-famicom.slang diff --git a/test/shaders/famicom-raw-cgwg/adaptive-comb-decode.slang b/test/shaders/famicom-raw-cgwg/adaptive-comb-decode.slang new file mode 100644 index 0000000..1024862 --- /dev/null +++ 
b/test/shaders/famicom-raw-cgwg/adaptive-comb-decode.slang
@@ -0,0 +1,136 @@
+#version 450
+
+// Adaptive comb filter pass. It combines a three-scanline comb decode of the
+// raw composite samples (encode_pass) with the YIQ estimate delivered by the
+// previous pass in Source.
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   vec4 encode_passSize;
+} params;
+
+#define phase params.FrameCount
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+   mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texCoord;
+
+void main()
+{
+   texCoord = TexCoord;
+   gl_Position = global.MVP * Position;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+layout(set = 0, binding = 3) uniform sampler2D encode_pass;
+
+// True when uv lies outside the [0,1] x [0,1] texture rectangle.
+bool outside_texture(vec2 uv) {
+  return uv.x < 0.0 || uv.x > 1.0 || uv.y < 0.0 || uv.y > 1.0;
+}
+
+// Reads four packed composite samples 'shift' encode_pass texels away from
+// the current fragment, rescaled so the black level is 0 and the highest
+// level is 1. Samples outside the texture read as zero.
+vec4 decode_sample(vec2 shift) {
+  const float lowest = 0.350*0.746;
+  const float highest = 1.962;
+  const float black = 0.518;
+  vec2 uv = texCoord + params.encode_passSize.zw*shift;
+  if (outside_texture(uv)) return vec4(0.0);
+  vec4 raw = texture(encode_pass, uv);
+  return (raw * vec4(highest-lowest) + vec4(lowest-black)) / vec4(highest-black);
+}
+
+// Reads the YIQ texel 'shift' Source texels away, with the +0.5 bias removed
+// from I and Q. Samples outside the texture read as zero.
+vec3 filtsample(vec2 shift) {
+  vec2 uv = texCoord + params.SourceSize.zw*shift;
+  if (outside_texture(uv)) return vec3(0.0);
+  return texture(Source, uv).xyz-vec3(0,0.5,0.5);
+}
+
+void main()
+{
+  // Composite samples on the current scanline: left, centre and right texel.
+  vec4 cur_line[3] = vec4[3](decode_sample(vec2(-1,0)),
+                             decode_sample(vec2( 0,0)),
+                             decode_sample(vec2( 1,0)));
+
+  // quarter-pixel shift between scanlines needed for comb filter
+  vec4 t0 = decode_sample(vec2(-2,-1));
+  vec4 t1 = decode_sample(vec2(-1,-1));
+  vec4 t2 = decode_sample(vec2( 0,-1));
+  vec4 t3 = decode_sample(vec2( 1,-1));
+  vec4 prev_line[3] = vec4[3](vec4(t0.zw,t1.xy),
+                              vec4(t1.zw,t2.xy),
+                              vec4(t2.zw,t3.xy));
+
+  t0 = decode_sample(vec2(-1,1));
+  t1 = decode_sample(vec2( 0,1));
+  t2 = decode_sample(vec2( 1,1));
+  t3 = decode_sample(vec2( 2,1));
+  vec4 next_line[3] = vec4[3](vec4(t0.zw,t1.xy),
+                              vec4(t1.zw,t2.xy),
+                              vec4(t2.zw,t3.xy));
+
+  // Subcarrier phase of this texel, in thirds of a cycle.
+  uint col = uint(floor(texCoord.x*params.SourceSize.x));
+  uint row = uint(floor(texCoord.y*params.SourceSize.y));
+  uint third = (col+row+uint(phase)) % 3u;
+
+  // sin/cos of the twelve sample phases of one subcarrier cycle.
+  const vec4 one = vec4(1.0);
+  const vec4 PI_6 = vec4(3.14159265359/6.0);
+  const vec4 offs = vec4(-0.0);
+  const vec4 sins[] = vec4[](sin((vec4(0,1, 2, 3)+offs)*PI_6),
+                             sin((vec4(4,5, 6, 7)+offs)*PI_6),
+                             sin((vec4(8,9,10,11)+offs)*PI_6));
+  const vec4 coss[] = vec4[](cos((vec4(0,1, 2, 3)+offs)*PI_6),
+                             cos((vec4(4,5, 6, 7)+offs)*PI_6),
+                             cos((vec4(8,9,10,11)+offs)*PI_6));
+
+  // Three-texel horizontal averages of the incoming YIQ on each scanline.
+  vec3 filt = filtsample(vec2(0, 0));
+  vec3 avg_cur = (filt + filtsample(vec2(-1,0))+filtsample(vec2(1,0)))/3.0;
+  vec3 avg_prev = (filtsample(vec2(0,-1))+filtsample(vec2(-1,-1))+filtsample(vec2(1,-1)))/3.0;
+  vec3 avg_next = (filtsample(vec2(0, 1))+filtsample(vec2(-1, 1))+filtsample(vec2(1, 1)))/3.0;
+  vec3 delta_prev = avg_cur-avg_prev;
+  vec3 delta_next = avg_cur-avg_next;
+  // this formula can be tweaked
+  float w_prev = clamp(0.5-sqrt(dot(delta_prev,delta_prev)),0.0,0.5);
+  float w_next = clamp(0.5-sqrt(dot(delta_next,delta_next)),0.0,0.5);
+  float w_cur = clamp(w_prev+w_next,0.0,0.5);
+  float rescale = (w_cur+0.0001)/(w_prev+w_next+0.0001);
+  w_prev *= rescale;
+  w_next *= rescale;
+  float w_filt = 1.0 - 2.0*w_cur;
+
+  vec3 yiq = vec3(0.0);
+  yiq.x = w_cur*dot(cur_line[1],one) + w_prev*dot(prev_line[1],one) + w_next*dot(next_line[1],one);
+  for (uint i = 0u; i < 3u; i++) {
+    vec4 combed = w_cur*cur_line[i] - w_prev*prev_line[i] - w_next*next_line[i];
+    uint k = (i+third+2u)%3u;
+    yiq.y += dot(combed,coss[k]);
+    yiq.z += dot(combed,sins[k]);
+  }
+
+  yiq *= vec3(1.0/4.0, 1.0/6.0, 1.0/6.0);
+  yiq += w_filt*filt;
+
+  // YIQ is emitted with I and Q biased by +0.5; no RGB conversion happens here.
+  FragColor = vec4(yiq+vec3(0,0.5,0.5),0.0);
+}
diff --git a/test/shaders/famicom-raw-cgwg/composite-encode.slang
b/test/shaders/famicom-raw-cgwg/composite-encode.slang
new file mode 100644
index 0000000..cea13ac
--- /dev/null
+++ b/test/shaders/famicom-raw-cgwg/composite-encode.slang
@@ -0,0 +1,87 @@
+#version 450
+#pragma format R32_UINT
+#pragma name encode_pass
+
+// Composite encode pass: turns each source texel (a raw palette value) into
+// eight composite-signal samples, four per output texel, at twelve samples per
+// chroma subcarrier cycle.
+//
+// NOTE(review): the fragment stage writes a normalized float vec4 while the
+// render-target format requested above is R32_UINT; confirm how the frontend
+// resolves this against float_framebuffer0 in the preset.
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+} params;
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+   mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 vTexCoord;
+
+void main()
+{
+   gl_Position = global.MVP * Position;
+   vTexCoord = TexCoord;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 vTexCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// True while the square wave for hue/phase offset 'co' is in its active
+// half-cycle at sample phase 'ph' (both counted in twelfths of a cycle).
+bool wave(uint ph, uint co)
+{
+  return ((co + ph + 8u) % 12u) < 6u;
+}
+
+void main()
+{
+  // NOTE(review): assumes Source delivers the unnormalized raw value in .r
+  // (bits 0-3 hue, bits 4-5 level, bits 6+ emphasis) -- confirm with the core.
+  uint n = uint(texture(Source, vTexCoord).r);
+  uint color = n & 0xfu;
+  uint level = color < 0xeu ? (n>>4u)&3u : 1u;
+  uint emphasis = n >> 6u;
+
+  // Signal levels: entries 0-3 are the low levels, 4-7 the high levels.
+  const float levels[] = float[](0.350, 0.518, 0.962, 1.550,
+                                 1.094, 1.506, 1.962, 1.962);
+  const float attenuation = 0.746;
+  const float min = levels[0]*attenuation;
+  const float max = levels[7];
+
+  // Hue 0 stays at the high level and hues 0xd and above at the low level;
+  // every other hue alternates between the two as a square wave.
+  float lo_hi[] = float[](levels[level + 4u*uint(color == 0x0u)],
+                          levels[level + 4u*uint(color < 0xdu)]);
+
+  uint x = uint(floor(vTexCoord.x*params.SourceSize.x*2.0));
+  uint y = uint(floor(vTexCoord.y*params.SourceSize.y));
+  float frag[4];
+  for (uint i = 0u; i < 4u; i++) {
+    uint p = (x*4u+i + y*4u + params.FrameCount*4u) % 12u;
+    float spot = lo_hi[uint(wave(p,color))];
+    // Each emphasis bit attenuates the signal during its own half-cycle.
+    // The bits are tested against zero: (emphasis&2u) and (emphasis&4u) can
+    // only be 0, 2 or 4, so an "== 1u" comparison would never match them.
+    if ( (((emphasis&1u)!=0u) && wave(p,12u))
+      || (((emphasis&2u)!=0u) && wave(p,4u))
+      || (((emphasis&4u)!=0u) && wave(p,8u)))
+      spot *= attenuation;
+    frag[i] = spot;
+  }
+  // Normalize to [0,1] between the lowest (attenuated) and the highest level.
+  FragColor = (vec4(frag[0], frag[1], frag[2], frag[3]) - vec4(min))/vec4(max-min);
+}
diff --git a/test/shaders/famicom-raw-cgwg/crt-geom-famicom.slang
b/test/shaders/famicom-raw-cgwg/crt-geom-famicom.slang
new file mode 100644
index 0000000..c56027e
--- /dev/null
+++ b/test/shaders/famicom-raw-cgwg/crt-geom-famicom.slang
@@ -0,0 +1,343 @@
+#version 450
+
+// CRT geometry pass of the famicom-raw-cgwg preset (curvature, scanlines and
+// dot mask). The declarations ahead of the vertex stage are the user-tunable
+// parameters shared by both stages.
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float CURVATURE_toggle, CRTgamma, overscan_x, overscan_y, distance, radius, tiltangle_x, tiltangle_y, cornersize, cornersmooth;
+} params;
+
+// screen curvature on (1) / off (0)
+#pragma parameter CURVATURE_toggle "Curvature Toggle" 1.0 0.0 1.0 1.0
+#define CURVATURE bool(params.CURVATURE_toggle)
+// gamma of simulated CRT
+#pragma parameter CRTgamma "CRT Gamma" 2.4 1.0 4.0 0.05
+#define CRTgamma params.CRTgamma
+// overscan (e.g. 1.02 for 2% overscan)
+#pragma parameter overscan_x "Overscan X" 1.0 0.0 2.0 0.01
+#pragma parameter overscan_y "Overscan Y" 1.0 0.0 2.0 0.01
+#define overscan vec2(params.overscan_x, params.overscan_y)
+// simulated distance from viewer to monitor
+#pragma parameter distance "Viewing Distance" 2.0 0.1 5.0 0.1
+#define distance params.distance
+// radius of curvature
+#pragma parameter radius "Curvature Radius" 2.0 0.1 5.0 0.1
+#define radius params.radius
+// tilt angle in radians
+// (behavior might be a bit wrong if both components are nonzero)
+#pragma parameter tiltangle_x "Tilt Angle X" 0.0 -1.0 1.0 0.05
+#pragma parameter tiltangle_y "Tilt Angle Y" 0.0 -1.0 1.0 0.05
+// parenthesized so the expansion stays one operand inside any expression
+#define tiltangle (vec2(params.tiltangle_x, params.tiltangle_y) + 0.001)
+// size of curved corners
+#pragma parameter cornersize "Corner Size" 0.02 0.0001 0.1 0.01
+#define cornersize params.cornersize
+// border smoothness parameter
+// decrease if borders are too aliased
+#pragma parameter cornersmooth "Corner Smoothness" 800.0 0.8 2000.0 50.0
+#define cornersmooth params.cornersmooth
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+   mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texCoord;
+layout(location = 1) out vec3 stretch;
+
+// aspect ratio
+const vec2 aspect = vec2(1.0, 0.75);
+// viewing distance, curvature radius and tilt terms used by the transforms
+float d = distance;
+float R = radius;
+vec2 sinangle = sin(tiltangle);
+vec2 cosangle = cos(tiltangle);
+
+// |c| clamped away from zero; no trailing semicolon so it works in expressions
+#define FIX(c) max(abs(c), 1e-5)
+
+// Returns the smaller root of A*c^2 + B*c + C = 0, the scale applied to the
+// view-plane point xy by bkwtrans().
+float intersect(vec2 xy)
+{
+   float A = dot(xy,xy)+d*d;
+   float B = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
+   float C = d*d + 2.0*R*d*cosangle.x*cosangle.y;
+   return (-B-sqrt(B*B-4.0*A*C))/(2.0*A);
+}
+
+// Maps a view-plane point xy to coordinates on the curved screen
+// (presumably the inverse of fwtrans(), as the names suggest).
+vec2 bkwtrans(vec2 xy)
+{
+   float c = intersect(xy);
+   vec2 point = vec2(c)*xy;
+   point -= vec2(-R)*sinangle;
+   point /= vec2(R);
+   vec2 tang = sinangle/cosangle;
+   vec2 poc = point/cosangle;
+   float A = dot(tang,tang)+1.0;
+   float B = -2.0*dot(poc,tang);
+   float C = dot(poc,poc)-1.0;
+   float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
+   vec2 uv = (point-a*sinangle)/cosangle;
+   float r = FIX(R*acos(a)); // guarded like the fragment-stage copy: r == 0 would give 0/0 below
+   return uv*r/sin(r/R);
+}
+
+// Maps curved-screen coordinates uv to the view plane (presumably the forward
+// counterpart of bkwtrans()).
+vec2 fwtrans(vec2 uv)
+{
+   float r = FIX(sqrt(dot(uv,uv)));
+   uv *= sin(r/R)/r;
+   float x = 1.0-cos(r/R);
+   float D = d/R + x*cosangle.x*cosangle.y+dot(uv,sinangle);
+   return d*(uv*cosangle-x*sinangle)/D;
+}
+
+// Returns (centre.x, centre.y, extent): the midpoint and the larger extent of
+// the projected screen edges; the fragment stage uses it as offset and scale.
+vec3 maxscale()
+{
+   vec2 c = bkwtrans(-R * sinangle / (1.0 + R/d*cosangle.x*cosangle.y));
+   vec2 a = vec2(0.5,0.5)*aspect;
+   vec2 lo = vec2(fwtrans(vec2(-a.x,c.y)).x,
+                  fwtrans(vec2(c.x,-a.y)).y)/aspect;
+   vec2 hi = vec2(fwtrans(vec2(+a.x,c.y)).x,
+                  fwtrans(vec2(c.x,+a.y)).y)/aspect;
+   return vec3((hi+lo)*aspect*0.5,max(hi.x-lo.x,hi.y-lo.y));
+}
+
+void main()
+{
+   gl_Position = global.MVP * Position;
+   texCoord = TexCoord;
+   stretch = maxscale();
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texCoord;
+layout(location = 1) in vec3 stretch;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Uncomment the next line to interpolate in linear gamma (slower); it is commented out by default.
+//#define LINEAR_PROCESSING
+
+// Enable 3x oversampling of the beam profile
+#define OVERSAMPLE
+
+// Use the older, purely gaussian beam profile
+//#define USEGAUSSIAN
+
+// gamma of display monitor (typically 2.2 is correct)
+#define monitorgamma 2.2
+
+// aspect ratio
+const vec2 aspect = vec2(1.0, 0.75);
+// lengths are measured in units of (approximately) the width of the monitor
+// simulated distance from viewer to monitor
+float d = distance;
+float R = radius;
+
+vec2 sinangle = sin(tiltangle);
+vec2 cosangle = cos(tiltangle);
+
+#define one (params.SourceSize.zw)
+
+// Macros. FIX clamps a magnitude away from zero so it is safe as a divisor.
+#define FIX(c) max(abs(c), 1e-5)
+#define PI 3.141592653589
+
+#ifdef LINEAR_PROCESSING
+# define TEX2D(c) pow(texture(Source, (c)), vec4(CRTgamma))
+#else
+# define TEX2D(c) texture(Source, (c))
+#endif
+
+// Returns the smaller root of A*c^2 + B*c + C = 0, the scale applied to the
+// view-plane point xy by bkwtrans().
+float intersect(vec2 xy)
+{
+   float A = dot(xy,xy)+d*d;
+   float B = 2.0*(R*(dot(xy,sinangle)-d*cosangle.x*cosangle.y)-d*d);
+   float C = d*d + 2.0*R*d*cosangle.x*cosangle.y;
+   return (-B-sqrt(B*B-4.0*A*C))/(2.0*A);
+}
+
+vec2 bkwtrans(vec2 xy)
+{
+   float c = intersect(xy);
+   vec2 point = vec2(c)*xy;
+   point -= vec2(-R)*sinangle;
+   point /= vec2(R);
+   vec2 tang = sinangle/cosangle;
+   vec2 poc = point/cosangle;
+   float A = dot(tang,tang)+1.0;
+   float B = -2.0*dot(poc,tang);
+   float C = dot(poc,poc)-1.0;
+   float a = (-B+sqrt(B*B-4.0*A*C))/(2.0*A);
+   vec2 uv = (point-a*sinangle)/cosangle;
+   float r = FIX(R*acos(a));
+   return uv*r/sin(r/R);
+}
+
+vec2 transform(vec2 coord)
+{
+   coord = (coord-vec2(0.5))*aspect*stretch.z+stretch.xy;
+   return (bkwtrans(coord)/overscan/aspect+vec2(0.5));
+}
+
+float corner(vec2 coord)
+{
+   coord = (coord - vec2(0.5)) * overscan + vec2(0.5);
+   coord = min(coord, vec2(1.0)-coord) * aspect;
+   vec2 cdist = vec2(cornersize);
+   coord = (cdist - min(coord,cdist));
+   float dist = sqrt(dot(coord,coord));
+   return clamp((cdist.x-dist)*cornersmooth,0.0, 1.0);
+}
+
+// Calculate the influence of a scanline on the current pixel.
+//
+// 'dist' is the distance in texture coordinates from the current
+// pixel to the scanline in question.
+// 'color' is the colour of the scanline at the horizontal location of
+// the current pixel.
+vec4 scanlineWeights(float dist, vec4 color)
+{
+   // "wid" controls the width of the scanline beam, for each RGB channel
+   // The "weights" lines basically specify the formula that gives
+   // you the profile of the beam, i.e. the intensity as
+   // a function of distance from the vertical center of the
+   // scanline. In this case, it is gaussian if width=2, and
+   // becomes nongaussian for larger widths. Ideally this should
+   // be normalized so that the integral across the beam is
+   // independent of its width. That is, for a narrower beam
+   // "weights" should have a higher peak at the center of the
+   // scanline than for a wider beam.
+#ifdef USEGAUSSIAN
+   vec4 wid = 0.3 + 0.1 * pow(color, vec4(3.0));
+   vec4 weights = vec4(dist / wid);
+   return 0.4 * exp(-weights * weights) / wid;
+#else
+   vec4 wid = 2.0 + 2.0 * pow(color, vec4(4.0));
+   vec4 weights = vec4(dist / 0.3);
+   return 1.4 * exp(-pow(weights * inversesqrt(0.5 * wid), wid)) / (0.6 + 0.2 * wid);
+#endif
+}
+
+void main()
+{
+   // Here's a helpful diagram to keep in mind while trying to
+   // understand the code:
+   //
+   //  |      |      |      |      |
+   // -------------------------------
+   //  |      |      |      |      |
+   //  |  01  |  11  |  21  |  31  | <-- current scanline
+   //  |      |  @   |      |      |
+   // -------------------------------
+   //  |      |      |      |      |
+   //  |  02  |  12  |  22  |  32  | <-- next scanline
+   //  |      |      |      |      |
+   // -------------------------------
+   //  |      |      |      |      |
+   //
+   // Each character-cell represents a pixel on the output
+   // surface, "@" represents the current pixel (always somewhere
+   // in the bottom half of the current scan-line, or the top-half
+   // of the next scanline). The grid of lines represents the
+   // edges of the texels of the underlying texture.
+
+   // Texture coordinates of the texel containing the active pixel.
+   vec2 xy = CURVATURE ? transform(texCoord) : texCoord;
+   float cval = corner(xy);
+
+   // Of all the pixels that are mapped onto the texel we are
+   // currently rendering, which pixel are we currently rendering?
+   vec2 ratio_scale = xy * params.SourceSize.xy - vec2(0.5);
+
+#ifdef OVERSAMPLE
+   float filter_ = fwidth(ratio_scale.y);
+#endif
+   vec2 uv_ratio = fract(ratio_scale);
+
+   // Snap to the center of the underlying texel.
+   xy = (floor(ratio_scale) + vec2(0.5)) / params.SourceSize.xy;
+
+   // Calculate Lanczos scaling coefficients describing the effect
+   // of various neighbour texels in a scanline on the current
+   // pixel.
+   vec4 coeffs = PI * vec4(1.0 + uv_ratio.x, uv_ratio.x, 1.0 - uv_ratio.x, 2.0 - uv_ratio.x);
+
+   // Prevent division by zero.
+   coeffs = FIX(coeffs);
+
+   // Lanczos2 kernel.
+   coeffs = 2.0 * sin(coeffs) * sin(coeffs / 2.0) / (coeffs * coeffs);
+
+   // Normalize.
+   coeffs /= dot(coeffs, vec4(1.0));
+
+   // Calculate the effective colour of the current and next
+   // scanlines at the horizontal location of the current pixel,
+   // using the Lanczos coefficients above.
+   vec4 col = clamp(mat4(
+      TEX2D(xy + vec2(-one.x, 0.0)),
+      TEX2D(xy),
+      TEX2D(xy + vec2(one.x, 0.0)),
+      TEX2D(xy + vec2(2.0 * one.x, 0.0))) * coeffs,
+      0.0, 1.0);
+   vec4 col2 = clamp(mat4(
+      TEX2D(xy + vec2(-one.x, one.y)),
+      TEX2D(xy + vec2(0.0, one.y)),
+      TEX2D(xy + one),
+      TEX2D(xy + vec2(2.0 * one.x, one.y))) * coeffs,
+      0.0, 1.0);
+
+#ifndef LINEAR_PROCESSING
+   col = pow(col , vec4(CRTgamma));
+   col2 = pow(col2, vec4(CRTgamma));
+#endif
+
+   // Calculate the influence of the current and next scanlines on
+   // the current pixel.
+   vec4 weights = scanlineWeights(uv_ratio.y, col);
+   vec4 weights2 = scanlineWeights(1.0 - uv_ratio.y, col2);
+#ifdef OVERSAMPLE
+   uv_ratio.y =uv_ratio.y+1.0/3.0*filter_;
+   weights = (weights+scanlineWeights(uv_ratio.y, col))/3.0;
+   weights2=(weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2))/3.0;
+   uv_ratio.y =uv_ratio.y-2.0/3.0*filter_;
+   weights=weights+scanlineWeights(abs(uv_ratio.y), col)/3.0;
+   weights2=weights2+scanlineWeights(abs(1.0-uv_ratio.y), col2)/3.0;
+#endif
+   vec3 mul_res = (col * weights + col2 * weights2).rgb * vec3(cval);
+
+   // dot-mask emulation:
+   // Output pixels are alternately tinted green and magenta.
+   vec3 dotMaskWeights = mix(
+      vec3(1.0, 0.7, 1.0),
+      vec3(0.7, 1.0, 0.7),
+      floor(mod(texCoord.x*params.OutputSize.x, 2.0))
+   );
+
+   mul_res *= dotMaskWeights;
+
+   // Convert the image gamma for display on our output device.
+   mul_res = pow(mul_res, vec3(1.0 / monitorgamma));
+
+   // Color the texel.
+   FragColor = vec4(mul_res, 1.0);
+}
diff --git a/test/shaders/famicom-raw-cgwg/lowpass-chroma.slang b/test/shaders/famicom-raw-cgwg/lowpass-chroma.slang
new file mode 100644
index 0000000..8ed2e04
--- /dev/null
+++ b/test/shaders/famicom-raw-cgwg/lowpass-chroma.slang
@@ -0,0 +1,95 @@
+#version 450
+
+// Chroma post-filter: low-passes the I/Q channels horizontally with a
+// Hamming-windowed sinc and converts the resulting YIQ colour to RGB.
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float postfilter_taps, postfilter_chromacutoff;
+} params;
+
+// after mixing with the adaptive comb filter
+#pragma parameter postfilter_taps "Post-filter Taps" 15.0 1.0 30.0 1.0
+#define postfiltertaps int(params.postfilter_taps)
+#pragma parameter postfilter_chromacutoff "Post-filter Chroma Cutoff" 0.3 0.1 1.0 0.05
+#define postfilterchromacutoff params.postfilter_chromacutoff
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+   mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texCoord;
+
+void main()
+{
+   gl_Position = global.MVP * Position;
+   texCoord = TexCoord;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Reads the YIQ texel 'shift' source texels away from the current fragment
+// and removes the +0.5 bias from I and Q. Returns zero when the sample
+// falls outside the texture horizontally.
+vec3 chroma_sample(vec2 shift) {
+  vec2 coord = texCoord + params.SourceSize.zw*shift;
+  if (coord.x < 0.0 || coord.x > 1.0) return vec3(0.0);
+  return texture(Source, coord).xyz-vec3(0,0.5,0.5);
+}
+
+// sin(x)/x with |x| clamped away from zero.
+float sinc(float x) {
+  x = max(abs(x),0.0001);
+  return sin(x)/x;
+}
+
+void main()
+{
+  const int width = postfiltertaps;
+  const float bw_c = postfilterchromacutoff/3.0;
+
+  const float PI = 3.14159265359;
+
+  // Window denominator. With "Post-filter Taps" at its minimum of 1, width-1
+  // is zero and the window term becomes a division by zero (typically NaN),
+  // which would poison both chroma channels; clamp it to at least 1.
+  const float window_span = float(max(width-1, 1));
+
+  float norm_c = 0.0;
+  vec3 yiq = vec3(0.0);
+  // Luma passes through unfiltered.
+  yiq.x = chroma_sample(vec2(0,0)).x;
+  for (int i = -width/2; i <= width/2; i++) {
+    // Hamming window
+    const float alpha = 0.54;
+    const float beta = 0.46;
+    float window = alpha - beta * cos(2.0*PI/window_span*(width/2+i));
+
+    float chromafilt = 2.0*bw_c*sinc(2.0*PI*bw_c*i);
+
+    vec3 samp = chroma_sample(vec2(i,0));
+    float filt = window*chromafilt;
+    yiq.yz += samp.yz*vec2(filt);
+    norm_c += filt;
+  }
+
+  // Normalise chroma by the summed filter weights (unity DC gain).
+  yiq *= vec3(1.0, 1.0/norm_c, 1.0/norm_c);
+
+  // Convert YIQ to RGB for the following pass.
+  FragColor = vec4(dot(yiq, vec3(1.0, 0.946882, 0.623557)),
+                   dot(yiq, vec3(1.0,-0.274788,-0.635691)),
+                   dot(yiq, vec3(1.0,-1.108545, 1.709007)),
+                   0.0);
+}
diff --git a/test/shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang b/test/shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang
new file mode 100644
index 0000000..eba26b5
--- /dev/null
+++ b/test/shaders/famicom-raw-cgwg/lowpass-notch-decode-yiq.slang
@@ -0,0 +1,145 @@
+#version 450
+
+// Low-pass + notch decode pass: filters the raw composite samples along the
+// scanline and demodulates them into YIQ.
+
+layout(push_constant) uniform Push
+{
+   vec4 SourceSize;
+   vec4 OriginalSize;
+   vec4 OutputSize;
+   uint FrameCount;
+   float filter_taps, lumacutoff, chromacutoff;
+} params;
+
+// frequencies are in units of the chroma subcarrier
+// these are for the lowpass+notch filter
+#pragma parameter filter_taps "Filter Taps" 15.0 1.0 30.0 1.0
+#define filtertaps int(params.filter_taps)
+#pragma parameter lumacutoff "Luma Cutoff" 0.9 0.1 1.0 0.05
+#define lumacutoff params.lumacutoff
+#pragma parameter chromacutoff "Chroma Cutoff" 0.3 0.1 1.0 0.05
+#define chromacutoff params.chromacutoff
+
+// frame counter, added to the per-texel subcarrier phase
+#define phase int(params.FrameCount)
+
+layout(std140, set = 0, binding = 0) uniform UBO
+{
+   mat4 MVP;
+} global;
+
+#pragma stage vertex
+layout(location = 0) in vec4 Position;
+layout(location = 1) in vec2 TexCoord;
+layout(location = 0) out vec2 texCoord;
+
+void main()
+{
+   gl_Position = global.MVP * Position;
+   texCoord = TexCoord;
+}
+
+#pragma stage fragment
+layout(location = 0) in vec2 texCoord;
+layout(location = 0) out vec4 FragColor;
+layout(set = 0, binding = 2) uniform sampler2D Source;
+
+// Reads four packed composite samples at 'coord' and rescales them so that
+// the black level maps to 0 and the highest level to 1. Returns zero when
+// coord is horizontally outside the texture.
+vec4 decode_sample(vec2 coord) {
+  const float min = 0.350*0.746;
+  const float max = 1.962;
+  const float black = 0.518;
+  if (coord.x < 0.0 || coord.x > 1.0) return vec4(0.0);
+  return (texture(Source, coord) * vec4(max-min) + vec4(min-black)) / vec4(max-black);
+}
+
+// sin(x)/x with |x| clamped away from zero (vec4 and vec2 overloads).
+vec4 sinc(vec4 x) {
+  x = max(abs(x),vec4(0.0001));
+  return sin(x)/x;
+}
+vec2 sinc(vec2 x) {
+  x = max(abs(x),vec2(0.0001));
+  return sin(x)/x;
+}
+
+void main()
+{
+  const int width = filtertaps;
+  const float bw_y = lumacutoff/12.0;
+  const float bw_c = chromacutoff/12.0;
+
+  // Subcarrier phase of this texel, in thirds of a cycle (4 of 12 samples).
+  int x = int(floor(texCoord.x*params.SourceSize.x));
+  int y = int(floor(texCoord.y*params.SourceSize.y));
+  int p = (x+y+phase) % 3;
+
+  const vec4 one = vec4(1.0);
+  const float PI = 3.14159265359;
+  const vec4 PI_6 = vec4(PI/6.0);
+
+  float norm_y = 0.0;
+  float norm_c = 0.0;
+  vec3 yiq = vec3(0.0);
+
+  // Prime the sliding window with the two left-most texels of the kernel.
+  vec4 samp = decode_sample(texCoord + vec2((-width/2 )*params.SourceSize.z,0));
+  vec4 samp2 = decode_sample(texCoord + vec2((-width/2+1)*params.SourceSize.z,0));
+  // Hamming window
+  const float alpha = 0.54;
+  const float beta = 0.46;
+  int i;
+  for (i = -width/2; i <= width/2-2; i++) {
+    vec4 window = vec4(alpha) - vec4(beta) * cos(vec4(2.0*PI/(4*width-7))*(vec4(4*(width/2+i))+vec4(0,1,2,3)));
+
+    vec4 t = vec4(i*4)+vec4(-1.5,-0.5,+0.5,+1.5);
+#define lowpass(freq,x) vec4(2.0*(freq))*sinc(vec4(2.0*PI*(freq))*(x))
+    vec4 lumafilt = lowpass(bw_y, t);
+    vec4 chromafilt = lowpass(bw_c, t);
+#undef lowpass
+
+    vec4 samp3 = decode_sample(texCoord + vec2((i+2)*params.SourceSize.z,0));
+    vec4 rsamp = vec4(samp2.zw, samp3.xy);
+    vec4 filt = window*lumafilt;
+    yiq.x += dot(samp, filt) + dot(rsamp,filt);
+    norm_y += dot(one, filt);
+    filt = window*chromafilt;
+    yiq.y += dot(samp, filt*cos((vec4(0,1,2,3)+vec4(4*(i+p)))*PI_6));
+    yiq.y += dot(rsamp,filt*cos((vec4(6,7,8,9)+vec4(4*(i+p)))*PI_6));
+    yiq.z += dot(samp, filt*sin((vec4(0,1,2,3)+vec4(4*(i+p)))*PI_6));
+    yiq.z += dot(rsamp,filt*sin((vec4(6,7,8,9)+vec4(4*(i+p)))*PI_6));
+    norm_c += dot(one,filt);
+
+    samp = samp2;
+    samp2 = samp3;
+  }
+  // Tail of the kernel: two samples from each of the remaining texels.
+  vec2 window = vec2(alpha) - vec2(beta) * cos(vec2(2.0*PI/(4*width-7))*(vec2(4*(width/2+i))+vec2(0,1)));
+  vec2 t = vec2(i*4)+vec2(-1.5,-0.5);
+
+#define lowpass(freq,x) vec2(2.0*(freq))*sinc(vec2(2.0*PI*(freq))*(x))
+  vec2 lumafilt = lowpass(bw_y, t);
+  vec2 chromafilt = lowpass(bw_c, t);
+#undef lowpass
+  vec2 filt = window*lumafilt;
+  yiq.x += dot(samp.xy, filt) + dot(samp2.zw, filt);
+  norm_y += dot(one.xy, filt);
+  filt = window*chromafilt;
+  yiq.y += dot(samp.xy, filt*cos((vec2(0,1)+vec2(4*(i+p)))*vec2(PI/6.0)));
+  yiq.y += dot(samp2.zw,filt*cos((vec2(6,7)+vec2(4*(i+p)))*vec2(PI/6.0)));
+  yiq.z += dot(samp.xy, filt*sin((vec2(0,1)+vec2(4*(i+p)))*vec2(PI/6.0)));
+  yiq.z += dot(samp2.zw,filt*sin((vec2(6,7)+vec2(4*(i+p)))*vec2(PI/6.0)));
+  norm_c += dot(one.xy, filt);
+
+  // Each luma weight was applied to two sample groups, hence the 0.5.
+  yiq *= vec3(0.5/norm_y, 1.0/norm_c, 1.0/norm_c);
+
+  //FragColor = vec4(dot(yiq, vec3(1.0, 0.946882, 0.623557)),
+  //                 dot(yiq, vec3(1.0,-0.274788,-0.635691)),
+  //                 dot(yiq, vec3(1.0,-1.108545, 1.709007)),
+  //                 0.0);
+  FragColor = vec4(yiq+vec3(0,0.5,0.5),0.0);
+}