From 6f921ee4815a7894a33855974285b04545a4fa42 Mon Sep 17 00:00:00 2001 From: fishcu Date: Sat, 29 Jul 2023 23:32:34 +0200 Subject: [PATCH] Optimize and simplify Pixel AA, Average Fill, and Blur Fill presets (#467) * Simplify pixel AA; Dependents to be refactored * Finish refactoring pixel_aa itself; Dependent presets TBD * Update average and border fill * Tune default blur strengths in blur fill * Clean up includes * Minor corrections --- border/blur_fill.slangp | 4 +- border/blur_fill_stronger_blur.slangp | 6 +- border/blur_fill_weaker_blur.slangp | 4 +- border/shaders/average_fill/compose.slang | 73 +++++++-------- border/shaders/average_fill/parameters.slang | 2 +- border/shaders/blur_fill/compose.slang | 65 ++++++-------- border/shaders/blur_fill/parameters.slang | 2 +- interpolation/shaders/pixel_aa/pixel_aa.slang | 45 ++++------ interpolation/shaders/pixel_aa/shared.slang | 90 ++++++++++++------- 9 files changed, 141 insertions(+), 150 deletions(-) diff --git a/border/blur_fill.slangp b/border/blur_fill.slangp index 2d4c312..8302d30 100644 --- a/border/blur_fill.slangp +++ b/border/blur_fill.slangp @@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat shader3 = shaders/blur_fill/render_sampling_areas.slang filter_linear3 = true scale_type3 = source -scale_x3 = 2.0 -scale_y3 = 2.0 +scale_x3 = 1.1 +scale_y3 = 1.1 float_framebuffer3 = true alias3 = "Tiled" diff --git a/border/blur_fill_stronger_blur.slangp b/border/blur_fill_stronger_blur.slangp index 1844874..3f55571 100644 --- a/border/blur_fill_stronger_blur.slangp +++ b/border/blur_fill_stronger_blur.slangp @@ -1,7 +1,7 @@ shaders = 12 parameters = "SIGMA;BLUR_RADIUS" -SIGMA = 2.0 +SIGMA = 1.5 BLUR_RADIUS = 3.0 shader0 = ../blurs/shaders/kawase/linearize.slang @@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat shader3 = shaders/blur_fill/render_sampling_areas.slang filter_linear3 = true scale_type3 = source -scale_x3 = 1.0 -scale_y3 = 1.0 +scale_x3 = 0.7 +scale_y3 = 0.7 float_framebuffer3 = true alias3 = "Tiled" diff --git a/border/blur_fill_weaker_blur.slangp b/border/blur_fill_weaker_blur.slangp index 7c9e171..abd615d 100644 --- a/border/blur_fill_weaker_blur.slangp +++ b/border/blur_fill_weaker_blur.slangp @@ -27,8 +27,8 @@ wrap_mode2 = mirrored_repeat shader3 = shaders/blur_fill/render_sampling_areas.slang filter_linear3 = true scale_type3 = source -scale_x3 = 3.0 -scale_y3 = 3.0 +scale_x3 = 2.0 +scale_y3 = 2.0 float_framebuffer3 = true alias3 = "Tiled" diff --git a/border/shaders/average_fill/compose.slang b/border/shaders/average_fill/compose.slang index 9dbe42b..f5f9892 100644 --- a/border/shaders/average_fill/compose.slang +++ b/border/shaders/average_fill/compose.slang @@ -1,7 +1,7 @@ #version 450 /* - Average fill v1.4 by fishku + Average fill v1.5 by fishku Copyright (C) 2023 Public domain license (CC0) @@ -27,6 +27,7 @@ 3 = Smooth angle-based blending Changelog: + v1.5: Optimize. Update to new Pixel AA version. v1.4: Add anti-aliased interpolation for non-integer scaling. v1.3: Fix scaling bugs. v1.2: Fix scaling bugs. @@ -34,7 +35,10 @@ v1.0: Initial release. */ +// clang-format off #include "parameters.slang" +#include "../../../interpolation/shaders/pixel_aa/shared.slang" +// clang-format on layout(push_constant) uniform Push { vec4 InputSize; @@ -69,14 +73,24 @@ global; layout(location = 0) in vec4 Position; layout(location = 1) in vec2 TexCoord; layout(location = 0) out vec2 vTexCoord; +layout(location = 1) out vec2 tx_coord; +layout(location = 2) out vec2 tx_per_px; +layout(location = 3) out vec2 tx_to_uv; void main() { gl_Position = global.MVP * Position; vTexCoord = TexCoord; + const vec2 scale_o2i = scale_o2i(); + tx_coord = (vTexCoord - 0.49999) * scale_o2i + get_input_center(); + tx_per_px = scale_o2i * param.FinalViewportSize.zw; + tx_to_uv = param.InputSize.zw; } #pragma stage fragment layout(location = 0) in vec2 vTexCoord; +layout(location = 1) in vec2 tx_coord; +layout(location = 2) in vec2 tx_per_px; +layout(location = 3) in vec2 tx_to_uv; layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Input; layout(set = 0, binding = 3) uniform sampler2D Top; @@ -122,20 +136,14 @@ vec3 blend_corner(vec3 a, // The first color to blend } } -#include "../../../interpolation/shaders/pixel_aa/shared.slang" - void main() { - const vec2 scale_o2i = scale_o2i(); - const vec2 pixel_coord = - (vTexCoord - 0.49999) * scale_o2i + get_input_center(); - - if (pixel_coord.x < param.OS_CROP_LEFT) { + if (tx_coord.x < param.OS_CROP_LEFT) { if (param.EXTEND_H < 0.5) { FragColor = vec4(0.0, 0.0, 0.0, 1.0); return; } const vec3 left = textureLod(Left, vec2(0.5), BIG_NUMBER).rgb; - if (pixel_coord.y < param.OS_CROP_TOP) { + if (tx_coord.y < param.OS_CROP_TOP) { if (param.EXTEND_V < 0.5) { FragColor = vec4(0.0, 0.0, 0.0, 1.0); return; @@ -155,7 +163,7 @@ void main() { viewport_corner - content_corner), 1.0); FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA)); - } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { + } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { // Left bar FragColor = vec4(pow(left, vec3(param.FILL_GAMMA)), 1.0); } else { @@ -179,8 +187,8 @@ void main() { 1.0); FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA)); } - } else if (pixel_coord.x < param.InputSize.x - param.OS_CROP_RIGHT) { - if (pixel_coord.y < param.OS_CROP_TOP) { + } else if (tx_coord.x < param.InputSize.x - param.OS_CROP_RIGHT) { + if (tx_coord.y < param.OS_CROP_TOP) { if (param.EXTEND_V < 0.5) { FragColor = vec4(0.0, 0.0, 0.0, 1.0); return; @@ -188,41 +196,22 @@ void main() { // Top bar FragColor = vec4(textureLod(Top, vec2(0.5), BIG_NUMBER).rgb, 1.0); FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA)); - } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { + } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { // Uncropped if (param.FORCE_INTEGER_SCALING > 0.5) { // Do a perfectly sharp (nearest neighbor) sampling. - FragColor = vec4(texture(Input, (floor(pixel_coord) + 0.5) * - param.InputSize.zw) - .rgb, - 1.0); + FragColor = vec4( + texture(Input, (floor(tx_coord) + 0.5) * param.InputSize.zw) + .rgb, + 1.0); } else { // Do a sharp anti-aliased interpolation. // Do not correct for gamma additionally because the input is // already in linear color space. - if (param.PIX_AA_SUBPX < 0.5) { - const vec2 tx_size = get_texel_size(pixel_coord) * - scale_o2i * param.InputSize.zw; - FragColor = - vec4(sample_aa(Input, pixel_coord, param.InputSize.zw, - false, param.PIX_AA_SHARP, tx_size), - 1.0); - } else { - for (int i = -1; i < 2; ++i) { - const vec2 subpix_coord = - pixel_coord + - vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0, - 0.0) * - param.FinalViewportSize.zw * param.InputSize.xy; - const vec2 tx_size = get_texel_size(subpix_coord) * - scale_o2i * param.InputSize.zw / - vec2(3.0, 1.0); - FragColor[i + 1] = sample_aa( - Input, subpix_coord, param.InputSize.zw, false, - param.PIX_AA_SHARP, tx_size)[i + 1]; - } - FragColor[3] = 1.0; - } + FragColor = pixel_aa( + Input, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP, + /* gamma_correct = */ false, param.PIX_AA_SUBPX > 0.5, + param.PIX_AA_SUBPX_BGR > 0.5); } } else { if (param.EXTEND_V < 0.5) { @@ -240,7 +229,7 @@ void main() { return; } const vec3 right = textureLod(Right, vec2(0.5), BIG_NUMBER).rgb; - if (pixel_coord.y < param.OS_CROP_TOP) { + if (tx_coord.y < param.OS_CROP_TOP) { if (param.EXTEND_V < 0.5) { FragColor = vec4(0.0, 0.0, 0.0, 1.0); return; @@ -260,7 +249,7 @@ void main() { viewport_corner - content_corner), 1.0); FragColor.rgb = pow(FragColor.rgb, vec3(param.FILL_GAMMA)); - } else if (pixel_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { + } else if (tx_coord.y < param.InputSize.y - param.OS_CROP_BOTTOM) { // Right bar FragColor = vec4(pow(right, vec3(param.FILL_GAMMA)), 1.0); } else { diff --git a/border/shaders/average_fill/parameters.slang b/border/shaders/average_fill/parameters.slang index 132b399..d43b458 100644 --- a/border/shaders/average_fill/parameters.slang +++ b/border/shaders/average_fill/parameters.slang @@ -1,7 +1,7 @@ // See compose.slang for copyright and other information. // clang-format off -#pragma parameter AVERAGE_FILL_SETTINGS "=== Average fill v1.4 settings ===" 0.0 0.0 1.0 1.0 +#pragma parameter AVERAGE_FILL_SETTINGS "=== Average fill v1.5 settings ===" 0.0 0.0 1.0 1.0 #pragma parameter OS_CROP_TOP "Overscan crop top" 0.0 0.0 1024.0 1.0 #pragma parameter OS_CROP_BOTTOM "Overscan crop bottom" 0.0 0.0 1024.0 1.0 #pragma parameter OS_CROP_LEFT "Overscan crop left" 0.0 0.0 1024.0 1.0 diff --git a/border/shaders/blur_fill/compose.slang b/border/shaders/blur_fill/compose.slang index a02de76..d80a27e 100644 --- a/border/shaders/blur_fill/compose.slang +++ b/border/shaders/blur_fill/compose.slang @@ -1,7 +1,7 @@ #version 450 /* - Blur fill v1.5 by fishku + Blur fill v1.6 by fishku Copyright (C) 2023 Public domain license (CC0) @@ -27,6 +27,7 @@ strength of the blur. Changelog: + v1.6: Optimize. Update to new Pixel AA version. Tune default blur strength. v1.5: Add anti-aliased interpolation for non-integer scaling. v1.4: Fix scaling bugs. v1.3: Reduce shimmering artifacts. @@ -35,8 +36,11 @@ v1.0: Initial release. */ -#include "../../../blurs/shaders/dual_filter/parameters.slang" +// clang-format off #include "parameters.slang" +#include "../../../blurs/shaders/dual_filter/parameters.slang" +#include "../../../interpolation/shaders/pixel_aa/shared.slang" +// clang-format on layout(push_constant) uniform Push { vec4 InputSize; @@ -71,31 +75,37 @@ global; layout(location = 0) in vec4 Position; layout(location = 1) in vec2 TexCoord; layout(location = 0) out vec2 vTexCoord; +layout(location = 1) out vec2 tx_coord; +layout(location = 2) out vec2 tx_per_px; +layout(location = 3) out vec2 tx_to_uv; +layout(location = 4) out vec4 input_extrema; void main() { gl_Position = global.MVP * Position; vTexCoord = TexCoord; + const vec2 scale_o2i = scale_o2i(); + tx_coord = (vTexCoord - 0.49999) * scale_o2i + get_input_center(); + tx_per_px = scale_o2i * param.FinalViewportSize.zw; + tx_to_uv = param.InputSize.zw; + input_extrema = vec4(param.OS_CROP_LEFT, param.OS_CROP_TOP, + param.InputSize.x - param.OS_CROP_RIGHT, + param.InputSize.y - param.OS_CROP_BOTTOM); } #pragma stage fragment layout(location = 0) in vec2 vTexCoord; +layout(location = 1) in vec2 tx_coord; +layout(location = 2) in vec2 tx_per_px; +layout(location = 3) in vec2 tx_to_uv; +layout(location = 4) in vec4 input_extrema; layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Input; layout(set = 0, binding = 3) uniform sampler2D Tiled; layout(set = 0, binding = 4) uniform sampler2D Blurred; -#include "../../../interpolation/shaders/pixel_aa/shared.slang" - void main() { - const vec2 scale_o2i = scale_o2i(); - const vec2 pixel_coord = - (vTexCoord - 0.49999) * scale_o2i + get_input_center(); - - const vec4 input_extrema = vec4(param.OS_CROP_LEFT, param.OS_CROP_TOP, - param.InputSize.x - param.OS_CROP_RIGHT, - param.InputSize.y - param.OS_CROP_BOTTOM); - if (any(lessThan(pixel_coord, input_extrema.xy)) || - any(greaterThanEqual(pixel_coord, input_extrema.zw))) { + if (any(lessThan(tx_coord, input_extrema.xy)) || + any(greaterThanEqual(tx_coord, input_extrema.zw))) { if (param.BLUR_RADIUS > 0.0) { // Sample blur. FragColor = vec4( @@ -117,36 +127,17 @@ void main() { if (param.FORCE_INTEGER_SCALING > 0.5) { // Do a perfectly sharp (nearest neighbor) sampling. FragColor = vec4( - texture(Input, (floor(pixel_coord) + 0.5) * param.InputSize.zw) + texture(Input, (floor(tx_coord) + 0.5) * param.InputSize.zw) .rgb, 1.0); } else { // Do a sharp anti-aliased interpolation. // Do not correct for gamma additionally because the input is // already in linear color space. - if (param.PIX_AA_SUBPX < 0.5) { - const vec2 tx_size = get_texel_size(pixel_coord) * scale_o2i * - param.InputSize.zw; - FragColor = - vec4(sample_aa(Input, pixel_coord, param.InputSize.zw, - false, param.PIX_AA_SHARP, tx_size), - 1.0); - } else { - for (int i = -1; i < 2; ++i) { - const vec2 subpix_coord = - pixel_coord + - vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0, - 0.0) * - param.FinalViewportSize.zw * param.InputSize.xy; - const vec2 tx_size = get_texel_size(subpix_coord) * - scale_o2i * param.InputSize.zw / - vec2(3.0, 1.0); - FragColor[i + 1] = - sample_aa(Input, subpix_coord, param.InputSize.zw, - false, param.PIX_AA_SHARP, tx_size)[i + 1]; - } - FragColor[3] = 1.0; - } + FragColor = pixel_aa( + Input, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP, + /* gamma_correct = */ false, param.PIX_AA_SUBPX > 0.5, + param.PIX_AA_SUBPX_BGR > 0.5); } } } diff --git a/border/shaders/blur_fill/parameters.slang b/border/shaders/blur_fill/parameters.slang index b48b5a8..8467850 100644 --- a/border/shaders/blur_fill/parameters.slang +++ b/border/shaders/blur_fill/parameters.slang @@ -1,7 +1,7 @@ // See compose.slang for copyright and other information. // clang-format off -#pragma parameter BLUR_FILL_SETTINGS "=== Blur fill v1.5 settings ===" 0.0 0.0 1.0 1.0 +#pragma parameter BLUR_FILL_SETTINGS "=== Blur fill v1.6 settings ===" 0.0 0.0 1.0 1.0 #pragma parameter OS_CROP_TOP "Overscan crop top" 0.0 0.0 1024.0 1.0 #pragma parameter OS_CROP_BOTTOM "Overscan crop bottom" 0.0 0.0 1024.0 1.0 #pragma parameter OS_CROP_LEFT "Overscan crop left" 0.0 0.0 1024.0 1.0 diff --git a/interpolation/shaders/pixel_aa/pixel_aa.slang b/interpolation/shaders/pixel_aa/pixel_aa.slang index d04a326..14545dd 100644 --- a/interpolation/shaders/pixel_aa/pixel_aa.slang +++ b/interpolation/shaders/pixel_aa/pixel_aa.slang @@ -1,7 +1,7 @@ #version 450 /* - Pixel AA v1.1 by fishku + Pixel AA v1.2 by fishku Copyright (C) 2023 Public domain license (CC0) @@ -24,6 +24,8 @@ subpixel anti-aliasing, results are identical to the "pixellate" shader. Changelog: + v1.2: Optimize and simplify algorithm. Enable sharpness < 1.0. Fix subpixel + sampling bug. v1.1: Better subpixel sampling. v1.0: Initial release. */ @@ -44,44 +46,29 @@ global; #pragma stage vertex layout(location = 0) in vec4 Position; layout(location = 1) in vec2 TexCoord; -layout(location = 0) out vec2 vTexCoord; -layout(location = 1) out vec2 pix_coord; +layout(location = 0) out vec2 tx_coord; +layout(location = 1) out vec2 tx_per_px; +layout(location = 2) out vec2 tx_to_uv; void main() { gl_Position = global.MVP * Position; - vTexCoord = TexCoord; - pix_coord = vTexCoord * param.SourceSize.xy; + tx_coord = TexCoord * param.SourceSize.xy; + tx_per_px = param.SourceSize.xy * param.OutputSize.zw; + tx_to_uv = param.SourceSize.zw; } #pragma stage fragment -layout(location = 0) in vec2 vTexCoord; -layout(location = 1) in vec2 pix_coord; +layout(location = 0) in vec2 tx_coord; +layout(location = 1) in vec2 tx_per_px; +layout(location = 2) in vec2 tx_to_uv; layout(location = 0) out vec4 FragColor; layout(set = 0, binding = 2) uniform sampler2D Source; #include "shared.slang" void main() { - if (param.PIX_AA_SUBPX < 0.5) { - FragColor = - vec4(sample_aa(Source, pix_coord, param.SourceSize.zw, - param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SHARP), - 1.0); - } else { - // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel, - // assuming that the output size is at monitor resolution. - for (int i = -1; i < 2; ++i) { - const vec2 subpix_coord = - pix_coord + - vec2((param.PIX_AA_SUBPX_BGR < 0.5 ? i : -i) / 3.0, 0.0) * - param.OutputSize.zw * param.SourceSize.xy; - // With subpixel sampling, the sampling area is effectively reduced - // to a third. - const vec2 tx_size = get_texel_size(subpix_coord) / vec2(3.0, 1.0); - FragColor[i + 1] = sample_aa( - Source, subpix_coord, param.SourceSize.zw, - param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SHARP, tx_size)[i + 1]; - } - FragColor[3] = 1.0; - } + FragColor = + pixel_aa(Source, tx_per_px, tx_to_uv, tx_coord, param.PIX_AA_SHARP, + param.PIX_AA_GAMMA > 0.5, param.PIX_AA_SUBPX > 0.5, + param.PIX_AA_SUBPX_BGR > 0.5); } diff --git a/interpolation/shaders/pixel_aa/shared.slang b/interpolation/shaders/pixel_aa/shared.slang index 4074c04..ae09acd 100644 --- a/interpolation/shaders/pixel_aa/shared.slang +++ b/interpolation/shaders/pixel_aa/shared.slang @@ -1,9 +1,8 @@ -// This file has to be included at the frag shader stage so that fwidth() is -// defined. +// See pixel_aa.slang for copyright and other information. // clang-format off -#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.1 settings ===" 0.0 0.0 1.0 1.0 -#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.0 1.0 4.0 0.05 +#pragma parameter PIX_AA_SETTINGS "=== Pixel AA v1.2 settings ===" 0.0 0.0 1.0 1.0 +#pragma parameter PIX_AA_SHARP "Pixel AA sharpening amount" 1.5 0.0 2.0 0.05 #pragma parameter PIX_AA_GAMMA "Enable gamma-correct blending" 1.0 0.0 1.0 1.0 #pragma parameter PIX_AA_SUBPX "Enable subpixel AA" 0.0 0.0 1.0 1.0 #pragma parameter PIX_AA_SUBPX_BGR "Use BGR subpx. instead of RGB" 0.0 0.0 1.0 1.0 @@ -25,40 +24,65 @@ vec3 to_lin(vec3 x) { return pow(x, vec3(2.2)); } vec3 to_srgb(vec3 x) { return pow(x, vec3(1.0 / 2.2)); } -vec2 get_texel_size(vec2 pix_coord) { - return clamp(fwidth(pix_coord), 1.0e-5, 1.0); -} - +// Function to get a single sample using the "pixel AA" method. // Params: -// pix_coord: Coordinate in source pixel coordinates -// px_size_uv: 1 / source resolution -vec3 sample_aa(sampler2D tex, vec2 pix_coord, vec2 px_size_uv, - bool gamma_correct, float sharpness, vec2 tx_size) { - const vec2 tx_coord = pix_coord - 0.5 * tx_size; - const vec2 tx_coord_i = floor(tx_coord); - const vec2 tx_offset = - slopestep(1.0 - tx_size, vec2(1.0), fract(tx_coord), sharpness); - // With gamma correct blending, we have to do 4 taps and blend manually. - // Without it, we can make use of a single tap using bilinear interpolation. +// tx_coord: Coordinate in source pixel (texel) coordinates +vec3 sample_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord, + float sharpness, bool gamma_correct) { + // The offset for interpolation is a periodic function with + // a period length of 1 texel. + // The input coordinate is shifted so that the center of the texel + // aligns with the start of the period. + // First, get the period and phase. + vec2 period; + const vec2 phase = modf(tx_coord - 0.5, period); + // The function starts at 0, then starts transitioning at + // 0.5 - 0.5 / pixels_per_texel, then reaches 0.5 at 0.5, + // Then reaches 1 at 0.5 + 0.5 / pixels_per_texel. + // For sharpness values < 1.0, blend to bilinear filtering. + const vec2 offset = + slopestep(min(1.0, sharpness) * (0.5 - 0.5 * tx_per_px), + 1.0 - min(1.0, sharpness) * (1.0 - (0.5 + 0.5 * tx_per_px)), + phase, max(1.0, sharpness)); + + // With gamma correct blending, we have to do 4 taps and interpolate + // manually. Without it, we can make use of a single tap using bilinear + // interpolation. The offsets are shifted back to the texel center before + // sampling. if (gamma_correct) { const vec3 samples[] = { - to_lin(texture(tex, (tx_coord_i + 0.5) * px_size_uv).rgb), - to_lin( - texture(tex, (tx_coord_i + vec2(1.5, 0.5)) * px_size_uv).rgb), - to_lin( - texture(tex, (tx_coord_i + vec2(0.5, 1.5)) * px_size_uv).rgb), - to_lin(texture(tex, (tx_coord_i + 1.5) * px_size_uv).rgb)}; - return to_srgb(mix(mix(samples[0], samples[1], tx_offset.x), - mix(samples[2], samples[3], tx_offset.x), - tx_offset.y)); + to_lin(texture(tex, (period + 0.5) * tx_to_uv).rgb), + to_lin(texture(tex, (period + vec2(1.5, 0.5)) * tx_to_uv).rgb), + to_lin(texture(tex, (period + vec2(0.5, 1.5)) * tx_to_uv).rgb), + to_lin(texture(tex, (period + 1.5) * tx_to_uv).rgb)}; + return to_srgb(mix(mix(samples[0], samples[1], offset.x), + mix(samples[2], samples[3], offset.x), offset.y)); } else { - return texture(tex, (tx_coord_i + 0.5 + tx_offset) * px_size_uv).rgb; + return texture(tex, (period + 0.5 + offset) * tx_to_uv).rgb; } } -vec3 sample_aa(sampler2D tex, vec2 pix_coord, vec2 px_size_uv, - bool gamma_correct, float sharpness) { - const vec2 tx_size = get_texel_size(pix_coord); - return sample_aa(tex, pix_coord, px_size_uv, gamma_correct, sharpness, - tx_size); +// Function to get a pixel value, taking into consideration possible subpixel +// interpolation. +vec4 pixel_aa(sampler2D tex, vec2 tx_per_px, vec2 tx_to_uv, vec2 tx_coord, + float sharpness, bool gamma_correct, bool sample_subpx, + bool subpx_bgr) { + if (sample_subpx) { + // Subpixel sampling: Shift the sampling by 1/3rd of an output pixel for + // each subpixel, assuming that the output size is at monitor + // resolution. + const vec2 sub_tx_offset = + vec2(tx_per_px.x / 3.0 * (subpx_bgr ? -1.0 : 1.0), 0.0); + vec3 res; + for (int i = -1; i < 2; ++i) { + res[i + 1] = sample_aa(tex, tx_per_px, tx_to_uv, + tx_coord + sub_tx_offset * float(i), + sharpness, gamma_correct)[i + 1]; + } + return vec4(res, 1.0); + } else { + return vec4(sample_aa(tex, tx_per_px, tx_to_uv, tx_coord, sharpness, + gamma_correct), + 1.0); + } }