From 6f47464db3ad038b3ba9aa62d1ac0225f39a6b47 Mon Sep 17 00:00:00 2001 From: hunterk Date: Thu, 4 May 2017 16:34:39 -0500 Subject: [PATCH] fix beam_horiz_filter, update kurozumi, add fallback to fix my AMD crash --- crt/crt-royale_fallback.slangp | 206 ++++++++++ .../crt-royale/src/bind-shader-params.h | 2 +- .../crt-royale-bloom-approx_fallback.slang | 354 ++++++++++++++++++ ...-pass-linearize-crt-gamma-bob-fields.slang | 2 + .../crt-royale-mask-resize-horizontal.slang | 2 +- .../crt-royale/src/scanline-functions.h | 6 +- presets/crt-royale-kurozumi.slangp | 8 +- 7 files changed, 571 insertions(+), 9 deletions(-) create mode 100644 crt/crt-royale_fallback.slangp create mode 100644 crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang diff --git a/crt/crt-royale_fallback.slangp b/crt/crt-royale_fallback.slangp new file mode 100644 index 0000000..931f032 --- /dev/null +++ b/crt/crt-royale_fallback.slangp @@ -0,0 +1,206 @@ +# IMPORTANT: +# Shader passes need to know details about the image in the mask_texture LUT +# files, so set the following constants in user-preset-constants.h accordingly: +# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's) +# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's) +# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's) +# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1]) +# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1]) +# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1]) +# Shader passes also need to know certain scales set in this preset, but their +# compilation model doesn't currently allow the preset file to tell them. Make +# sure to set the following constants in user-preset-constants.h accordingly too: +# 1.) bloom_approx_scale_x = scale_x2 +# 2.) 
mask_resize_viewport_scale = vec2(scale_x6, scale_y5) +# Finally, shader passes need to know the value of geom_max_aspect_ratio used to +# calculate scale_y5 (among other values): +# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5) + +shaders = "12" + +# Set an identifier, filename, and sampling traits for the phosphor mask texture. +# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small +# non-mipmapped version and a large mipmapped version. +# TODO: Test masks in other directories. +textures = "mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large" +mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png" +mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png" +mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png" +mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png" +mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png" +mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png" +mask_grille_texture_small_wrap_mode = "repeat" +mask_grille_texture_large_wrap_mode = "repeat" +mask_slot_texture_small_wrap_mode = "repeat" +mask_slot_texture_large_wrap_mode = "repeat" +mask_shadow_texture_small_wrap_mode = "repeat" +mask_shadow_texture_large_wrap_mode = "repeat" +mask_grille_texture_small_linear = "true" +mask_grille_texture_large_linear = "true" +mask_slot_texture_small_linear = "true" +mask_slot_texture_large_linear = "true" +mask_shadow_texture_small_linear = "true" +mask_shadow_texture_large_linear = "true" +mask_grille_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without 
tex2Dlod +mask_grille_texture_large_mipmap = "true" # Essential for hardware-resized masks +mask_slot_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod +mask_slot_texture_large_mipmap = "true" # Essential for hardware-resized masks +mask_shadow_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod +mask_shadow_texture_large_mipmap = "true" # Essential for hardware-resized masks + + +# Pass0: Linearize the input based on CRT gamma and bob interlaced fields. +# (Bobbing ensures we can immediately blur without getting artifacts.) +shader0 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang" +alias0 = "ORIG_LINEARIZED" +filter_linear0 = "false" +scale_type0 = "source" +scale0 = "1.0" +srgb_framebuffer0 = "true" + +# Pass1: Resample interlaced (and misconverged) scanlines vertically. +# Separating vertical/horizontal scanline sampling is faster: It lets us +# consider more scanlines while calculating weights for fewer pixels, and +# it reduces our samples from vertical*horizontal to vertical+horizontal. +# This has to come right after ORIG_LINEARIZED, because there's no +# "original_source" scale_type we can use later. +shader1 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.slang" +alias1 = "VERTICAL_SCANLINES" +filter_linear1 = "true" +scale_type_x1 = "source" +scale_x1 = "1.0" +scale_type_y1 = "viewport" +scale_y1 = "1.0" +srgb_framebuffer1 = "true" + +# Pass2: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and +# account for convergence offsets. We want to blur a predictable portion of the +# screen to match the phosphor bloom, and absolute scale works best for +# reliable results with a fixed-size bloom. Picking a scale is tricky: +# a.) 
400x300 is a good compromise for the "fake-bloom" version: It's low enough +# to blur high-res/interlaced sources but high enough that resampling +# doesn't smear low-res sources too much. +# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and +# the only noticeable visual difference is a larger halation spread (which +# may be a good thing for people who like to crank it up). +# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's +# *intended* for an ~4:3 aspect ratio). +shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang" +alias2 = "BLOOM_APPROX" +filter_linear2 = "true" +scale_type2 = "absolute" +scale_x2 = "320" +scale_y2 = "240" +srgb_framebuffer2 = "true" + +# Pass3: Vertically blur the input for halation and refractive diffusion. +# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring +# a constant portion of the screen is probably physically correct if the +# viewport resolution is proportional to the simulated CRT size. +shader3 = "../blurs/blur9fast-vertical.slang" +filter_linear3 = "true" +scale_type3 = "source" +scale3 = "1.0" +srgb_framebuffer3 = "true" + +# Pass4: Horizontally blur the input for halation and refractive diffusion. +# Note: Using a one-pass 9x9 blur is about 1% slower. +shader4 = "../blurs/blur9fast-horizontal.slang" +alias4 = "HALATION_BLUR" +filter_linear4 = "true" +scale_type4 = "source" +scale4 = "1.0" +srgb_framebuffer4 = "true" + +# Pass5: Lanczos-resize the phosphor mask vertically. Set the absolute +# scale_x5 == mask_texture_small_size.x (see IMPORTANT above). Larger scales +# will blur, and smaller scales could get nasty. The vertical size must be +# based on the viewport size and calculated carefully to avoid artifacts later. +# First calculate the minimum number of mask tiles we need to draw. 
+# Since curvature is computed after the scanline masking pass: +# num_resized_mask_tiles = 2.0; +# If curvature were computed in the scanline masking pass (it's not): +# max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0); +# max_mask_tile_border = max_mask_texel_border/ +# (min_resized_phosphor_triad_size * mask_triads_per_tile); +# num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0); +# At typical values (triad_size >= 2.0, mask_triads_per_tile == 8): +# num_resized_mask_tiles = ~3.8 +# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio +# to relate them to vertical resolution. The widest we expect is: +# geom_max_aspect_ratio = 4.0/3.0 # Note: Shader passes need to know this! +# The fewer triads we tile across the screen, the larger each triad will be as a +# fraction of the viewport size, and the larger scale_y5 must be to draw a full +# num_resized_mask_tiles. Therefore, we must decide the smallest number of +# triads we'll guarantee can be displayed on screen. We'll set this according +# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use): +# min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333 +# Now calculate the viewport scale that ensures we can draw num_resized_mask_tiles: +# min_scale_x = num_resized_mask_tiles * mask_triads_per_tile / +# min_allowed_viewport_triads +# scale_y5 = geom_max_aspect_ratio * min_scale_x +# # Some code might depend on equal scales: +# scale_x6 = scale_y5 +# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads: +# scale_y5 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625 +# IMPORTANT: The scales MUST be calculated in this way. If you wish to change +# geom_max_aspect_ratio, update that constant in user-preset-constants.h!
+shader5 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang" +filter_linear5 = "true" +scale_type_x5 = "absolute" +scale_x5 = "64" +scale_type_y5 = "viewport" +scale_y5 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size +#srgb_framebuffer5 = "false" # mask_texture is already assumed linear + +# Pass6: Lanczos-resize the phosphor mask horizontally. scale_x6 = scale_y5. +# TODO: Check again if the shaders actually require equal scales. +shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang" +alias6 = "MASK_RESIZE" +filter_linear6 = "false" +scale_type_x6 = "viewport" +scale_x6 = "0.0625" +scale_type_y6 = "source" +scale_y6 = "1.0" +#srgb_framebuffer6 = "false" # mask_texture is already assumed linear + +# Pass7: Resample (misconverged) scanlines horizontally, apply halation, and +# apply the phosphor mask. +shader7 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.slang" +alias7 = "MASKED_SCANLINES" +filter_linear7 = "true" # This could just as easily be nearest neighbor. +scale_type7 = "viewport" +scale7 = "1.0" +srgb_framebuffer7 = "true" + +# Pass 8: Compute a brightpass. This will require reading the final mask. +shader8 = "shaders/crt-royale/src/crt-royale-brightpass.slang" +alias8 = "BRIGHTPASS" +filter_linear8 = "true" # This could just as easily be nearest neighbor. +scale_type8 = "viewport" +scale8 = "1.0" +srgb_framebuffer8 = "true" + +# Pass 9: Blur the brightpass vertically +shader9 = "shaders/crt-royale/src/crt-royale-bloom-vertical.slang" +filter_linear9 = "true" # This could just as easily be nearest neighbor. 
+scale_type9 = "source" +scale9 = "1.0" +srgb_framebuffer9 = "true" + +# Pass 10: Blur the brightpass horizontally and combine it with the dimpass: +shader10 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang" +filter_linear10 = "true" +scale_type10 = "source" +scale10 = "1.0" +srgb_framebuffer10 = "true" + +# Pass 11: Compute curvature/AA: +shader11 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.slang" +filter_linear11 = "true" +scale_type11 = "viewport" +mipmap_input11 = "true" +texture_wrap_mode11 = "clamp_to_edge" + +parameters = "beam_num_scanlines" +beam_num_scanlines = 3.0 \ No newline at end of file diff --git a/crt/shaders/crt-royale/src/bind-shader-params.h b/crt/shaders/crt-royale/src/bind-shader-params.h index 76145b0..5a1792e 100644 --- a/crt/shaders/crt-royale/src/bind-shader-params.h +++ b/crt/shaders/crt-royale/src/bind-shader-params.h @@ -113,7 +113,7 @@ const float gba_gamma = 3.5; // Irrelevant but necessary to define. const float beam_min_shape = max(2.0, beam_min_shape_static); const float beam_max_shape = max(beam_min_shape, beam_max_shape_static); const float beam_shape_power = max(0.0, beam_shape_power_static); - const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0); +// const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0); const float beam_horiz_sigma = max(FIX_ZERO(0.0), beam_horiz_sigma_static); const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0); // Unpack vector elements to match scalar uniforms: diff --git a/crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang b/crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang new file mode 100644 index 0000000..0fd6d24 --- /dev/null +++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang @@ -0,0 +1,354 @@ +#version 450 + +layout(push_constant) uniform Push +{ + vec4 SourceSize; + vec4 OriginalSize; + vec4 OutputSize; + uint FrameCount; + 
vec4 ORIG_LINEARIZEDSize; +} registers; + +#include "params.inc" + +///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// + +// crt-royale: A full-featured CRT shader, with cheese. +// Copyright (C) 2014 TroggleMonkey +// +// This program is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 2 of the License, or any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +// more details. +// +// You should have received a copy of the GNU General Public License along with +// this program; if not, write to the Free Software Foundation, Inc., 59 Temple +// Place, Suite 330, Boston, MA 02111-1307 USA + + +////////////////////////////////// INCLUDES ////////////////////////////////// + +#include "../user-settings.h" +#include "derived-settings-and-constants.h" +#include "bind-shader-params.h" +#include "../../../../include/gamma-management.h" +#include "../../../../include/blur-functions.h" +#include "scanline-functions.h" +#include "bloom-functions.h" + +/////////////////////////////////// HELPERS ////////////////////////////////// + +vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv, + const vec2 dxdy, const vec2 texture_size, const vec2 texture_size_inv, + const vec2 tex_uv_to_pixel_scale, const float sigma) +{ + // Requires: 1.) All requirements of gamma-management.h must be satisfied! + // 2.) filter_linearN must == "true" in your .cgp preset. + // 3.) mipmap_inputN must == "true" in your .cgp preset if + // IN.output_size << SRC.video_size. + // 4.) dxdy should contain the uv pixel spacing: + // dxdy = max(vec2(1.0), + // SRC.video_size/IN.output_size)/SRC.texture_size; + // 5.) 
texture_size == SRC.texture_size + // 6.) texture_size_inv == vec2(1.0)/SRC.texture_size + // 7.) tex_uv_to_pixel_scale == IN.output_size * + // SRC.texture_size / SRC.video_size; + // 8.) sigma is the desired Gaussian standard deviation, in + // terms of output pixels. It should be < ~0.66171875 to + // ensure the first unused sample (outside the 4x4 box) has + // a weight < 1.0/256.0. + // Returns: A true 4x4 Gaussian resize of the input. + // Description: + // Given correct inputs, this Gaussian resizer samples 4 pixel locations + // along each downsized dimension and/or 4 texel locations along each + // upsized dimension. It computes dynamic weights based on the pixel-space + // distance of each sample from the destination pixel. It is arbitrarily + // resizable and higher quality than tex2Dblur3x3_resize, but it's slower. + // TODO: Move this to a more suitable file once there are others like it. + const float denom_inv = 0.5/(sigma*sigma); + // We're taking 4x4 samples, and we're snapping to texels for upsizing. 
+ // Find texture coords for sample 5 (second row, second column): + const vec2 curr_texel = tex_uv * texture_size; + const vec2 prev_texel = + floor(curr_texel - vec2(under_half)) + vec2(0.5); + const vec2 prev_texel_uv = prev_texel * texture_size_inv; + const bvec2 snap = lessThanEqual(dxdy , texture_size_inv); + const vec2 sample5_downsize_uv = tex_uv - 0.5 * dxdy; + const vec2 sample5_uv = mix(sample5_downsize_uv, prev_texel_uv, snap); + // Compute texture coords for other samples: + const vec2 dx = vec2(dxdy.x, 0.0); + const vec2 sample0_uv = sample5_uv - dxdy; + const vec2 sample10_uv = sample5_uv + dxdy; + const vec2 sample15_uv = sample5_uv + 2.0 * dxdy; + const vec2 sample1_uv = sample0_uv + dx; + const vec2 sample2_uv = sample0_uv + 2.0 * dx; + const vec2 sample3_uv = sample0_uv + 3.0 * dx; + const vec2 sample4_uv = sample5_uv - dx; + const vec2 sample6_uv = sample5_uv + dx; + const vec2 sample7_uv = sample5_uv + 2.0 * dx; + const vec2 sample8_uv = sample10_uv - 2.0 * dx; + const vec2 sample9_uv = sample10_uv - dx; + const vec2 sample11_uv = sample10_uv + dx; + const vec2 sample12_uv = sample15_uv - 3.0 * dx; + const vec2 sample13_uv = sample15_uv - 2.0 * dx; + const vec2 sample14_uv = sample15_uv - dx; + // Load each sample: + const vec3 sample0 = tex2D_linearize(tex, sample0_uv).rgb; + const vec3 sample1 = tex2D_linearize(tex, sample1_uv).rgb; + const vec3 sample2 = tex2D_linearize(tex, sample2_uv).rgb; + const vec3 sample3 = tex2D_linearize(tex, sample3_uv).rgb; + const vec3 sample4 = tex2D_linearize(tex, sample4_uv).rgb; + const vec3 sample5 = tex2D_linearize(tex, sample5_uv).rgb; + const vec3 sample6 = tex2D_linearize(tex, sample6_uv).rgb; + const vec3 sample7 = tex2D_linearize(tex, sample7_uv).rgb; + const vec3 sample8 = tex2D_linearize(tex, sample8_uv).rgb; + const vec3 sample9 = tex2D_linearize(tex, sample9_uv).rgb; + const vec3 sample10 = tex2D_linearize(tex, sample10_uv).rgb; + const vec3 sample11 = tex2D_linearize(tex, sample11_uv).rgb; + const 
vec3 sample12 = tex2D_linearize(tex, sample12_uv).rgb; + const vec3 sample13 = tex2D_linearize(tex, sample13_uv).rgb; + const vec3 sample14 = tex2D_linearize(tex, sample14_uv).rgb; + const vec3 sample15 = tex2D_linearize(tex, sample15_uv).rgb; + // Compute destination pixel offsets for each sample: + const vec2 dest_pixel = tex_uv * tex_uv_to_pixel_scale; + const vec2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel; + const vec2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel; + // Compute Gaussian sample weights: + const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv); + const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv); + const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv); + const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv); + const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv); + const 
float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv); + const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv); + const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv); + const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv); + const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv); + const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv); + const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv); + const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv); + const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv); + const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv); + const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv); + const float weight_sum_inv = 1.0/( + w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 + + w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15); + // Weight and sum the samples: + const vec3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 + + w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 + + w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 + + w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15; + return sum * weight_sum_inv; +} + +#pragma stage vertex +layout(location = 0) in vec4 Position; +layout(location = 1) in vec2 TexCoord; +layout(location = 0) out vec2 tex_uv; +layout(location = 1) out float estimated_viewport_size_x; +layout(location = 2) out vec2 blur_dxdy; +layout(location = 3) out vec2 uv_scanline_step; +layout(location = 4) out vec2 texture_size_inv; +layout(location = 5) out vec2 tex_uv_to_pixel_scale; + +void main() +{ + // This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h, + // except we're using a different source image. 
+ gl_Position = params.MVP * Position; + const vec2 video_uv = TexCoord; + tex_uv = video_uv; + // The last pass (vertical scanlines) had a viewport y scale, so we can + // use it to calculate a better runtime sigma: + estimated_viewport_size_x = registers.SourceSize.y * params.geom_aspect_ratio_x / params.geom_aspect_ratio_y; + + // Get the uv sample distance between output pixels. We're using a resize + // blur, so arbitrary upsizing will be acceptable if filter_linearN = + // "true," and arbitrary downsizing will be acceptable if mipmap_inputN = + // "true" too. The blur will be much more accurate if a true 4x4 Gaussian + // resize is used instead of tex2Dblur3x3_resize (which samples between + // texels even for upsizing). + const vec2 dxdy_min_scale = registers.ORIG_LINEARIZEDSize.xy * registers.OutputSize.zw; + texture_size_inv = registers.ORIG_LINEARIZEDSize.zw; + if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize + { + // For upsizing, we'll snap to texels and sample the nearest 4. + const vec2 dxdy_scale = max(dxdy_min_scale, vec2(1.0)); + blur_dxdy = dxdy_scale * texture_size_inv; + } + else + { + const vec2 dxdy_scale = dxdy_min_scale; + blur_dxdy = dxdy_scale * texture_size_inv; + } + + tex_uv_to_pixel_scale = registers.OutputSize.xy; +// texture_size_inv = texture_size_inv; <- commented out because it's pointless in slang + + // Detecting interlacing again here lets us apply convergence offsets in + // this pass. il_step_multiple contains the (texel, scanline) step + // multiple: 1 for progressive, 2 for interlaced. 
+ const vec2 orig_video_size = registers.ORIG_LINEARIZEDSize.xy; + float interlace_check = 0.0; + if (is_interlaced(orig_video_size.y) == true) interlace_check = 1.0; + const float y_step = 1.0 + interlace_check; + const vec2 il_step_multiple = vec2(1.0, y_step); + // Get the uv distance between (texels, same-field scanlines): + uv_scanline_step = il_step_multiple * registers.ORIG_LINEARIZEDSize.zw; +} + +#pragma stage fragment +#pragma format R8G8B8A8_SRGB +layout(location = 0) in vec2 tex_uv; +layout(location = 1) in float estimated_viewport_size_x; +layout(location = 2) in vec2 blur_dxdy; +layout(location = 3) in vec2 uv_scanline_step; +layout(location = 4) in vec2 texture_size_inv; +layout(location = 5) in vec2 tex_uv_to_pixel_scale; +layout(location = 0) out vec4 FragColor; +layout(set = 0, binding = 2) uniform sampler2D Source; +layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED; + +void main() +{ + // Would a viewport-relative size work better for this pass? (No.) + // PROS: + // 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd + // write a viewport scale. That number could be used to directly scale + // the viewport-resolution bloom sigma and/or triad size to a smaller + // scale. This way, we could calculate an optimal dynamic sigma no + // matter how the dot pitch is specified. + // CONS: + // 1.) Texel smearing would be much worse at small viewport sizes, but + // performance would be much worse at large viewport sizes, so there + // would be no easy way to calculate a decent scale. + // 2.) Worse, we could no longer get away with using a constant-size blur! + // Instead, we'd have to face all the same difficulties as the real + // phosphor bloom, which requires static #ifdefs to decide the blur + // size based on the expected triad size...a dynamic value. + // 3.) Like the phosphor bloom, we'd have less control over making the blur + // size correct for an optical blur. 
That said, we likely overblur (to + // maintain brightness) more than the eye would do by itself: 20/20 + // human vision distinguishes ~1 arc minute, or 1/60 of a degree. The + // highest viewing angle recommendation I know of is THX's 40.04 degree + // recommendation, at which 20/20 vision can distinguish about 2402.4 + // lines. Assuming the "TV lines" definition, that means 1201.2 + // distinct light lines and 1201.2 distinct dark lines can be told + // apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2 + // pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total + // (if they're alternately lit). That's a max of 800.8 triads. Using + // a more popular 30 degree viewing angle recommendation, 20/20 vision + // can distinguish 1800 lines, or 600 triads of alternately lit + // phosphors. In contrast, we currently blur phosphors all the way + // down to 341.3 triads to ensure full brightness. + // 4.) Realistically speaking, we're usually just going to use bilinear + // filtering in this pass anyway, but it only works well to limit + // bandwidth if it's done at a small constant scale. 
+ + // Get the constants we need to sample: + const vec2 texture_size = registers.ORIG_LINEARIZEDSize.xy; + vec2 tex_uv_r, tex_uv_g, tex_uv_b; + + if(beam_misconvergence == true) + { + const vec2 convergence_offsets_r = vec2(params.convergence_offset_x_r, params.convergence_offset_y_r);//get_convergence_offsets_r_vector(); + const vec2 convergence_offsets_g = vec2(params.convergence_offset_x_g, params.convergence_offset_y_g);//get_convergence_offsets_g_vector(); + const vec2 convergence_offsets_b = vec2(params.convergence_offset_x_b, params.convergence_offset_y_b);//get_convergence_offsets_b_vector(); + tex_uv_r = tex_uv - vec2(params.convergence_offset_x_r, params.convergence_offset_y_r) * uv_scanline_step; + tex_uv_g = tex_uv - vec2(params.convergence_offset_x_g, params.convergence_offset_y_g) * uv_scanline_step; + tex_uv_b = tex_uv - vec2(params.convergence_offset_x_b, params.convergence_offset_y_b) * uv_scanline_step; + } + // Get the blur sigma: + const float bloom_approx_sigma = get_bloom_approx_sigma(registers.OutputSize.x, estimated_viewport_size_x); + + // Sample the resized and blurred texture, and apply convergence offsets if + // necessary. Applying convergence offsets here triples our samples from + // 16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and + // HALATION_BLUR 3 times at full resolution every time they're used. + vec3 color_r, color_g, color_b, color; + if(bloom_approx_filter > 1.5) + { + // Use a 4x4 Gaussian resize. This is slower but technically correct. 
+ if(beam_misconvergence == true) + { + color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r, + blur_dxdy, texture_size, texture_size_inv, + tex_uv_to_pixel_scale, bloom_approx_sigma); + color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g, + blur_dxdy, texture_size, texture_size_inv, + tex_uv_to_pixel_scale, bloom_approx_sigma); + color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b, + blur_dxdy, texture_size, texture_size_inv, + tex_uv_to_pixel_scale, bloom_approx_sigma); + } + else + { + color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv, + blur_dxdy, texture_size, texture_size_inv, + tex_uv_to_pixel_scale, bloom_approx_sigma); + } + } + else if(bloom_approx_filter > 0.5) + { + // Use a 3x3 resize blur. This is the softest option, because we're + // blurring already blurry bilinear samples. It doesn't play quite as + // nicely with convergence offsets, but it has its charms. + if(beam_misconvergence == true) + { + color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r, + blur_dxdy, bloom_approx_sigma); + color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g, + blur_dxdy, bloom_approx_sigma); + color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b, + blur_dxdy, bloom_approx_sigma); + } + else + { + color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy); + } + } + else + { + // Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH + // better than tex2Dblur3x3_resize for the very small sigmas we're + // likely to use at small output resolutions. (This estimate becomes + // too sharp above ~400x300, but the blurs break down above that + // resolution too, unless min_allowed_viewport_triads is high enough to + // keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.) 
+ if(beam_misconvergence == true) + { + color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb; + color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb; + color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb; + } + else + { + color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb; + } + } + // Pack the colors from the red/green/blue beams into a single vector: + if(beam_misconvergence == true) + { + color = vec3(color_r.r, color_g.g, color_b.b); + } + // Encode and output the blurred image: + FragColor = vec4(texture(ORIG_LINEARIZED, tex_uv));//vec4(color, 1.0);// +} diff --git a/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang b/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang index fd67019..02ec577 100755 --- a/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang +++ b/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang @@ -10,9 +10,11 @@ layout(std140, set = 0, binding = 0) uniform UBO { mat4 MVP; float interlace_bff; + float beam_horiz_filter; } params; #pragma parameter interlace_bff "interlace_bff" 1.0 0.0 1.0 1.0 +#pragma parameter beam_horiz_filter "beam_horiz_filter" 0.0 0.0 2.0 1.0 ///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// diff --git a/crt/shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang b/crt/shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang index d5b6fcb..0fcdc2f 100755 --- a/crt/shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang +++ b/crt/shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang @@ -108,7 +108,7 @@ void main() // easier tiled sampling later. #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE // Discard unneeded fragments in case our profile allows real branches. 
- const vec2 tile_uv_wrap = tile_uv_wrap; +// const vec2 tile_uv_wrap = tile_uv_wrap; if(params.mask_sample_mode_desired < 0.5 && max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles) { diff --git a/crt/shaders/crt-royale/src/scanline-functions.h b/crt/shaders/crt-royale/src/scanline-functions.h index ede23d8..5169b3d 100644 --- a/crt/shaders/crt-royale/src/scanline-functions.h +++ b/crt/shaders/crt-royale/src/scanline-functions.h @@ -156,7 +156,7 @@ vec3 get_scanline_color(const sampler2D tex, const vec2 scanline_uv, const vec3 color2 = texture(tex, scanline_uv + uv_step_x).rgb; vec3 color0 = vec3(0.0); vec3 color3 = vec3(0.0); - if(beam_horiz_filter > 0.5) + if(params.beam_horiz_filter > 0.5) { color0 = texture(tex, scanline_uv - uv_step_x).rgb; color3 = texture(tex, scanline_uv + 2.0 * uv_step_x).rgb; @@ -183,14 +183,14 @@ vec3 sample_single_scanline_horizontal(const sampler2D texture, 1.0 - prev_dist, 2.0 - prev_dist); // Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels: vec4 weights; - if(beam_horiz_filter < 0.5) + if(params.beam_horiz_filter < 0.5) { // Quilez: const float x = sample_dists.y; const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0); weights = vec4(0.0, 1.0 - w2, w2, 0.0); } - else if(beam_horiz_filter < 1.5) + else if(params.beam_horiz_filter < 1.5) { // Gaussian: float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma); diff --git a/presets/crt-royale-kurozumi.slangp b/presets/crt-royale-kurozumi.slangp index 8364cfc..a412b85 100644 --- a/presets/crt-royale-kurozumi.slangp +++ b/presets/crt-royale-kurozumi.slangp @@ -219,12 +219,12 @@ beam_max_shape = "4.000000" beam_shape_power = "0.250000" beam_horiz_filter = "0.000000" beam_horiz_sigma = "0.545000" -convergence_offset_x_r = "0.000000" +convergence_offset_x_r = "-0.050000" convergence_offset_x_g = "0.000000" convergence_offset_x_b = "0.000000" -convergence_offset_y_r = "0.000000" -convergence_offset_y_g = "0.000000" -convergence_offset_y_b = "0.000000" 
+convergence_offset_y_r = "0.100000" +convergence_offset_y_g = "-0.050000" +convergence_offset_y_b = "0.100000" mask_type = "0.000000" mask_sample_mode_desired = "1.000000" mask_specify_num_triads = "0.000000"