fix beam_horiz_filter, update kurozumi, add fallback to fix my AMD crash

hunterk 2017-05-04 16:34:39 -05:00
parent 8590410b30
commit 6f47464db3
7 changed files with 571 additions and 9 deletions

@@ -0,0 +1,206 @@
# IMPORTANT:
# Shader passes need to know details about the image in the mask_texture LUT
# files, so set the following constants in user-preset-constants.h accordingly:
# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
# Shader passes also need to know certain scales set in this preset, but their
# compilation model doesn't currently allow the preset file to tell them. Make
# sure to set the following constants in user-preset-constants.h accordingly too:
# 1.) bloom_approx_scale_x = scale_x2
# 2.) mask_resize_viewport_scale = vec2(scale_x6, scale_y5)
# Finally, shader passes need to know the value of geom_max_aspect_ratio used to
# calculate scale_y5 (among other values):
# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)
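# Illustrative sketch only (an assumption, not part of this commit): with this
# preset's scales and the *ResizeTo64 mask LUT's, the matching entries in
# user-preset-constants.h would look roughly like the lines below. Verify the
# names and values against the real header before copying; the mask_*_avg_color
# values depend on the LUT images and are omitted here.
#   const float mask_triads_per_tile = 8.0;          // horizontal triads per mask tile
#   const vec2 mask_texture_small_size = vec2(64.0); // matches scale_x5 below
#   const float bloom_approx_scale_x = 320.0;        // = scale_x2 below
#   const vec2 mask_resize_viewport_scale = vec2(0.0625, 0.0625);  // = vec2(scale_x6, scale_y5)
#   const float geom_max_aspect_ratio = 4.0/3.0;     // used to derive scale_y5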
shaders = "12"
# Set an identifier, filename, and sampling traits for the phosphor mask texture.
# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
# non-mipmapped version and a large mipmapped version.
# TODO: Test masks in other directories.
textures = "mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large"
mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"
mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"
mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"
mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png"
mask_grille_texture_small_wrap_mode = "repeat"
mask_grille_texture_large_wrap_mode = "repeat"
mask_slot_texture_small_wrap_mode = "repeat"
mask_slot_texture_large_wrap_mode = "repeat"
mask_shadow_texture_small_wrap_mode = "repeat"
mask_shadow_texture_large_wrap_mode = "repeat"
mask_grille_texture_small_linear = "true"
mask_grille_texture_large_linear = "true"
mask_slot_texture_small_linear = "true"
mask_slot_texture_large_linear = "true"
mask_shadow_texture_small_linear = "true"
mask_shadow_texture_large_linear = "true"
mask_grille_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_grille_texture_large_mipmap = "true" # Essential for hardware-resized masks
mask_slot_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_slot_texture_large_mipmap = "true" # Essential for hardware-resized masks
mask_shadow_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_shadow_texture_large_mipmap = "true" # Essential for hardware-resized masks
# Pass0: Linearize the input based on CRT gamma and bob interlaced fields.
# (Bobbing ensures we can immediately blur without getting artifacts.)
shader0 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang"
alias0 = "ORIG_LINEARIZED"
filter_linear0 = "false"
scale_type0 = "source"
scale0 = "1.0"
srgb_framebuffer0 = "true"
# Pass1: Resample interlaced (and misconverged) scanlines vertically.
# Separating vertical/horizontal scanline sampling is faster: It lets us
# consider more scanlines while calculating weights for fewer pixels, and
# it reduces our samples from vertical*horizontal to vertical+horizontal.
# This has to come right after ORIG_LINEARIZED, because there's no
# "original_source" scale_type we can use later.
shader1 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.slang"
alias1 = "VERTICAL_SCANLINES"
filter_linear1 = "true"
scale_type_x1 = "source"
scale_x1 = "1.0"
scale_type_y1 = "viewport"
scale_y1 = "1.0"
srgb_framebuffer1 = "true"
# Pass2: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
# account for convergence offsets. We want to blur a predictable portion of the
# screen to match the phosphor bloom, and absolute scale works best for
# reliable results with a fixed-size bloom. Picking a scale is tricky:
# a.) 400x300 is a good compromise for the "fake-bloom" version: It's low enough
# to blur high-res/interlaced sources but high enough that resampling
# doesn't smear low-res sources too much.
# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and
# the only noticeable visual difference is a larger halation spread (which
# may be a good thing for people who like to crank it up).
# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
# *intended* for an ~4:3 aspect ratio).
shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang"
alias2 = "BLOOM_APPROX"
filter_linear2 = "true"
scale_type2 = "absolute"
scale_x2 = "320"
scale_y2 = "240"
srgb_framebuffer2 = "true"
# Pass3: Vertically blur the input for halation and refractive diffusion.
# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring
# a constant portion of the screen is probably physically correct if the
# viewport resolution is proportional to the simulated CRT size.
shader3 = "../blurs/blur9fast-vertical.slang"
filter_linear3 = "true"
scale_type3 = "source"
scale3 = "1.0"
srgb_framebuffer3 = "true"
# Pass4: Horizontally blur the input for halation and refractive diffusion.
# Note: Using a one-pass 9x9 blur is about 1% slower.
shader4 = "../blurs/blur9fast-horizontal.slang"
alias4 = "HALATION_BLUR"
filter_linear4 = "true"
scale_type4 = "source"
scale4 = "1.0"
srgb_framebuffer4 = "true"
# Pass5: Lanczos-resize the phosphor mask vertically. Set the absolute
# scale_x5 == mask_texture_small_size.x (see IMPORTANT above). Larger scales
# will blur, and smaller scales could get nasty. The vertical size must be
# based on the viewport size and calculated carefully to avoid artifacts later.
# First calculate the minimum number of mask tiles we need to draw.
# Since curvature is computed after the scanline masking pass:
# num_resized_mask_tiles = 2.0;
# If curvature were computed in the scanline masking pass (it's not):
# max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0);
# max_mask_tile_border = max_mask_texel_border/
# (min_resized_phosphor_triad_size * mask_triads_per_tile);
# num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0);
# At typical values (triad_size >= 2.0, mask_triads_per_tile == 8):
# num_resized_mask_tiles = ~3.8
# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio
# to relate them to vertical resolution. The widest we expect is:
# geom_max_aspect_ratio = 4.0/3.0 # Note: Shader passes need to know this!
# The fewer triads we tile across the screen, the larger each triad will be as a
# fraction of the viewport size, and the larger scale_y5 must be to draw a full
# num_resized_mask_tiles. Therefore, we must decide the smallest number of
# triads we'll guarantee can be displayed on screen. We'll set this according
# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use):
# min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333
# Now calculate the viewport scale that ensures we can draw resized_mask_tiles:
# min_scale_x = resized_mask_tiles * mask_triads_per_tile /
# min_allowed_viewport_triads
# scale_y5 = geom_max_aspect_ratio * min_scale_x
# # Some code might depend on equal scales:
# scale_x6 = scale_y5
# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads:
# scale_y5 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625
# IMPORTANT: The scales MUST be calculated in this way. If you wish to change
# geom_max_aspect_ratio, update that constant in user-preset-constants.h!
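# Worked check of the derivation above, using the defaults the comments assume
# (mask_triads_per_tile = 8, 3-pixel triads at 768p):
#   min_allowed_viewport_triads = 768.0 * (4.0/3.0) / 3.0 = 341.333
#   num_resized_mask_tiles      = 2.0
#   min_scale_x                 = 2.0 * 8.0 / 341.333     = 0.046875
#   scale_y5                    = (4.0/3.0) * 0.046875    = 0.0625
#   scale_x6                    = scale_y5                = 0.0625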
shader5 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang"
filter_linear5 = "true"
scale_type_x5 = "absolute"
scale_x5 = "64"
scale_type_y5 = "viewport"
scale_y5 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
#srgb_framebuffer5 = "false" # mask_texture is already assumed linear
# Pass6: Lanczos-resize the phosphor mask horizontally. scale_x6 = scale_y5.
# TODO: Check again if the shaders actually require equal scales.
shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang"
alias6 = "MASK_RESIZE"
filter_linear6 = "false"
scale_type_x6 = "viewport"
scale_x6 = "0.0625"
scale_type_y6 = "source"
scale_y6 = "1.0"
#srgb_framebuffer6 = "false" # mask_texture is already assumed linear
# Pass7: Resample (misconverged) scanlines horizontally, apply halation, and
# apply the phosphor mask.
shader7 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.slang"
alias7 = "MASKED_SCANLINES"
filter_linear7 = "true" # This could just as easily be nearest neighbor.
scale_type7 = "viewport"
scale7 = "1.0"
srgb_framebuffer7 = "true"
# Pass 8: Compute a brightpass. This will require reading the final mask.
shader8 = "shaders/crt-royale/src/crt-royale-brightpass.slang"
alias8 = "BRIGHTPASS"
filter_linear8 = "true" # This could just as easily be nearest neighbor.
scale_type8 = "viewport"
scale8 = "1.0"
srgb_framebuffer8 = "true"
# Pass 9: Blur the brightpass vertically
shader9 = "shaders/crt-royale/src/crt-royale-bloom-vertical.slang"
filter_linear9 = "true" # This could just as easily be nearest neighbor.
scale_type9 = "source"
scale9 = "1.0"
srgb_framebuffer9 = "true"
# Pass 10: Blur the brightpass horizontally and combine it with the dimpass:
shader10 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang"
filter_linear10 = "true"
scale_type10 = "source"
scale10 = "1.0"
srgb_framebuffer10 = "true"
# Pass 11: Compute curvature/AA:
shader11 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.slang"
filter_linear11 = "true"
scale_type11 = "viewport"
mipmap_input11 = "true"
texture_wrap_mode11 = "clamp_to_edge"
parameters = "beam_num_scanlines"
beam_num_scanlines = 3.0

@@ -113,7 +113,7 @@ const float gba_gamma = 3.5; // Irrelevant but necessary to define.
const float beam_min_shape = max(2.0, beam_min_shape_static);
const float beam_max_shape = max(beam_min_shape, beam_max_shape_static);
const float beam_shape_power = max(0.0, beam_shape_power_static);
const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
// const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
const float beam_horiz_sigma = max(FIX_ZERO(0.0), beam_horiz_sigma_static);
const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
// Unpack vector elements to match scalar uniforms:

@@ -0,0 +1,354 @@
#version 450
layout(push_constant) uniform Push
{
vec4 SourceSize;
vec4 OriginalSize;
vec4 OutputSize;
uint FrameCount;
vec4 ORIG_LINEARIZEDSize;
} registers;
#include "params.inc"
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../user-settings.h"
#include "derived-settings-and-constants.h"
#include "bind-shader-params.h"
#include "../../../../include/gamma-management.h"
#include "../../../../include/blur-functions.h"
#include "scanline-functions.h"
#include "bloom-functions.h"
/////////////////////////////////// HELPERS //////////////////////////////////
vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
const vec2 dxdy, const vec2 texture_size, const vec2 texture_size_inv,
const vec2 tex_uv_to_pixel_scale, const float sigma)
{
// Requires: 1.) All requirements of gamma-management.h must be satisfied!
// 2.) filter_linearN must == "true" in your .cgp preset.
// 3.) mipmap_inputN must == "true" in your .cgp preset if
// IN.output_size << SRC.video_size.
// 4.) dxdy should contain the uv pixel spacing:
// dxdy = max(vec2(1.0),
// SRC.video_size/IN.output_size)/SRC.texture_size;
// 5.) texture_size == SRC.texture_size
// 6.) texture_size_inv == vec2(1.0)/SRC.texture_size
// 7.) tex_uv_to_pixel_scale == IN.output_size *
// SRC.texture_size / SRC.video_size;
// 8.) sigma is the desired Gaussian standard deviation, in
// terms of output pixels. It should be < ~0.66171875 to
// ensure the first unused sample (outside the 4x4 box) has
// a weight < 1.0/256.0.
// Returns: A true 4x4 Gaussian resize of the input.
// Description:
// Given correct inputs, this Gaussian resizer samples 4 pixel locations
// along each downsized dimension and/or 4 texel locations along each
// upsized dimension. It computes dynamic weights based on the pixel-space
// distance of each sample from the destination pixel. It is arbitrarily
// resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
// TODO: Move this to a more suitable file once there are others like it.
const float denom_inv = 0.5/(sigma*sigma);
// We're taking 4x4 samples, and we're snapping to texels for upsizing.
// Find texture coords for sample 5 (second row, second column):
const vec2 curr_texel = tex_uv * texture_size;
const vec2 prev_texel =
floor(curr_texel - vec2(under_half)) + vec2(0.5);
const vec2 prev_texel_uv = prev_texel * texture_size_inv;
const bvec2 snap = lessThanEqual(dxdy, texture_size_inv);
const vec2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
const vec2 sample5_uv = mix(sample5_downsize_uv, prev_texel_uv, snap);
// Compute texture coords for other samples:
const vec2 dx = vec2(dxdy.x, 0.0);
const vec2 sample0_uv = sample5_uv - dxdy;
const vec2 sample10_uv = sample5_uv + dxdy;
const vec2 sample15_uv = sample5_uv + 2.0 * dxdy;
const vec2 sample1_uv = sample0_uv + dx;
const vec2 sample2_uv = sample0_uv + 2.0 * dx;
const vec2 sample3_uv = sample0_uv + 3.0 * dx;
const vec2 sample4_uv = sample5_uv - dx;
const vec2 sample6_uv = sample5_uv + dx;
const vec2 sample7_uv = sample5_uv + 2.0 * dx;
const vec2 sample8_uv = sample10_uv - 2.0 * dx;
const vec2 sample9_uv = sample10_uv - dx;
const vec2 sample11_uv = sample10_uv + dx;
const vec2 sample12_uv = sample15_uv - 3.0 * dx;
const vec2 sample13_uv = sample15_uv - 2.0 * dx;
const vec2 sample14_uv = sample15_uv - dx;
// Load each sample:
const vec3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
const vec3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
const vec3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
const vec3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
const vec3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
const vec3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
const vec3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
const vec3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
const vec3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
const vec3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
const vec3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
const vec3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
const vec3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
const vec3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
const vec3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
const vec3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
// Compute destination pixel offsets for each sample:
const vec2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
const vec2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
const vec2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
// Compute Gaussian sample weights:
const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
const float weight_sum_inv = 1.0/(
w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
w8 + w9 + w10 + w11 + w12 + w13 + w14 + w15);
// Weight and sum the samples:
const vec3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
return sum * weight_sum_inv;
}
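// Example call (illustrative; it mirrors how main() below feeds this helper
// with the per-pass values computed in the vertex shader):
//   const vec3 blurred = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
//       blur_dxdy, texture_size, texture_size_inv,
//       tex_uv_to_pixel_scale, bloom_approx_sigma);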
#pragma stage vertex
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 tex_uv;
layout(location = 1) out float estimated_viewport_size_x;
layout(location = 2) out vec2 blur_dxdy;
layout(location = 3) out vec2 uv_scanline_step;
layout(location = 4) out vec2 texture_size_inv;
layout(location = 5) out vec2 tex_uv_to_pixel_scale;
void main()
{
// This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h,
// except we're using a different source image.
gl_Position = params.MVP * Position;
const vec2 video_uv = TexCoord;
tex_uv = video_uv;
// The last pass (vertical scanlines) had a viewport y scale, so we can
// use it to calculate a better runtime sigma:
estimated_viewport_size_x = registers.SourceSize.y * params.geom_aspect_ratio_x / params.geom_aspect_ratio_y;
// Get the uv sample distance between output pixels. We're using a resize
// blur, so arbitrary upsizing will be acceptable if filter_linearN =
// "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
// "true" too. The blur will be much more accurate if a true 4x4 Gaussian
// resize is used instead of tex2Dblur3x3_resize (which samples between
// texels even for upsizing).
const vec2 dxdy_min_scale = registers.ORIG_LINEARIZEDSize.xy * registers.OutputSize.zw;
texture_size_inv = registers.ORIG_LINEARIZEDSize.zw;
if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize
{
// For upsizing, we'll snap to texels and sample the nearest 4.
const vec2 dxdy_scale = max(dxdy_min_scale, vec2(1.0));
blur_dxdy = dxdy_scale * texture_size_inv;
}
else
{
const vec2 dxdy_scale = dxdy_min_scale;
blur_dxdy = dxdy_scale * texture_size_inv;
}
tex_uv_to_pixel_scale = registers.OutputSize.xy;
// texture_size_inv = texture_size_inv; <- commented out because it's pointless in slang
// Detecting interlacing again here lets us apply convergence offsets in
// this pass. il_step_multiple contains the (texel, scanline) step
// multiple: 1 for progressive, 2 for interlaced.
const vec2 orig_video_size = registers.ORIG_LINEARIZEDSize.xy;
float interlace_check = 0.0;
if (is_interlaced(orig_video_size.y) == true) interlace_check = 1.0;
const float y_step = 1.0 + interlace_check;
const vec2 il_step_multiple = vec2(1.0, y_step);
// Get the uv distance between (texels, same-field scanlines):
uv_scanline_step = il_step_multiple * registers.ORIG_LINEARIZEDSize.zw;
}
#pragma stage fragment
#pragma format R8G8B8A8_SRGB
layout(location = 0) in vec2 tex_uv;
layout(location = 1) in float estimated_viewport_size_x;
layout(location = 2) in vec2 blur_dxdy;
layout(location = 3) in vec2 uv_scanline_step;
layout(location = 4) in vec2 texture_size_inv;
layout(location = 5) in vec2 tex_uv_to_pixel_scale;
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source;
layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;
void main()
{
// Would a viewport-relative size work better for this pass? (No.)
// PROS:
// 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
// write a viewport scale. That number could be used to directly scale
// the viewport-resolution bloom sigma and/or triad size to a smaller
// scale. This way, we could calculate an optimal dynamic sigma no
// matter how the dot pitch is specified.
// CONS:
// 1.) Texel smearing would be much worse at small viewport sizes, but
// performance would be much worse at large viewport sizes, so there
// would be no easy way to calculate a decent scale.
// 2.) Worse, we could no longer get away with using a constant-size blur!
// Instead, we'd have to face all the same difficulties as the real
// phosphor bloom, which requires static #ifdefs to decide the blur
// size based on the expected triad size...a dynamic value.
// 3.) Like the phosphor bloom, we'd have less control over making the blur
// size correct for an optical blur. That said, we likely overblur (to
// maintain brightness) more than the eye would do by itself: 20/20
// human vision distinguishes ~1 arc minute, or 1/60 of a degree. The
// highest viewing angle recommendation I know of is THX's 40.04 degree
// recommendation, at which 20/20 vision can distinguish about 2402.4
// lines. Assuming the "TV lines" definition, that means 1201.2
// distinct light lines and 1201.2 distinct dark lines can be told
// apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2
// pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
// (if they're alternately lit). That's a max of 800.8 triads. Using
// a more popular 30 degree viewing angle recommendation, 20/20 vision
// can distinguish 1800 lines, or 600 triads of alternately lit
// phosphors. In contrast, we currently blur phosphors all the way
// down to 341.3 triads to ensure full brightness.
// 4.) Realistically speaking, we're usually just going to use bilinear
// filtering in this pass anyway, but it only works well to limit
// bandwidth if it's done at a small constant scale.
// Get the constants we need to sample:
const vec2 texture_size = registers.ORIG_LINEARIZEDSize.xy;
vec2 tex_uv_r, tex_uv_g, tex_uv_b;
if(beam_misconvergence == true)
{
const vec2 convergence_offsets_r = vec2(params.convergence_offset_x_r, params.convergence_offset_y_r);//get_convergence_offsets_r_vector();
const vec2 convergence_offsets_g = vec2(params.convergence_offset_x_g, params.convergence_offset_y_g);//get_convergence_offsets_g_vector();
const vec2 convergence_offsets_b = vec2(params.convergence_offset_x_b, params.convergence_offset_y_b);//get_convergence_offsets_b_vector();
tex_uv_r = tex_uv - vec2(params.convergence_offset_x_r, params.convergence_offset_y_r) * uv_scanline_step;
tex_uv_g = tex_uv - vec2(params.convergence_offset_x_g, params.convergence_offset_y_g) * uv_scanline_step;
tex_uv_b = tex_uv - vec2(params.convergence_offset_x_b, params.convergence_offset_y_b) * uv_scanline_step;
}
// Get the blur sigma:
const float bloom_approx_sigma = get_bloom_approx_sigma(registers.OutputSize.x, estimated_viewport_size_x);
// Sample the resized and blurred texture, and apply convergence offsets if
// necessary. Applying convergence offsets here triples our samples from
// 16/9/1 to 48/27/3, but that's still faster and easier than sampling
// BLOOM_APPROX and HALATION_BLUR 3 times at full resolution every time
// they're used.
vec3 color_r, color_g, color_b, color;
if(bloom_approx_filter > 1.5)
{
// Use a 4x4 Gaussian resize. This is slower but technically correct.
if(beam_misconvergence == true)
{
color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
}
else
{
color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
}
}
else if(bloom_approx_filter > 0.5)
{
// Use a 3x3 resize blur. This is the softest option, because we're
// blurring already blurry bilinear samples. It doesn't play quite as
// nicely with convergence offsets, but it has its charms.
if(beam_misconvergence == true)
{
color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
blur_dxdy, bloom_approx_sigma);
color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
blur_dxdy, bloom_approx_sigma);
color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
blur_dxdy, bloom_approx_sigma);
}
else
{
color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy);
}
}
else
{
// Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH
// better than tex2Dblur3x3_resize for the very small sigmas we're
// likely to use at small output resolutions. (This estimate becomes
// too sharp above ~400x300, but the blurs break down above that
// resolution too, unless min_allowed_viewport_triads is high enough to
// keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
if(beam_misconvergence == true)
{
color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
}
else
{
color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
}
}
// Pack the colors from the red/green/blue beams into a single vector:
if(beam_misconvergence == true)
{
color = vec3(color_r.r, color_g.g, color_b.b);
}
// Encode and output the blurred image:
FragColor = vec4(texture(ORIG_LINEARIZED, tex_uv));//vec4(color, 1.0);//
}

@@ -10,9 +10,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
{
mat4 MVP;
float interlace_bff;
float beam_horiz_filter;
} params;
#pragma parameter interlace_bff "interlace_bff" 1.0 0.0 1.0 1.0
#pragma parameter beam_horiz_filter "beam_horiz_filter" 0.0 0.0 2.0 1.0
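// The #pragma parameter fields are: name, "UI label", default, minimum, maximum, step.
// beam_horiz_filter therefore becomes a runtime option in [0, 2] (see the
// scanline-functions.h hunks below: < 0.5 selects Quilez, < 1.5 Gaussian,
// otherwise Lanczos2), replacing the compile-time constant derived from
// beam_horiz_filter_static.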
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////

@@ -108,7 +108,7 @@ void main()
// easier tiled sampling later.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
// Discard unneeded fragments in case our profile allows real branches.
const vec2 tile_uv_wrap = tile_uv_wrap;
// const vec2 tile_uv_wrap = tile_uv_wrap;
if(params.mask_sample_mode_desired < 0.5 &&
max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
{

@@ -156,7 +156,7 @@ vec3 get_scanline_color(const sampler2D tex, const vec2 scanline_uv,
const vec3 color2 = texture(tex, scanline_uv + uv_step_x).rgb;
vec3 color0 = vec3(0.0);
vec3 color3 = vec3(0.0);
if(beam_horiz_filter > 0.5)
if(params.beam_horiz_filter > 0.5)
{
color0 = texture(tex, scanline_uv - uv_step_x).rgb;
color3 = texture(tex, scanline_uv + 2.0 * uv_step_x).rgb;
@@ -183,14 +183,14 @@ vec3 sample_single_scanline_horizontal(const sampler2D texture,
1.0 - prev_dist, 2.0 - prev_dist);
// Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels:
vec4 weights;
if(beam_horiz_filter < 0.5)
if(params.beam_horiz_filter < 0.5)
{
// Quilez:
const float x = sample_dists.y;
const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0);
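// (w2 is the smootherstep polynomial 6x^5 - 15x^4 + 10x^3, used as the
// interpolation weight between the two central texels.)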
weights = vec4(0.0, 1.0 - w2, w2, 0.0);
}
else if(beam_horiz_filter < 1.5)
else if(params.beam_horiz_filter < 1.5)
{
// Gaussian:
float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);

@@ -219,12 +219,12 @@ beam_max_shape = "4.000000"
beam_shape_power = "0.250000"
beam_horiz_filter = "0.000000"
beam_horiz_sigma = "0.545000"
convergence_offset_x_r = "0.000000"
convergence_offset_x_r = "-0.050000"
convergence_offset_x_g = "0.000000"
convergence_offset_x_b = "0.000000"
convergence_offset_y_r = "0.000000"
convergence_offset_y_g = "0.000000"
convergence_offset_y_b = "0.000000"
convergence_offset_y_r = "0.100000"
convergence_offset_y_g = "-0.050000"
convergence_offset_y_b = "0.100000"
mask_type = "0.000000"
mask_sample_mode_desired = "1.000000"
mask_specify_num_triads = "0.000000"