mirror of
https://github.com/italicsjenga/slang-shaders.git
synced 2024-11-22 15:51:30 +11:00
fix beam_horiz_filter, update kurozumi, add fallback to fix my AMD crash
This commit is contained in:
parent
8590410b30
commit
6f47464db3
206
crt/crt-royale_fallback.slangp
Normal file
206
crt/crt-royale_fallback.slangp
Normal file
|
@ -0,0 +1,206 @@
|
|||
# IMPORTANT:
|
||||
# Shader passes need to know details about the image in the mask_texture LUT
|
||||
# files, so set the following constants in user-preset-constants.h accordingly:
|
||||
# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
|
||||
# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
|
||||
# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
|
||||
# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
|
||||
# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
|
||||
# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
|
||||
# Shader passes also need to know certain scales set in this preset, but their
|
||||
# compilation model doesn't currently allow the preset file to tell them. Make
|
||||
# sure to set the following constants in user-preset-constants.h accordingly too:
|
||||
# 1.) bloom_approx_scale_x = scale_x2
|
||||
# 2.) mask_resize_viewport_scale = vec2(scale_x6, scale_y5)
|
||||
# Finally, shader passes need to know the value of geom_max_aspect_ratio used to
|
||||
# calculate scale_y5 (among other values):
|
||||
# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)
|
||||
|
||||
shaders = "12"
|
||||
|
||||
# Set an identifier, filename, and sampling traits for the phosphor mask texture.
|
||||
# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
|
||||
# non-mipmapped version and a large mipmapped version.
|
||||
# TODO: Test masks in other directories.
|
||||
textures = "mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large"
|
||||
mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"
|
||||
mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"
|
||||
mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
|
||||
mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"
|
||||
mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
|
||||
mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png"
|
||||
mask_grille_texture_small_wrap_mode = "repeat"
|
||||
mask_grille_texture_large_wrap_mode = "repeat"
|
||||
mask_slot_texture_small_wrap_mode = "repeat"
|
||||
mask_slot_texture_large_wrap_mode = "repeat"
|
||||
mask_shadow_texture_small_wrap_mode = "repeat"
|
||||
mask_shadow_texture_large_wrap_mode = "repeat"
|
||||
mask_grille_texture_small_linear = "true"
|
||||
mask_grille_texture_large_linear = "true"
|
||||
mask_slot_texture_small_linear = "true"
|
||||
mask_slot_texture_large_linear = "true"
|
||||
mask_shadow_texture_small_linear = "true"
|
||||
mask_shadow_texture_large_linear = "true"
|
||||
mask_grille_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
mask_grille_texture_large_mipmap = "true" # Essential for hardware-resized masks
|
||||
mask_slot_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
mask_slot_texture_large_mipmap = "true" # Essential for hardware-resized masks
|
||||
mask_shadow_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
|
||||
mask_shadow_texture_large_mipmap = "true" # Essential for hardware-resized masks
|
||||
|
||||
|
||||
# Pass0: Linearize the input based on CRT gamma and bob interlaced fields.
|
||||
# (Bobbing ensures we can immediately blur without getting artifacts.)
|
||||
shader0 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang"
|
||||
alias0 = "ORIG_LINEARIZED"
|
||||
filter_linear0 = "false"
|
||||
scale_type0 = "source"
|
||||
scale0 = "1.0"
|
||||
srgb_framebuffer0 = "true"
|
||||
|
||||
# Pass1: Resample interlaced (and misconverged) scanlines vertically.
|
||||
# Separating vertical/horizontal scanline sampling is faster: It lets us
|
||||
# consider more scanlines while calculating weights for fewer pixels, and
|
||||
# it reduces our samples from vertical*horizontal to vertical+horizontal.
|
||||
# This has to come right after ORIG_LINEARIZED, because there's no
|
||||
# "original_source" scale_type we can use later.
|
||||
shader1 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.slang"
|
||||
alias1 = "VERTICAL_SCANLINES"
|
||||
filter_linear1 = "true"
|
||||
scale_type_x1 = "source"
|
||||
scale_x1 = "1.0"
|
||||
scale_type_y1 = "viewport"
|
||||
scale_y1 = "1.0"
|
||||
srgb_framebuffer1 = "true"
|
||||
|
||||
# Pass2: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
|
||||
# account for convergence offsets. We want to blur a predictable portion of the
|
||||
# screen to match the phosphor bloom, and absolute scale works best for
|
||||
# reliable results with a fixed-size bloom. Picking a scale is tricky:
|
||||
# a.) 400x300 is a good compromise for the "fake-bloom" version: It's low enough
|
||||
# to blur high-res/interlaced sources but high enough that resampling
|
||||
# doesn't smear low-res sources too much.
|
||||
# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and
|
||||
# the only noticeable visual difference is a larger halation spread (which
|
||||
# may be a good thing for people who like to crank it up).
|
||||
# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
|
||||
# *intended* for an ~4:3 aspect ratio).
|
||||
shader2 = "shaders/crt-royale/src/crt-royale-bloom-approx_fallback.slang"
|
||||
alias2 = "BLOOM_APPROX"
|
||||
filter_linear2 = "true"
|
||||
scale_type2 = "absolute"
|
||||
scale_x2 = "320"
|
||||
scale_y2 = "240"
|
||||
srgb_framebuffer2 = "true"
|
||||
|
||||
# Pass3: Vertically blur the input for halation and refractive diffusion.
|
||||
# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring
|
||||
# a constant portion of the screen is probably physically correct if the
|
||||
# viewport resolution is proportional to the simulated CRT size.
|
||||
shader3 = "../blurs/blur9fast-vertical.slang"
|
||||
filter_linear3 = "true"
|
||||
scale_type3 = "source"
|
||||
scale3 = "1.0"
|
||||
srgb_framebuffer3 = "true"
|
||||
|
||||
# Pass4: Horizontally blur the input for halation and refractive diffusion.
|
||||
# Note: Using a one-pass 9x9 blur is about 1% slower.
|
||||
shader4 = "../blurs/blur9fast-horizontal.slang"
|
||||
alias4 = "HALATION_BLUR"
|
||||
filter_linear4 = "true"
|
||||
scale_type4 = "source"
|
||||
scale4 = "1.0"
|
||||
srgb_framebuffer4 = "true"
|
||||
|
||||
# Pass5: Lanczos-resize the phosphor mask vertically. Set the absolute
|
||||
# scale_x5 == mask_texture_small_size.x (see IMPORTANT above). Larger scales
|
||||
# will blur, and smaller scales could get nasty. The vertical size must be
|
||||
# based on the viewport size and calculated carefully to avoid artifacts later.
|
||||
# First calculate the minimum number of mask tiles we need to draw.
|
||||
# Since curvature is computed after the scanline masking pass:
|
||||
# num_resized_mask_tiles = 2.0;
|
||||
# If curvature were computed in the scanline masking pass (it's not):
|
||||
# max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0);
|
||||
# max_mask_tile_border = max_mask_texel_border/
|
||||
# (min_resized_phosphor_triad_size * mask_triads_per_tile);
|
||||
# num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0);
|
||||
# At typical values (triad_size >= 2.0, mask_triads_per_tile == 8):
|
||||
# num_resized_mask_tiles = ~3.8
|
||||
# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio
|
||||
# to relate them to vertical resolution. The widest we expect is:
|
||||
# geom_max_aspect_ratio = 4.0/3.0 # Note: Shader passes need to know this!
|
||||
# The fewer triads we tile across the screen, the larger each triad will be as a
|
||||
# fraction of the viewport size, and the larger scale_y5 must be to draw a full
|
||||
# num_resized_mask_tiles. Therefore, we must decide the smallest number of
|
||||
# triads we'll guarantee can be displayed on screen. We'll set this according
|
||||
# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use):
|
||||
# min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333
|
||||
# Now calculate the viewport scale that ensures we can draw num_resized_mask_tiles:
|
||||
# min_scale_x = num_resized_mask_tiles * mask_triads_per_tile /
|
||||
# min_allowed_viewport_triads
|
||||
# scale_y5 = geom_max_aspect_ratio * min_scale_x
|
||||
# # Some code might depend on equal scales:
|
||||
# scale_x6 = scale_y5
|
||||
# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads:
|
||||
# scale_y5 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625
|
||||
# IMPORTANT: The scales MUST be calculated in this way. If you wish to change
|
||||
# geom_max_aspect_ratio, update that constant in user-preset-constants.h!
|
||||
shader5 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang"
|
||||
filter_linear5 = "true"
|
||||
scale_type_x5 = "absolute"
|
||||
scale_x5 = "64"
|
||||
scale_type_y5 = "viewport"
|
||||
scale_y5 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
|
||||
#srgb_framebuffer5 = "false" # mask_texture is already assumed linear
|
||||
|
||||
# Pass6: Lanczos-resize the phosphor mask horizontally. scale_x6 = scale_y5.
|
||||
# TODO: Check again if the shaders actually require equal scales.
|
||||
shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.slang"
|
||||
alias6 = "MASK_RESIZE"
|
||||
filter_linear6 = "false"
|
||||
scale_type_x6 = "viewport"
|
||||
scale_x6 = "0.0625"
|
||||
scale_type_y6 = "source"
|
||||
scale_y6 = "1.0"
|
||||
#srgb_framebuffer6 = "false" # mask_texture is already assumed linear
|
||||
|
||||
# Pass7: Resample (misconverged) scanlines horizontally, apply halation, and
|
||||
# apply the phosphor mask.
|
||||
shader7 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.slang"
|
||||
alias7 = "MASKED_SCANLINES"
|
||||
filter_linear7 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type7 = "viewport"
|
||||
scale7 = "1.0"
|
||||
srgb_framebuffer7 = "true"
|
||||
|
||||
# Pass 8: Compute a brightpass. This will require reading the final mask.
|
||||
shader8 = "shaders/crt-royale/src/crt-royale-brightpass.slang"
|
||||
alias8 = "BRIGHTPASS"
|
||||
filter_linear8 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type8 = "viewport"
|
||||
scale8 = "1.0"
|
||||
srgb_framebuffer8 = "true"
|
||||
|
||||
# Pass 9: Blur the brightpass vertically
|
||||
shader9 = "shaders/crt-royale/src/crt-royale-bloom-vertical.slang"
|
||||
filter_linear9 = "true" # This could just as easily be nearest neighbor.
|
||||
scale_type9 = "source"
|
||||
scale9 = "1.0"
|
||||
srgb_framebuffer9 = "true"
|
||||
|
||||
# Pass 10: Blur the brightpass horizontally and combine it with the dimpass:
|
||||
shader10 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.slang"
|
||||
filter_linear10 = "true"
|
||||
scale_type10 = "source"
|
||||
scale10 = "1.0"
|
||||
srgb_framebuffer10 = "true"
|
||||
|
||||
# Pass 11: Compute curvature/AA:
|
||||
shader11 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.slang"
|
||||
filter_linear11 = "true"
|
||||
scale_type11 = "viewport"
|
||||
mipmap_input11 = "true"
|
||||
texture_wrap_mode11 = "clamp_to_edge"
|
||||
|
||||
parameters = "beam_num_scanlines"
|
||||
beam_num_scanlines = 3.0
|
|
@ -113,7 +113,7 @@ const float gba_gamma = 3.5; // Irrelevant but necessary to define.
|
|||
const float beam_min_shape = max(2.0, beam_min_shape_static);
|
||||
const float beam_max_shape = max(beam_min_shape, beam_max_shape_static);
|
||||
const float beam_shape_power = max(0.0, beam_shape_power_static);
|
||||
const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
|
||||
// const float beam_horiz_filter = clamp(beam_horiz_filter_static, 0.0, 2.0);
|
||||
const float beam_horiz_sigma = max(FIX_ZERO(0.0), beam_horiz_sigma_static);
|
||||
const float beam_horiz_linear_rgb_weight = clamp(beam_horiz_linear_rgb_weight_static, 0.0, 1.0);
|
||||
// Unpack vector elements to match scalar uniforms:
|
||||
|
|
|
@ -0,0 +1,354 @@
|
|||
#version 450
|
||||
|
||||
// Push-constant block; its members are read through the instance name `registers`.
// For the *Size members used in this file, .xy is read as a size and .zw as the
// matching inverse size (see the texture_size / texture_size_inv assignments below).
layout(push_constant) uniform Push
|
||||
{
|
||||
vec4 SourceSize; // .y is read by the vertex stage to estimate the viewport width
|
||||
vec4 OriginalSize; // not referenced in the visible part of this file
|
||||
vec4 OutputSize; // .xy scales uv to output pixels; .zw scales input size to an input/output ratio
|
||||
uint FrameCount; // not referenced in the visible part of this file
|
||||
vec4 ORIG_LINEARIZEDSize; // size info for the ORIG_LINEARIZED sampler: .xy = texture_size, .zw = texture_size_inv
|
||||
} registers;
|
||||
|
||||
#include "params.inc"
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License as published by the Free
|
||||
// Software Foundation; either version 2 of the License, or any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
// more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along with
|
||||
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
// Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "../user-settings.h"
|
||||
#include "derived-settings-and-constants.h"
|
||||
#include "bind-shader-params.h"
|
||||
#include "../../../../include/gamma-management.h"
|
||||
#include "../../../../include/blur-functions.h"
|
||||
#include "scanline-functions.h"
|
||||
#include "bloom-functions.h"
|
||||
|
||||
/////////////////////////////////// HELPERS //////////////////////////////////
|
||||
|
||||
vec3 tex2Dresize_gaussian4x4(const sampler2D tex, const vec2 tex_uv,
    const vec2 dxdy, const vec2 texture_size, const vec2 texture_size_inv,
    const vec2 tex_uv_to_pixel_scale, const float sigma)
{
    //  Purpose:    Resize the input with a 4x4-tap Gaussian whose weights are
    //              recomputed per pixel from each tap's pixel-space distance
    //              to the destination pixel.
    //  Requires:   1.) Every requirement of gamma-management.h is satisfied.
    //              2.) filter_linearN is "true" in the preset.
    //              3.) mipmap_inputN is "true" in the preset if
    //                  IN.output_size << SRC.video_size.
    //              4.) dxdy holds the uv spacing between taps:
    //                      dxdy = max(vec2(1.0),
    //                          SRC.video_size/IN.output_size)/SRC.texture_size;
    //              5.) texture_size == SRC.texture_size
    //              6.) texture_size_inv == vec2(1.0)/SRC.texture_size
    //              7.) tex_uv_to_pixel_scale == IN.output_size *
    //                      SRC.texture_size / SRC.video_size;
    //              8.) sigma is the Gaussian standard deviation in output
    //                  pixels. Keep it below ~0.66171875 so the first unused
    //                  tap (just outside the 4x4 box) would have a weight
    //                  below 1.0/256.0.
    //  Returns:    The normalized, Gaussian-weighted sum of 16 linearized taps.
    //  Notes:      Taps sit on pixel locations along each downsized dimension
    //              and on texel locations along each upsized dimension. This
    //              is arbitrarily resizable and higher quality than
    //              tex2Dblur3x3_resize, but slower.
    //  TODO: Move this to a more suitable file once there are others like it.

    //  Gaussian exponent factor: 1/(2*sigma^2).
    const float inv_two_sigma_sq = 0.5/(sigma*sigma);

    //  Taps are numbered row-major (index = 4*row + column). Start from tap 5
    //  (second row, second column). Along an upsized dimension it snaps to a
    //  texel center; along a downsized one it sits half a step before tex_uv.
    const vec2 texel_pos = tex_uv * texture_size;
    const vec2 snapped_texel =
        floor(texel_pos - vec2(under_half)) + vec2(0.5);
    const vec2 snapped_uv = snapped_texel * texture_size_inv;
    const bvec2 snap_to_texel = lessThanEqual(dxdy, texture_size_inv);
    const vec2 unsnapped_uv = tex_uv - 0.5 * dxdy;
    const vec2 uv5 = mix(unsnapped_uv, snapped_uv, snap_to_texel);

    //  Derive the other 15 tap coordinates from the diagonal taps (0, 5, 10,
    //  15) by stepping horizontally:
    const vec2 step_x = vec2(dxdy.x, 0.0);
    const vec2 uv0 = uv5 - dxdy;
    const vec2 uv10 = uv5 + dxdy;
    const vec2 uv15 = uv5 + 2.0 * dxdy;
    //  Row 0:
    const vec2 uv1 = uv0 + step_x;
    const vec2 uv2 = uv0 + 2.0 * step_x;
    const vec2 uv3 = uv0 + 3.0 * step_x;
    //  Row 1:
    const vec2 uv4 = uv5 - step_x;
    const vec2 uv6 = uv5 + step_x;
    const vec2 uv7 = uv5 + 2.0 * step_x;
    //  Row 2:
    const vec2 uv8 = uv10 - 2.0 * step_x;
    const vec2 uv9 = uv10 - step_x;
    const vec2 uv11 = uv10 + step_x;
    //  Row 3:
    const vec2 uv12 = uv15 - 3.0 * step_x;
    const vec2 uv13 = uv15 - 2.0 * step_x;
    const vec2 uv14 = uv15 - step_x;

    //  Fetch the linearized color of every tap:
    const vec3 tap0 = tex2D_linearize(tex, uv0).rgb;
    const vec3 tap1 = tex2D_linearize(tex, uv1).rgb;
    const vec3 tap2 = tex2D_linearize(tex, uv2).rgb;
    const vec3 tap3 = tex2D_linearize(tex, uv3).rgb;
    const vec3 tap4 = tex2D_linearize(tex, uv4).rgb;
    const vec3 tap5 = tex2D_linearize(tex, uv5).rgb;
    const vec3 tap6 = tex2D_linearize(tex, uv6).rgb;
    const vec3 tap7 = tex2D_linearize(tex, uv7).rgb;
    const vec3 tap8 = tex2D_linearize(tex, uv8).rgb;
    const vec3 tap9 = tex2D_linearize(tex, uv9).rgb;
    const vec3 tap10 = tex2D_linearize(tex, uv10).rgb;
    const vec3 tap11 = tex2D_linearize(tex, uv11).rgb;
    const vec3 tap12 = tex2D_linearize(tex, uv12).rgb;
    const vec3 tap13 = tex2D_linearize(tex, uv13).rgb;
    const vec3 tap14 = tex2D_linearize(tex, uv14).rgb;
    const vec3 tap15 = tex2D_linearize(tex, uv15).rgb;

    //  Pixel-space displacement of each tap from the destination pixel:
    const vec2 center_px = tex_uv * tex_uv_to_pixel_scale;
    const vec2 delta0 = uv0 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta1 = uv1 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta2 = uv2 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta3 = uv3 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta4 = uv4 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta5 = uv5 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta6 = uv6 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta7 = uv7 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta8 = uv8 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta9 = uv9 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta10 = uv10 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta11 = uv11 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta12 = uv12 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta13 = uv13 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta14 = uv14 * tex_uv_to_pixel_scale - center_px;
    const vec2 delta15 = uv15 * tex_uv_to_pixel_scale - center_px;

    //  Unnormalized Gaussian weight of each tap: exp(-d^2 / (2*sigma^2)).
    const float g0 = exp(-LENGTH_SQ(delta0) * inv_two_sigma_sq);
    const float g1 = exp(-LENGTH_SQ(delta1) * inv_two_sigma_sq);
    const float g2 = exp(-LENGTH_SQ(delta2) * inv_two_sigma_sq);
    const float g3 = exp(-LENGTH_SQ(delta3) * inv_two_sigma_sq);
    const float g4 = exp(-LENGTH_SQ(delta4) * inv_two_sigma_sq);
    const float g5 = exp(-LENGTH_SQ(delta5) * inv_two_sigma_sq);
    const float g6 = exp(-LENGTH_SQ(delta6) * inv_two_sigma_sq);
    const float g7 = exp(-LENGTH_SQ(delta7) * inv_two_sigma_sq);
    const float g8 = exp(-LENGTH_SQ(delta8) * inv_two_sigma_sq);
    const float g9 = exp(-LENGTH_SQ(delta9) * inv_two_sigma_sq);
    const float g10 = exp(-LENGTH_SQ(delta10) * inv_two_sigma_sq);
    const float g11 = exp(-LENGTH_SQ(delta11) * inv_two_sigma_sq);
    const float g12 = exp(-LENGTH_SQ(delta12) * inv_two_sigma_sq);
    const float g13 = exp(-LENGTH_SQ(delta13) * inv_two_sigma_sq);
    const float g14 = exp(-LENGTH_SQ(delta14) * inv_two_sigma_sq);
    const float g15 = exp(-LENGTH_SQ(delta15) * inv_two_sigma_sq);
    const float normalizer = 1.0/(
        g0 + g1 + g2 + g3 + g4 + g5 + g6 + g7 +
        g8 + g9 + g10 + g11 + g12 + g13 + g14 + g15);

    //  Accumulate the weighted taps, then normalize:
    const vec3 weighted_total =
        g0 * tap0 + g1 * tap1 + g2 * tap2 + g3 * tap3 +
        g4 * tap4 + g5 * tap5 + g6 * tap6 + g7 * tap7 +
        g8 * tap8 + g9 * tap9 + g10 * tap10 + g11 * tap11 +
        g12 * tap12 + g13 * tap13 + g14 * tap14 + g15 * tap15;
    return weighted_total * normalizer;
}
|
||||
|
||||
#pragma stage vertex
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 tex_uv;
layout(location = 1) out float estimated_viewport_size_x;
layout(location = 2) out vec2 blur_dxdy;
layout(location = 3) out vec2 uv_scanline_step;
layout(location = 4) out vec2 texture_size_inv;
layout(location = 5) out vec2 tex_uv_to_pixel_scale;

//  Vertex stage: transforms the vertex and precomputes the per-pass sampling
//  constants the fragment stage reads (tap spacing, inverse texture size,
//  uv-to-pixel scale, and the same-field scanline step).
void main()
{
    //  This follows blurs/vertex-shader-blur-one-pass-resize.h, except that
    //  the sampled image is ORIG_LINEARIZED rather than the previous pass.
    gl_Position = params.MVP * Position;
    tex_uv = TexCoord;

    //  The previous pass (vertical scanlines) used a viewport y scale, so its
    //  height lets us estimate the viewport width for a better runtime sigma:
    estimated_viewport_size_x = registers.SourceSize.y * params.geom_aspect_ratio_x / params.geom_aspect_ratio_y;

    //  uv distance between output pixels. With a resize blur, arbitrary
    //  upsizing is acceptable when filter_linearN = "true", and arbitrary
    //  downsizing is acceptable when mipmap_inputN = "true" as well. A true
    //  4x4 Gaussian resize is much more accurate than tex2Dblur3x3_resize,
    //  which samples between texels even when upsizing.
    const vec2 input_per_output = registers.ORIG_LINEARIZEDSize.xy * registers.OutputSize.zw;
    texture_size_inv = registers.ORIG_LINEARIZEDSize.zw;

    //  bloom_approx_filter > 1.5 selects the 4x4 true Gaussian resize; when
    //  upsizing it snaps to texels and samples the nearest 4, so its tap
    //  spacing never drops below one texel. Other modes use the raw ratio.
    const vec2 tap_spacing_texels = (bloom_approx_filter > 1.5) ?
        max(input_per_output, vec2(1.0)) : input_per_output;
    blur_dxdy = tap_spacing_texels * texture_size_inv;

    tex_uv_to_pixel_scale = registers.OutputSize.xy;

    //  Detect interlacing again so convergence offsets can be applied in this
    //  pass. The (texel, scanline) step multiple is 1 for progressive content
    //  and 2 vertically for interlaced content.
    const float scanline_multiple =
        (is_interlaced(registers.ORIG_LINEARIZEDSize.y) == true) ? 2.0 : 1.0;

    //  uv distance between (texels, same-field scanlines):
    uv_scanline_step = vec2(1.0, scanline_multiple) * registers.ORIG_LINEARIZEDSize.zw;
}
|
||||
|
||||
#pragma stage fragment
#pragma format R8G8B8A8_SRGB
layout(location = 0) in vec2 tex_uv;
layout(location = 1) in float estimated_viewport_size_x;
layout(location = 2) in vec2 blur_dxdy;
layout(location = 3) in vec2 uv_scanline_step;
layout(location = 4) in vec2 texture_size_inv;
layout(location = 5) in vec2 tex_uv_to_pixel_scale;
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source;
layout(set = 0, binding = 3) uniform sampler2D ORIG_LINEARIZED;

//  Fragment stage of the fallback bloom-approx pass. It still evaluates the
//  resize blur selected by bloom_approx_filter (with optional per-channel
//  convergence offsets), but the value written to FragColor is the plain
//  ORIG_LINEARIZED sample at tex_uv; see the final statement.
void main()
{
    //  Would a viewport-relative size work better for this pass?  (No.)
    //  PROS:
    //  1.) Instead of writing an absolute size to user-preset-constants.h,
    //      we'd write a viewport scale.  That number could be used to
    //      directly scale the viewport-resolution bloom sigma and/or triad
    //      size to a smaller scale.  This way, we could calculate an optimal
    //      dynamic sigma no matter how the dot pitch is specified.
    //  CONS:
    //  1.) Texel smearing would be much worse at small viewport sizes, but
    //      performance would be much worse at large viewport sizes, so there
    //      would be no easy way to calculate a decent scale.
    //  2.) Worse, we could no longer get away with using a constant-size blur!
    //      Instead, we'd have to face all the same difficulties as the real
    //      phosphor bloom, which requires static #ifdefs to decide the blur
    //      size based on the expected triad size...a dynamic value.
    //  3.) Like the phosphor bloom, we'd have less control over making the blur
    //      size correct for an optical blur.  That said, we likely overblur (to
    //      maintain brightness) more than the eye would do by itself: 20/20
    //      human vision distinguishes ~1 arc minute, or 1/60 of a degree.  The
    //      highest viewing angle recommendation I know of is THX's 40.04 degree
    //      recommendation, at which 20/20 vision can distinguish about 2402.4
    //      lines.  Assuming the "TV lines" definition, that means 1201.2
    //      distinct light lines and 1201.2 distinct dark lines can be told
    //      apart, i.e. 1201.2 pairs of lines.  This would correspond to 1201.2
    //      pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
    //      (if they're alternately lit).  That's a max of 800.8 triads.  Using
    //      a more popular 30 degree viewing angle recommendation, 20/20 vision
    //      can distinguish 1800 lines, or 600 triads of alternately lit
    //      phosphors.  In contrast, we currently blur phosphors all the way
    //      down to 341.3 triads to ensure full brightness.
    //  4.) Realistically speaking, we're usually just going to use bilinear
    //      filtering in this pass anyway, but it only works well to limit
    //      bandwidth if it's done at a small constant scale.

    //  Get the constants we need to sample:
    const vec2 texture_size = registers.ORIG_LINEARIZEDSize.xy;
    vec2 tex_uv_r, tex_uv_g, tex_uv_b;

    if(beam_misconvergence == true)
    {
        //  Per-channel (x, y) convergence offsets from the runtime parameters,
        //  scaled by the (texel, same-field scanline) uv step and subtracted
        //  from the shared uv. The original trailing comments suggest these
        //  stand in for get_convergence_offsets_{r,g,b}_vector().
        const vec2 convergence_offsets_r = vec2(params.convergence_offset_x_r, params.convergence_offset_y_r);
        const vec2 convergence_offsets_g = vec2(params.convergence_offset_x_g, params.convergence_offset_y_g);
        const vec2 convergence_offsets_b = vec2(params.convergence_offset_x_b, params.convergence_offset_y_b);
        tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
        tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
        tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
    }
    //  Get the blur sigma:
    const float bloom_approx_sigma = get_bloom_approx_sigma(registers.OutputSize.x, estimated_viewport_size_x);

    //  Sample the resized and blurred texture, and apply convergence offsets if
    //  necessary.  Applying convergence offsets here triples our samples from
    //  16/9/1 to 48/27/3, but it's faster and easier than sampling BLOOM_APPROX
    //  and HALATION_BLUR 3 times at full resolution every time they're used.
    vec3 color_r, color_g, color_b, color;
    if(bloom_approx_filter > 1.5)
    {
        //  Use a 4x4 Gaussian resize.  This is slower but technically correct.
        if(beam_misconvergence == true)
        {
            color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
            color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
        else
        {
            color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
                blur_dxdy, texture_size, texture_size_inv,
                tex_uv_to_pixel_scale, bloom_approx_sigma);
        }
    }
    else if(bloom_approx_filter > 0.5)
    {
        //  Use a 3x3 resize blur.  This is the softest option, because we're
        //  blurring already blurry bilinear samples.  It doesn't play quite as
        //  nicely with convergence offsets, but it has its charms.
        if(beam_misconvergence == true)
        {
            color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
                blur_dxdy, bloom_approx_sigma);
            color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
                blur_dxdy, bloom_approx_sigma);
            color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
                blur_dxdy, bloom_approx_sigma);
        }
        else
        {
            //  NOTE(review): unlike the three calls above, this call omits
            //  bloom_approx_sigma, so it relies on a 3-argument overload
            //  declared outside this file -- confirm that is intended.
            color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy);
        }
    }
    else
    {
        //  Use bilinear sampling.  This approximates a 4x4 Gaussian resize MUCH
        //  better than tex2Dblur3x3_resize for the very small sigmas we're
        //  likely to use at small output resolutions.  (This estimate becomes
        //  too sharp above ~400x300, but the blurs break down above that
        //  resolution too, unless min_allowed_viewport_triads is high enough to
        //  keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
        if(beam_misconvergence == true)
        {
            color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
            color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
            color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
        }
        else
        {
            color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
        }
    }
    //  Pack the colors from the red/green/blue beams into a single vector:
    if(beam_misconvergence == true)
    {
        color = vec3(color_r.r, color_g.g, color_b.b);
    }
    //  Fallback output: write the raw ORIG_LINEARIZED sample at tex_uv.  The
    //  blurred result computed above is not written; the non-fallback form of
    //  this statement is kept here for reference:  vec4(color, 1.0)
    FragColor = vec4(texture(ORIG_LINEARIZED, tex_uv));
}
|
|
@ -10,9 +10,11 @@ layout(std140, set = 0, binding = 0) uniform UBO
|
|||
{
|
||||
mat4 MVP;
|
||||
float interlace_bff;
|
||||
float beam_horiz_filter;
|
||||
} params;
|
||||
|
||||
#pragma parameter interlace_bff "interlace_bff" 1.0 0.0 1.0 1.0
|
||||
#pragma parameter beam_horiz_filter "beam_horiz_filter" 0.0 0.0 2.0 1.0
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
|
|
|
@ -108,7 +108,7 @@ void main()
|
|||
// easier tiled sampling later.
|
||||
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
|
||||
// Discard unneeded fragments in case our profile allows real branches.
|
||||
const vec2 tile_uv_wrap = tile_uv_wrap;
|
||||
// const vec2 tile_uv_wrap = tile_uv_wrap;
|
||||
if(params.mask_sample_mode_desired < 0.5 &&
|
||||
max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
|
||||
{
|
||||
|
|
|
@ -156,7 +156,7 @@ vec3 get_scanline_color(const sampler2D tex, const vec2 scanline_uv,
|
|||
const vec3 color2 = texture(tex, scanline_uv + uv_step_x).rgb;
|
||||
vec3 color0 = vec3(0.0);
|
||||
vec3 color3 = vec3(0.0);
|
||||
if(beam_horiz_filter > 0.5)
|
||||
if(params.beam_horiz_filter > 0.5)
|
||||
{
|
||||
color0 = texture(tex, scanline_uv - uv_step_x).rgb;
|
||||
color3 = texture(tex, scanline_uv + 2.0 * uv_step_x).rgb;
|
||||
|
@ -183,14 +183,14 @@ vec3 sample_single_scanline_horizontal(const sampler2D texture,
|
|||
1.0 - prev_dist, 2.0 - prev_dist);
|
||||
// Get Quilez, Lanczos2, or Gaussian resize weights for 2/4 nearby texels:
|
||||
vec4 weights;
|
||||
if(beam_horiz_filter < 0.5)
|
||||
if(params.beam_horiz_filter < 0.5)
|
||||
{
|
||||
// Quilez:
|
||||
const float x = sample_dists.y;
|
||||
const float w2 = x*x*x*(x*(x*6.0 - 15.0) + 10.0);
|
||||
weights = vec4(0.0, 1.0 - w2, w2, 0.0);
|
||||
}
|
||||
else if(beam_horiz_filter < 1.5)
|
||||
else if(params.beam_horiz_filter < 1.5)
|
||||
{
|
||||
// Gaussian:
|
||||
float inner_denom_inv = 1.0/(2.0*beam_horiz_sigma*beam_horiz_sigma);
|
||||
|
|
|
@ -219,12 +219,12 @@ beam_max_shape = "4.000000"
|
|||
beam_shape_power = "0.250000"
|
||||
beam_horiz_filter = "0.000000"
|
||||
beam_horiz_sigma = "0.545000"
|
||||
convergence_offset_x_r = "0.000000"
|
||||
convergence_offset_x_r = "-0.050000"
|
||||
convergence_offset_x_g = "0.000000"
|
||||
convergence_offset_x_b = "0.000000"
|
||||
convergence_offset_y_r = "0.000000"
|
||||
convergence_offset_y_g = "0.000000"
|
||||
convergence_offset_y_b = "0.000000"
|
||||
convergence_offset_y_r = "0.100000"
|
||||
convergence_offset_y_g = "-0.050000"
|
||||
convergence_offset_y_b = "0.100000"
|
||||
mask_type = "0.000000"
|
||||
mask_sample_mode_desired = "1.000000"
|
||||
mask_specify_num_triads = "0.000000"
|
||||
|
|
Loading…
Reference in a new issue