slang-shaders/crt/shaders/crt-royale/src/derived-settings-and-constants.h

316 lines
14 KiB
C

#ifndef DERIVED_SETTINGS_AND_CONSTANTS_H
#define DERIVED_SETTINGS_AND_CONSTANTS_H
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////////// DESCRIPTION ////////////////////////////////
// These macros and constants can be used across the whole codebase.
// Unlike the values in user-settings.h, end users shouldn't modify these.
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../user-settings.h"
#include "user-cgp-constants.h"
/////////////////////////////// FIXED SETTINGS ///////////////////////////////
// Clamp a magnitude away from zero so it is safe to divide by. As a macro it
// works unchanged for float, float2, etc. Note that abs() discards the sign,
// so the result is always >= 2^-16.
#define FIX_ZERO(v) (max(abs(v), 0.0000152587890625)) // 2^-16
// The first pass must decode CRT gamma and the last pass must encode LCD
// gamma, so switch this on unless it has already been #defined.
#if !defined(SIMULATE_CRT_ON_LCD)
    #define SIMULATE_CRT_ON_LCD
#endif
// Manually tiling a manually resized texture creates texture coord derivative
// discontinuities and confuses anisotropic filtering, causing discolored tile
// seams in the phosphor mask. Workarounds:
// a.) Using tex2Dlod disables anisotropic filtering for tiled masks. It's
// downgraded to tex2Dbias without DRIVERS_ALLOW_TEX2DLOD #defined and
// disabled without DRIVERS_ALLOW_TEX2DBIAS #defined either.
// b.) "Tile flat twice" requires drawing two full tiles without border padding
// to the resized mask FBO, and it's incompatible with same-pass curvature.
// (Same-pass curvature isn't used but could be in the future...maybe.)
// c.) "Fix discontinuities" requires derivatives and drawing one tile with
// border padding to the resized mask FBO, but it works with same-pass
// curvature. It's disabled without DRIVERS_ALLOW_DERIVATIVES #defined.
// Precedence: a, then b, then c (if multiple strategies are #defined).
// All three are requested here; the DERIVED SETTINGS section below #undefs
// whichever ones are unavailable or outranked, leaving only the
// highest-precedence strategy the driver supports.
#define ANISOTROPIC_TILING_COMPAT_TEX2DLOD // 129.7 FPS, 4x, flat; 101.8 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE // 128.1 FPS, 4x, flat; 101.5 at fullscreen
#define ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES // 124.4 FPS, 4x, flat; 97.4 at fullscreen
// Also, manually resampling the phosphor mask is slightly blurrier with
// anisotropic filtering. (Resampling with mipmapping is even worse: It
// creates artifacts, but only with the fully bloomed shader.) The difference
// is subtle with small triads, but you can fix it for a small cost by
// uncommenting the following line.
//#define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
////////////////////////////// DERIVED SETTINGS //////////////////////////////
// Intel HD 4000 GPUs can't handle manual mask resizing (for now), choosing
// the geometry mode at runtime, or a 4x4 true Gaussian resize, so strip the
// incompatible settings as early as possible.
// INTEGRATED_GRAPHICS_COMPATIBILITY_MODE may be #defined by either
// user-settings.h or a wrapper .cg that #includes the current .cg pass.
#if defined(INTEGRATED_GRAPHICS_COMPATIBILITY_MODE)
    #if defined(PHOSPHOR_MASK_MANUALLY_RESIZE)
        #undef PHOSPHOR_MASK_MANUALLY_RESIZE
    #endif
    #if defined(RUNTIME_GEOMETRY_MODE)
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    // Mode 2 (4x4 Gaussian resize) won't work, and mode 1 (3x3 blur) is
    // inferior in most cases, so any static setting above 1.5 (i.e. 2.0)
    // becomes 0.0; every other value passes through unchanged.
    static const float bloom_approx_filter =
        (bloom_approx_filter_static > 1.5) ? 0.0 : bloom_approx_filter_static;
#else
    // No restrictions apply: take the static setting verbatim.
    static const float bloom_approx_filter = bloom_approx_filter_static;
#endif
// When only static parameters are in use, switch off every slow runtime
// path. Most of them would be harmless once the params are disabled, but
// some would not.
#if !defined(RUNTIME_SHADER_PARAMS_ENABLE)
    #if defined(RUNTIME_PHOSPHOR_BLOOM_SIGMA)
        #undef RUNTIME_PHOSPHOR_BLOOM_SIGMA
    #endif
    #if defined(RUNTIME_ANTIALIAS_WEIGHTS)
        #undef RUNTIME_ANTIALIAS_WEIGHTS
    #endif
    #if defined(RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS)
        #undef RUNTIME_ANTIALIAS_SUBPIXEL_OFFSETS
    #endif
    #if defined(RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE)
        #undef RUNTIME_SCANLINES_HORIZ_FILTER_COLORSPACE
    #endif
    #if defined(RUNTIME_GEOMETRY_TILT)
        #undef RUNTIME_GEOMETRY_TILT
    #endif
    #if defined(RUNTIME_GEOMETRY_MODE)
        #undef RUNTIME_GEOMETRY_MODE
    #endif
    #if defined(FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT)
        #undef FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
    #endif
#endif
// tex2Dbias is the fallback for tex2Dlod (for wider compatibility), so
// requesting a tex2Dlod strategy also requests its tex2Dbias counterpart.
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    #define ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
#endif
#if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    #define ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
// Drop every anisotropic compatibility strategy the drivers cannot support.
// No derivatives: discontinuities cannot be fixed.
#if !defined(DRIVERS_ALLOW_DERIVATIVES)
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#endif
// No tex2Dlod: remove both tex2Dlod strategies and
// ANTIALIAS_DISABLE_ANISOTROPIC.
#if !defined(DRIVERS_ALLOW_TEX2DLOD)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
    #endif
    #if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD
    #endif
    #if defined(ANTIALIAS_DISABLE_ANISOTROPIC)
        #undef ANTIALIAS_DISABLE_ANISOTROPIC
    #endif
#endif
// No tex2Dbias: remove both tex2Dbias fallbacks as well.
#if !defined(DRIVERS_ALLOW_TEX2DBIAS)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
    #endif
#endif
// Rank the anisotropic tiling compatibility strategies by performance and
// #undef everything below the best one still standing. The first branch that
// matches wins: tex2Dlod, then tex2Dbias, then "tile flat twice"; "fix
// discontinuities" survives only if none of the others is #defined.
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
    #if defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
        #undef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    #if defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
        #undef ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE
    #endif
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#elif defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    // ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE is only compatible with
    // flat texture coords in the same pass, but that's all we use.
    #if defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
        #undef ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES
    #endif
#endif
// The tex2Dlod and tex2Dbias strategies have a lot in common, so expose a
// single "either one is active" switch to cut down #ifdef nesting in the
// next section:
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD) || defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
    #define ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY
#endif
// Apply the same ranking to the anisotropic resampling strategies: when
// tex2Dlod is active, its tex2Dbias fallback is redundant.
#if defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD) && defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS)
    #undef ANISOTROPIC_RESAMPLING_COMPAT_TEX2DBIAS
#endif
/////////////////////// DERIVED PHOSPHOR MASK CONSTANTS //////////////////////
// Prefer the large mipmapped LUT whenever it can be used without mipmapping
// artifacts, since it gives us more options for using fewer samples. That
// takes both driver tex2Dlod support and the tex2Dlod resampling strategy;
// in every other case, resize from the small LUT.
#if defined(DRIVERS_ALLOW_TEX2DLOD) && defined(ANISOTROPIC_RESAMPLING_COMPAT_TEX2DLOD)
    // TODO: Take advantage of this!
    #define PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
    static const float2 mask_resize_src_lut_size = mask_texture_large_size;
#else
    static const float2 mask_resize_src_lut_size = mask_texture_small_size;
#endif
// tex2D's sampler2D parameter MUST be a uniform global, a uniform input to
// main_fragment, or a static alias of one of those. That makes selecting the
// phosphor mask at runtime awkward: assigning to a uniform global in the
// vertex shader, or picking a sampler2D there and passing it to the fragment
// shader (even with explicit TEXUNIT# bindings), just gives us the input
// texture or a black screen. The workaround is to call tex2D three times
// with different uniform samplers (or to resize the phosphor mask three times
// altogether). With dynamic branches, only one of those branches is
// processed, on top of quickly discarding fragments we don't need (cgc seems
// able to overcome limitations around dependent texture fetches inside of
// branches). Without dynamic branches, every branch is processed for every
// fragment, which is slower, and so is runtime sampling mode selection.
// Therefore: enable runtime selection automatically when dynamic branches
// are allowed, and otherwise only if the user's static #defines force it.
#if defined(DRIVERS_ALLOW_DYNAMIC_BRANCHES) || defined(FORCE_RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT)
    #define RUNTIME_PHOSPHOR_MASK_MODE_TYPE_SELECT
#endif
// The resize passes must render some minimum number of tiles: at least 1.0
// just to repeat a single tile, plus extra padding for anisotropic filtering,
// discontinuity fixing, antialiasing, same-pass curvature (not currently
// used), etc. First work out how many border texels and tiles are needed,
// based on how the result will be sampled, starting with antialiasing:
#if defined(GEOMETRY_EARLY)
    static const float max_subpixel_offset = aa_subpixel_r_offset_static.x;
    // Most antialiasing filters have a base radius of 4.0 pixels:
    static const float max_aa_base_pixel_border = 4.0 + max_subpixel_offset;
#else
    // No antialiasing border is reserved on this path.
    static const float max_aa_base_pixel_border = 0.0;
#endif
// Anisotropic filtering adds about 0.5 to the pixel border, except when a
// tex2Dlod-family strategy is active, in which case nothing is added:
#if defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD_FAMILY)
    static const float max_aniso_pixel_border = max_aa_base_pixel_border;
#else
    static const float max_aniso_pixel_border = max_aa_base_pixel_border + 0.5;
#endif
// The "fix discontinuities" strategy needs 1.0 more pixel of border; any
// other configuration keeps the anisotropic border as-is:
#if !defined(ANISOTROPIC_TILING_COMPAT_FIX_DISCONTINUITIES)
    static const float max_tiled_pixel_border = max_aniso_pixel_border;
#else
    static const float max_tiled_pixel_border = max_aniso_pixel_border + 1.0;
#endif
// Round the pixel border up to a whole number of texels. Assume same-pass
// curvature about triples the texel frequency; the GEOMETRY_EARLY path
// applies that factor of 3.0 before rounding:
#if defined(GEOMETRY_EARLY)
    static const float max_mask_texel_border = ceil(max_tiled_pixel_border * 3.0);
#else
    static const float max_mask_texel_border = ceil(max_tiled_pixel_border);
#endif
// Convert the texel border to a tile border using worst-case assumptions:
// divide by the minimum allowed triad size times the triads per tile
// (presumably the fewest texels a single tile can span), which gives the
// largest possible border measured in tiles.
static const float max_mask_tile_border = max_mask_texel_border/
(mask_min_allowed_triad_size * mask_triads_per_tile);
// Finally, choose how many resized tiles to render to MASK_RESIZE, and the
// starting texel (inside the borders) for sampling it.
#if !defined(GEOMETRY_EARLY) && defined(ANISOTROPIC_TILING_COMPAT_TILE_FLAT_TWICE)
    // Special case: Render two tiles without borders. Anisotropic
    // filtering doesn't seem to be a problem here.
    static const float mask_resize_num_tiles = 1.0 + 1.0;
    static const float mask_start_texels = 0.0;
#else
    // General case: one tile plus a tile border on each side, with sampling
    // starting just past the texel border.
    static const float mask_resize_num_tiles = 1.0 + 2.0 * max_mask_tile_border;
    static const float mask_start_texels = max_mask_texel_border;
#endif
// We have to fit mask_resize_num_tiles into an FBO with a viewport scale of
// mask_resize_viewport_scale. This limits the maximum final triad size.
// Estimate the minimum number of triads we can split the screen into in each
// dimension (we'll be as correct as mask_resize_viewport_scale is).
// mask_resize_num_triads is the triad count across all rendered tiles:
static const float mask_resize_num_triads =
mask_resize_num_tiles * mask_triads_per_tile;
// Dividing that count by the viewport scale gives the per-dimension minimum:
static const float2 min_allowed_viewport_triads =
float2(mask_resize_num_triads) / mask_resize_viewport_scale;
//////////////////////// COMMON MATHEMATICAL CONSTANTS ///////////////////////
// Pi, truncated to 12 decimal places:
static const float pi = 3.141592653589;
// We often want to find the location of the previous texel, e.g.:
// const float2 curr_texel = uv * texture_size;
// const float2 prev_texel = floor(curr_texel - float2(0.5)) + float2(0.5);
// const float2 prev_texel_uv = prev_texel / texture_size;
// However, many GPU drivers round incorrectly around exact texel locations.
// We need to subtract a little less than 0.5 before flooring, and some GPUs
// require this value to be farther from 0.5 than others; define it here.
// const float2 prev_texel =
// floor(curr_texel - float2(under_half)) + float2(0.5);
static const float under_half = 0.4995;
#endif // DERIVED_SETTINGS_AND_CONSTANTS_H