mirror of
https://github.com/italicsjenga/slang-shaders.git
synced 2024-11-22 15:51:30 +11:00
another royale pass down...
This commit is contained in:
parent
7182761964
commit
f82445a2a7
|
@ -16,7 +16,7 @@
|
|||
# calculate scale_y5 (among other values):
|
||||
# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)
|
||||
|
||||
shaders = "5"//"12"
|
||||
shaders = "6"//"12"
|
||||
|
||||
# Set an identifier, filename, and sampling traits for the phosphor mask texture.
|
||||
# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
|
||||
|
|
|
@ -345,5 +345,5 @@ void main()
|
|||
color = vec3(color_r.r, color_g.g, color_b.b);
|
||||
}
|
||||
// Encode and output the blurred image:
|
||||
FragColor = vec4(color, 1.0);
|
||||
FragColor = vec4(1.0);//vec4(color, 1.0);
|
||||
}
|
|
@ -6,6 +6,14 @@ layout(push_constant) uniform Push
|
|||
uint FrameCount;
|
||||
} registers;
|
||||
|
||||
layout(std140, set = 0, binding = 0) uniform UBO
|
||||
{
|
||||
mat4 MVP;
|
||||
float interlace_bff;
|
||||
} params;
|
||||
|
||||
#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
// crt-royale: A full-featured CRT shader, with cheese.
|
||||
|
@ -34,7 +42,7 @@ layout(push_constant) uniform Push
|
|||
#define FIRST_PASS
|
||||
#define SIMULATE_CRT_ON_LCD
|
||||
|
||||
#include "params.inc"
|
||||
//#include "params.inc"
|
||||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
|
|
|
@ -6,12 +6,9 @@ layout(push_constant) uniform Push
|
|||
vec4 OriginalSize;
|
||||
vec4 OutputSize;
|
||||
uint FrameCount;
|
||||
} params;
|
||||
} registers;
|
||||
|
||||
layout(std140, set = 0, binding = 0) uniform UBO
|
||||
{
|
||||
mat4 MVP;
|
||||
} global;
|
||||
#include "params.inc"
|
||||
|
||||
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
|
||||
|
||||
|
@ -40,7 +37,6 @@ layout(std140, set = 0, binding = 0) uniform UBO
|
|||
|
||||
////////////////////////////////// INCLUDES //////////////////////////////////
|
||||
|
||||
#include "includes.h"
|
||||
#include "phosphor-mask-resizing.h"
|
||||
|
||||
#pragma stage vertex
|
||||
|
@ -52,12 +48,12 @@ layout(location = 2) out vec2 resize_magnification_scale;
|
|||
|
||||
void main()
|
||||
{
|
||||
gl_Position = global.MVP * Position;
|
||||
gl_Position = params.MVP * Position;
|
||||
tex_uv = TexCoord;
|
||||
|
||||
// First estimate the viewport size (the user will get the wrong number of
|
||||
// triads if it's wrong and mask_specify_num_triads is 1.0/true).
|
||||
const float viewport_y = params.OutputSize.y / mask_resize_viewport_scale.y;
|
||||
const float viewport_y = registers.OutputSize.y / mask_resize_viewport_scale.y;
|
||||
const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y;
|
||||
const vec2 estimated_viewport_size =
|
||||
vec2(viewport_y * aspect_ratio, viewport_y);
|
||||
|
@ -66,23 +62,23 @@ void main()
|
|||
// we're not swearing it's correct (if we did, the x result would influence
|
||||
// the y result to maintain the tile aspect ratio).
|
||||
const vec2 estimated_mask_resize_output_size =
|
||||
vec2(params.OutputSize.y * aspect_ratio, params.OutputSize.y);
|
||||
vec2(registers.OutputSize.y * aspect_ratio, registers.OutputSize.y);
|
||||
// Find the final intended [y] size of our resized phosphor mask tiles,
|
||||
// then the tile size for the current pass (resize y only):
|
||||
const vec2 mask_resize_tile_size = get_resized_mask_tile_size(
|
||||
estimated_viewport_size, estimated_mask_resize_output_size, false);
|
||||
const vec2 pass_output_tile_size = vec2(min(
|
||||
mask_resize_src_lut_size.x, params.OutputSize.x), mask_resize_tile_size.y);
|
||||
mask_resize_src_lut_size.x, registers.OutputSize.x), mask_resize_tile_size.y);
|
||||
|
||||
// We'll render resized tiles until filling the output FBO or meeting a
|
||||
// limit, so compute [wrapped] tile uv coords based on the output uv coords
|
||||
// and the number of tiles that will fit in the FBO.
|
||||
const vec2 output_tiles_this_pass = params.OutputSize.xy / pass_output_tile_size;
|
||||
const vec2 output_tiles_this_pass = registers.OutputSize.xy / pass_output_tile_size;
|
||||
const vec2 output_video_uv = tex_uv;
|
||||
const vec2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;
|
||||
|
||||
// The input LUT is just a single mask tile, so texture uv coords are the
|
||||
// same as tile uv coords (save frac() for the fragment shader). The
|
||||
// same as tile uv coords (save fract() for the fragment shader). The
|
||||
// magnification scale is also straightforward:
|
||||
src_tex_uv_wrap = tile_uv_wrap;
|
||||
resize_magnification_scale =
|
||||
|
@ -116,7 +112,7 @@ void main()
|
|||
tile_uv_wrap.y <= mask_resize_num_tiles)
|
||||
{
|
||||
const float src_dy = 1.0/mask_resize_src_lut_size.y;
|
||||
const vec2 src_tex_uv = frac(src_tex_uv_wrap);
|
||||
const vec2 src_tex_uv = fract(src_tex_uv_wrap);
|
||||
vec3 pixel_color;
|
||||
// If mask_type is static, this branch will be resolved statically.
|
||||
if(mask_type < 0.5)
|
||||
|
@ -170,7 +166,7 @@ void main()
|
|||
tile_uv_wrap.y <= mask_resize_num_tiles)
|
||||
{
|
||||
const float src_dy = 1.0/mask_resize_src_lut_size.y;
|
||||
const vec2 src_tex_uv = frac(src_tex_uv_wrap);
|
||||
const vec2 src_tex_uv = fract(src_tex_uv_wrap);
|
||||
vec3 pixel_color;
|
||||
// If mask_type is static, this branch will be resolved statically.
|
||||
if(mask_type < 0.5)
|
||||
|
|
|
@ -8,4 +8,4 @@
|
|||
#include "blur-functions.h" //#include "../../../../include/blur-functions.h" <-move includes into crt-royale's src directory until it's actually working
|
||||
#include "scanline-functions.h"
|
||||
#include "bloom-functions.h"
|
||||
#include "phosphor-mask-resizing.h"
|
||||
//#include "phosphor-mask-resizing.h"
|
|
@ -44,7 +44,6 @@ layout(std140, set = 0, binding = 0) uniform UBO
|
|||
float border_size;
|
||||
float border_darkness;
|
||||
float border_compress;
|
||||
float interlace_bff;
|
||||
float interlace_1080i;
|
||||
} params;
|
||||
|
||||
|
@ -92,5 +91,4 @@ layout(std140, set = 0, binding = 0) uniform UBO
|
|||
#pragma parameter border_size "border_size" 0.015 0.0000001 0.5 0.005
|
||||
#pragma parameter border_darkness "border_darkness" 2.0 0.0 16.0 0.0625
|
||||
#pragma parameter border_compress "border_compress" 2.5 1.0 64.0 0.0625
|
||||
#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0
|
||||
#pragma parameter interlace_1080i "interlace_1080i" 0.0 0.0 1.0 1.0
|
|
@ -41,6 +41,45 @@
|
|||
#endif // No else needed: Dynamic loops assumed.
|
||||
|
||||
|
||||
#define CALCULATE_R_COORD_FOR_4_SAMPLES \
|
||||
const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \
|
||||
const vec4 tile_uv_r = fract( \
|
||||
first_texel_tile_uv_rrrr + true_i * tile_dr); \
|
||||
const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r;
|
||||
|
||||
#define VERTICAL_SINC_RESAMPLE_LOOP_BODY \
|
||||
CALCULATE_R_COORD_FOR_4_SAMPLES; \
|
||||
const vec3 new_sample0 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.x)).rgb; \
|
||||
const vec3 new_sample1 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.y)).rgb; \
|
||||
const vec3 new_sample2 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.z)).rgb; \
|
||||
const vec3 new_sample3 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.w)).rgb; \
|
||||
UPDATE_COLOR_AND_WEIGHT_SUMS;
|
||||
|
||||
#define UPDATE_COLOR_AND_WEIGHT_SUMS \
|
||||
const vec4 dist = magnification_scale * \
|
||||
abs(first_dist_unscaled - true_i); \
|
||||
const vec4 pi_dist = pi * dist; \
|
||||
CALCULATE_SINC_RESAMPLE_WEIGHTS; \
|
||||
pixel_color += new_sample0 * weights.xxx; \
|
||||
pixel_color += new_sample1 * weights.yyy; \
|
||||
pixel_color += new_sample2 * weights.zzz; \
|
||||
pixel_color += new_sample3 * weights.www; \
|
||||
weight_sum += weights;
|
||||
|
||||
#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
|
||||
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
|
||||
const vec4 pi_dist_over_lobes = pi_over_lobes * dist; \
|
||||
const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
|
||||
(pi_dist*pi_dist_over_lobes), vec4(1.0));
|
||||
#else
|
||||
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
|
||||
const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0));
|
||||
#endif
|
||||
|
||||
////////////////////////////////// CONSTANTS /////////////////////////////////
|
||||
|
||||
// The larger the resized tile, the fewer samples we'll need for downsizing.
|
||||
|
@ -57,8 +96,7 @@ const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes *
|
|||
const float max_sinc_resize_samples_m4 = ceil(
|
||||
max_sinc_resize_samples_float * 0.25) * 4.0;
|
||||
|
||||
|
||||
///////////////////////// RESAMPLING FUNCTION HELPERS ////////////////////////
|
||||
///////////////////////// RESAMPLING FUNCTION HELPERS ////////////////////////
|
||||
|
||||
float get_dynamic_loop_size(const float magnification_scale)
|
||||
{
|
||||
|
@ -113,8 +151,9 @@ vec2 get_first_texel_tile_uv_and_dist(const vec2 tex_uv,
|
|||
first_texel_uv_wrap_2D * input_tiles_per_texture_r;
|
||||
// Project wrapped coordinates to the [0, 1] range. We'll do this with all
|
||||
// samples,but the first texel is special, since it might be negative.
|
||||
const vec2 coord_negative =
|
||||
vec2(first_texel_tile_uv_wrap_2D < vec2(0.0));
|
||||
vec2 coord_negative = vec2(0.0);
|
||||
if(first_texel_tile_uv_wrap_2D.x < 0.0) coord_negative.x = first_texel_tile_uv_wrap_2D.x;
|
||||
if(first_texel_tile_uv_wrap_2D.x < 0.0) coord_negative.y = first_texel_tile_uv_wrap_2D.y;
|
||||
const vec2 first_texel_tile_uv_2D =
|
||||
fract(first_texel_tile_uv_wrap_2D) + coord_negative;
|
||||
// Pack the first texel's tile_uv coord and texel distance in 1D:
|
||||
|
@ -141,80 +180,6 @@ vec4 tex2Dlod0try(const sampler2D tex, const vec2 tex_uv)
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////// LOOP BODY MACROS //////////////////////////////
|
||||
|
||||
// Using inline functions can exceed the temporary register limit, so we're
|
||||
// stuck with #define macros (I'm TRULY sorry). They're declared here instead
|
||||
// of above to be closer to the actual invocation sites. Steps:
|
||||
// 1.) Get the exact texel location.
|
||||
// 2.) Sample the phosphor mask (already assumed encoded in linear RGB).
|
||||
// 3.) Get the distance from the current pixel and sinc weight:
|
||||
// sinc(dist) = sin(pi * dist)/(pi * dist)
|
||||
// We can also use the slower/smoother Lanczos instead:
|
||||
// L(x) = sinc(dist) * sinc(dist / lobes)
|
||||
// 4.) Accumulate the weight sum in weights, and accumulate the weighted texels
|
||||
// in pixel_color (we'll normalize outside the loop at the end).
|
||||
// We vectorize the loop to help reduce the Lanczos window's cost.
|
||||
|
||||
// The r coord is the coord in the dimension we're resizing along (u or v),
|
||||
// and first_texel_tile_uv_rrrr is a vec4 of the first texel's u or v
|
||||
// tile_uv coord in [0, 1]. tex_uv_r will contain the tile_uv u or v coord
|
||||
// for four new texel samples.
|
||||
#define CALCULATE_R_COORD_FOR_4_SAMPLES \
|
||||
const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \
|
||||
const vec4 tile_uv_r = fract( \
|
||||
first_texel_tile_uv_rrrr + true_i * tile_dr); \
|
||||
const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r;
|
||||
|
||||
#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
|
||||
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
|
||||
const vec4 pi_dist_over_lobes = pi_over_lobes * dist; \
|
||||
const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
|
||||
(pi_dist*pi_dist_over_lobes), vec4(1.0));
|
||||
#else
|
||||
#define CALCULATE_SINC_RESAMPLE_WEIGHTS \
|
||||
const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0));
|
||||
#endif
|
||||
|
||||
#define UPDATE_COLOR_AND_WEIGHT_SUMS \
|
||||
const vec4 dist = magnification_scale * \
|
||||
abs(first_dist_unscaled - true_i); \
|
||||
const vec4 pi_dist = pi * dist; \
|
||||
CALCULATE_SINC_RESAMPLE_WEIGHTS; \
|
||||
pixel_color += new_sample0 * weights.xxx; \
|
||||
pixel_color += new_sample1 * weights.yyy; \
|
||||
pixel_color += new_sample2 * weights.zzz; \
|
||||
pixel_color += new_sample3 * weights.www; \
|
||||
weight_sum += weights;
|
||||
|
||||
#define VERTICAL_SINC_RESAMPLE_LOOP_BODY \
|
||||
CALCULATE_R_COORD_FOR_4_SAMPLES; \
|
||||
const vec3 new_sample0 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.x)).rgb; \
|
||||
const vec3 new_sample1 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.y)).rgb; \
|
||||
const vec3 new_sample2 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.z)).rgb; \
|
||||
const vec3 new_sample3 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv.x, tex_uv_r.w)).rgb; \
|
||||
UPDATE_COLOR_AND_WEIGHT_SUMS;
|
||||
|
||||
#define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \
|
||||
CALCULATE_R_COORD_FOR_4_SAMPLES; \
|
||||
const vec3 new_sample0 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv_r.x, tex_uv.y)).rgb; \
|
||||
const vec3 new_sample1 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv_r.y, tex_uv.y)).rgb; \
|
||||
const vec3 new_sample2 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv_r.z, tex_uv.y)).rgb; \
|
||||
const vec3 new_sample3 = tex2Dlod0try(texture, \
|
||||
vec2(tex_uv_r.w, tex_uv.y)).rgb; \
|
||||
UPDATE_COLOR_AND_WEIGHT_SUMS;
|
||||
|
||||
|
||||
//////////////////////////// RESAMPLING FUNCTIONS ////////////////////////////
|
||||
|
||||
//////////////////////////// TILE SIZE CALCULATION ///////////////////////////
|
||||
|
||||
vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size,
|
||||
|
@ -317,7 +282,115 @@ vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size,
|
|||
return final_resized_tile_size;
|
||||
}
|
||||
|
||||
//////////////////////////// RESAMPLING FUNCTIONS ////////////////////////////
|
||||
|
||||
vec3 downsample_vertical_sinc_tiled(const sampler2D texture,
|
||||
const vec2 tex_uv, const vec2 texture_size, const float dr,
|
||||
const float magnification_scale, const float tile_size_uv_r)
|
||||
{
|
||||
// Requires: 1.) dr == du == 1.0/texture_size.x or
|
||||
// dr == dv == 1.0/texture_size.y
|
||||
// (whichever direction we're resampling in).
|
||||
// It's a scalar to save register space.
|
||||
// 2.) tile_size_uv_r is the number of texels an input tile
|
||||
// takes up in the input texture, in the direction we're
|
||||
// resampling this pass.
|
||||
// 3.) magnification_scale must be <= 1.0.
|
||||
// Returns: Return a [Lanczos] sinc-resampled pixel of a vertically
|
||||
// downsized input tile embedded in an input texture. (The
|
||||
// vertical version is special-cased though: It assumes the
|
||||
// tile size equals the [static] texture size, since it's used
|
||||
// on an LUT texture input containing one tile. For more
|
||||
// generic use, eliminate the "static" in the parameters.)
|
||||
// The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
|
||||
// we're resizing along, e.g. "dy" in this case.
|
||||
#ifdef USE_SINGLE_STATIC_LOOP
|
||||
// A static loop can be faster, but it might blur too much from using
|
||||
// more samples than it should.
|
||||
const int samples = int(max_sinc_resize_samples_m4);
|
||||
#else
|
||||
const int samples = int(get_dynamic_loop_size(magnification_scale));
|
||||
#endif
|
||||
|
||||
// Get the first sample location (scalar tile uv coord along the resized
|
||||
// dimension) and distance from the output location (in texels):
|
||||
const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
|
||||
// true = vertical resize:
|
||||
const vec2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
|
||||
tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true);
|
||||
const vec4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
|
||||
const vec4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
|
||||
// Get the tile sample offset:
|
||||
const float tile_dr = dr * input_tiles_per_texture_r;
|
||||
|
||||
// Sum up each weight and weighted sample color, varying the looping
|
||||
// strategy based on our expected dynamic loop capabilities. See the
|
||||
// loop body macros above.
|
||||
int i_base = 0;
|
||||
vec4 weight_sum = vec4(0.0);
|
||||
vec3 pixel_color = vec3(0.0);
|
||||
const int i_step = 4;
|
||||
#ifdef BREAK_LOOPS_INTO_PIECES
|
||||
if(samples - i_base >= 64)
|
||||
{
|
||||
for(int i = 0; i < 64; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
i_base += 64;
|
||||
}
|
||||
if(samples - i_base >= 32)
|
||||
{
|
||||
for(int i = 0; i < 32; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
i_base += 32;
|
||||
}
|
||||
if(samples - i_base >= 16)
|
||||
{
|
||||
for(int i = 0; i < 16; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
i_base += 16;
|
||||
}
|
||||
if(samples - i_base >= 8)
|
||||
{
|
||||
for(int i = 0; i < 8; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
i_base += 8;
|
||||
}
|
||||
if(samples - i_base >= 4)
|
||||
{
|
||||
for(int i = 0; i < 4; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
i_base += 4;
|
||||
}
|
||||
// Do another 4-sample block for a total of 128 max samples.
|
||||
if(samples - i_base > 0)
|
||||
{
|
||||
for(int i = 0; i < 4; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(int i = 0; i < samples; i += i_step)
|
||||
{
|
||||
VERTICAL_SINC_RESAMPLE_LOOP_BODY;
|
||||
}
|
||||
#endif
|
||||
// Normalize so the weight_sum == 1.0, and return:
|
||||
const vec2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
|
||||
const vec3 scalar_weight_sum = vec3(weight_sum_reduce.x +
|
||||
weight_sum_reduce.y);
|
||||
return (pixel_color/scalar_weight_sum);
|
||||
}
|
||||
|
||||
#endif // PHOSPHOR_MASK_RESIZING_H
|
||||
|
||||
|
|
Loading…
Reference in a new issue