diff --git a/crt/crt-royale.slangp b/crt/crt-royale.slangp index dea143f..1e3f185 100644 --- a/crt/crt-royale.slangp +++ b/crt/crt-royale.slangp @@ -16,7 +16,7 @@ # calculate scale_y5 (among other values): # 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5) -shaders = "5"//"12" +shaders = "6"//"12" # Set an identifier, filename, and sampling traits for the phosphor mask texture. # Load an aperture grille, slot mask, and an EDP shadow mask, and load a small diff --git a/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang b/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang index 77aa1c1..13c06d6 100644 --- a/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang +++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang @@ -345,5 +345,5 @@ void main() color = vec3(color_r.r, color_g.g, color_b.b); } // Encode and output the blurred image: - FragColor = vec4(color, 1.0); + FragColor = vec4(1.0);//vec4(color, 1.0); } \ No newline at end of file diff --git a/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang b/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang index c9b11aa..9e0c97c 100644 --- a/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang +++ b/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang @@ -6,6 +6,14 @@ layout(push_constant) uniform Push uint FrameCount; } registers; +layout(std140, set = 0, binding = 0) uniform UBO +{ + mat4 MVP; + float interlace_bff; +} params; + +#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0 + ///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// // crt-royale: A full-featured CRT shader, with cheese. 
@@ -34,7 +42,7 @@ layout(push_constant) uniform Push #define FIRST_PASS #define SIMULATE_CRT_ON_LCD -#include "params.inc" +//#include "params.inc" ////////////////////////////////// INCLUDES ////////////////////////////////// diff --git a/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang b/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang index 744b450..63cea12 100644 --- a/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang +++ b/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang @@ -6,12 +6,9 @@ layout(push_constant) uniform Push vec4 OriginalSize; vec4 OutputSize; uint FrameCount; -} params; +} registers; -layout(std140, set = 0, binding = 0) uniform UBO -{ - mat4 MVP; -} global; +#include "params.inc" ///////////////////////////// GPL LICENSE NOTICE ///////////////////////////// @@ -40,7 +37,6 @@ layout(std140, set = 0, binding = 0) uniform UBO ////////////////////////////////// INCLUDES ////////////////////////////////// -#include "includes.h" #include "phosphor-mask-resizing.h" #pragma stage vertex @@ -52,12 +48,12 @@ layout(location = 2) out vec2 resize_magnification_scale; void main() { - gl_Position = global.MVP * Position; + gl_Position = params.MVP * Position; tex_uv = TexCoord; // First estimate the viewport size (the user will get the wrong number of // triads if it's wrong and mask_specify_num_triads is 1.0/true). - const float viewport_y = params.OutputSize.y / mask_resize_viewport_scale.y; + const float viewport_y = registers.OutputSize.y / mask_resize_viewport_scale.y; const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y; const vec2 estimated_viewport_size = vec2(viewport_y * aspect_ratio, viewport_y); @@ -66,23 +62,23 @@ void main() // we're not swearing it's correct (if we did, the x result would influence // the y result to maintain the tile aspect ratio). 
const vec2 estimated_mask_resize_output_size = - vec2(params.OutputSize.y * aspect_ratio, params.OutputSize.y); + vec2(registers.OutputSize.y * aspect_ratio, registers.OutputSize.y); // Find the final intended [y] size of our resized phosphor mask tiles, // then the tile size for the current pass (resize y only): const vec2 mask_resize_tile_size = get_resized_mask_tile_size( estimated_viewport_size, estimated_mask_resize_output_size, false); const vec2 pass_output_tile_size = vec2(min( - mask_resize_src_lut_size.x, params.OutputSize.x), mask_resize_tile_size.y); + mask_resize_src_lut_size.x, registers.OutputSize.x), mask_resize_tile_size.y); // We'll render resized tiles until filling the output FBO or meeting a // limit, so compute [wrapped] tile uv coords based on the output uv coords // and the number of tiles that will fit in the FBO. - const vec2 output_tiles_this_pass = params.OutputSize.xy / pass_output_tile_size; + const vec2 output_tiles_this_pass = registers.OutputSize.xy / pass_output_tile_size; const vec2 output_video_uv = tex_uv; const vec2 tile_uv_wrap = output_video_uv * output_tiles_this_pass; // The input LUT is just a single mask tile, so texture uv coords are the - // same as tile uv coords (save frac() for the fragment shader). The + // same as tile uv coords (save fract() for the fragment shader). The // magnification scale is also straightforward: src_tex_uv_wrap = tile_uv_wrap; resize_magnification_scale = @@ -116,7 +112,7 @@ void main() tile_uv_wrap.y <= mask_resize_num_tiles) { const float src_dy = 1.0/mask_resize_src_lut_size.y; - const vec2 src_tex_uv = frac(src_tex_uv_wrap); + const vec2 src_tex_uv = fract(src_tex_uv_wrap); vec3 pixel_color; // If mask_type is static, this branch will be resolved statically. 
if(mask_type < 0.5) @@ -170,7 +166,7 @@ void main() tile_uv_wrap.y <= mask_resize_num_tiles) { const float src_dy = 1.0/mask_resize_src_lut_size.y; - const vec2 src_tex_uv = frac(src_tex_uv_wrap); + const vec2 src_tex_uv = fract(src_tex_uv_wrap); vec3 pixel_color; // If mask_type is static, this branch will be resolved statically. if(mask_type < 0.5) diff --git a/crt/shaders/crt-royale/src/includes.h b/crt/shaders/crt-royale/src/includes.h index 15948d9..2b7c7c2 100644 --- a/crt/shaders/crt-royale/src/includes.h +++ b/crt/shaders/crt-royale/src/includes.h @@ -8,4 +8,4 @@ #include "blur-functions.h" //#include "../../../../include/blur-functions.h" <-move includes into crt-royale's src directory until it's actually working #include "scanline-functions.h" #include "bloom-functions.h" -#include "phosphor-mask-resizing.h" \ No newline at end of file +//#include "phosphor-mask-resizing.h" \ No newline at end of file diff --git a/crt/shaders/crt-royale/src/params.inc b/crt/shaders/crt-royale/src/params.inc index b43bfb5..99593bb 100644 --- a/crt/shaders/crt-royale/src/params.inc +++ b/crt/shaders/crt-royale/src/params.inc @@ -44,7 +44,6 @@ layout(std140, set = 0, binding = 0) uniform UBO float border_size; float border_darkness; float border_compress; - float interlace_bff; float interlace_1080i; } params; @@ -92,5 +91,4 @@ layout(std140, set = 0, binding = 0) uniform UBO #pragma parameter border_size "border_size" 0.015 0.0000001 0.5 0.005 #pragma parameter border_darkness "border_darkness" 2.0 0.0 16.0 0.0625 #pragma parameter border_compress "border_compress" 2.5 1.0 64.0 0.0625 -#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0 #pragma parameter interlace_1080i "interlace_1080i" 0.0 0.0 1.0 1.0 \ No newline at end of file diff --git a/crt/shaders/crt-royale/src/phosphor-mask-resizing.h b/crt/shaders/crt-royale/src/phosphor-mask-resizing.h index 7d07c34..a288476 100644 --- a/crt/shaders/crt-royale/src/phosphor-mask-resizing.h +++ 
b/crt/shaders/crt-royale/src/phosphor-mask-resizing.h @@ -41,6 +41,45 @@ #endif // No else needed: Dynamic loops assumed. + #define CALCULATE_R_COORD_FOR_4_SAMPLES \ + const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \ + const vec4 tile_uv_r = fract( \ + first_texel_tile_uv_rrrr + true_i * tile_dr); \ + const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r; + + #define VERTICAL_SINC_RESAMPLE_LOOP_BODY \ + CALCULATE_R_COORD_FOR_4_SAMPLES; \ + const vec3 new_sample0 = tex2Dlod0try(texture, \ + vec2(tex_uv.x, tex_uv_r.x)).rgb; \ + const vec3 new_sample1 = tex2Dlod0try(texture, \ + vec2(tex_uv.x, tex_uv_r.y)).rgb; \ + const vec3 new_sample2 = tex2Dlod0try(texture, \ + vec2(tex_uv.x, tex_uv_r.z)).rgb; \ + const vec3 new_sample3 = tex2Dlod0try(texture, \ + vec2(tex_uv.x, tex_uv_r.w)).rgb; \ + UPDATE_COLOR_AND_WEIGHT_SUMS; + + #define UPDATE_COLOR_AND_WEIGHT_SUMS \ + const vec4 dist = magnification_scale * \ + abs(first_dist_unscaled - true_i); \ + const vec4 pi_dist = pi * dist; \ + CALCULATE_SINC_RESAMPLE_WEIGHTS; \ + pixel_color += new_sample0 * weights.xxx; \ + pixel_color += new_sample1 * weights.yyy; \ + pixel_color += new_sample2 * weights.zzz; \ + pixel_color += new_sample3 * weights.www; \ + weight_sum += weights; + + #ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW + #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ + const vec4 pi_dist_over_lobes = pi_over_lobes * dist; \ + const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\ + (pi_dist*pi_dist_over_lobes), vec4(1.0)); + #else + #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ + const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0)); + #endif + ////////////////////////////////// CONSTANTS ///////////////////////////////// // The larger the resized tile, the fewer samples we'll need for downsizing. @@ -56,9 +95,8 @@ const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes * // Vectorized loops sample in multiples of 4. 
Round up to be safe: const float max_sinc_resize_samples_m4 = ceil( max_sinc_resize_samples_float * 0.25) * 4.0; - - -///////////////////////// RESAMPLING FUNCTION HELPERS //////////////////////// + + ///////////////////////// RESAMPLING FUNCTION HELPERS //////////////////////// float get_dynamic_loop_size(const float magnification_scale) { @@ -113,8 +151,9 @@ vec2 get_first_texel_tile_uv_and_dist(const vec2 tex_uv, first_texel_uv_wrap_2D * input_tiles_per_texture_r; // Project wrapped coordinates to the [0, 1] range. We'll do this with all // samples,but the first texel is special, since it might be negative. - const vec2 coord_negative = - vec2(first_texel_tile_uv_wrap_2D < vec2(0.0)); + vec2 coord_negative = vec2(0.0); // per-axis flag: 1.0 where the coord is negative, else 0.0 + if(first_texel_tile_uv_wrap_2D.x < 0.0) coord_negative.x = 1.0; + if(first_texel_tile_uv_wrap_2D.y < 0.0) coord_negative.y = 1.0; const vec2 first_texel_tile_uv_2D = fract(first_texel_tile_uv_wrap_2D) + coord_negative; // Pack the first texel's tile_uv coord and texel distance in 1D: @@ -140,81 +179,7 @@ vec4 tex2Dlod0try(const sampler2D tex, const vec2 tex_uv) #endif #endif } - - -////////////////////////////// LOOP BODY MACROS ////////////////////////////// - -// Using inline functions can exceed the temporary register limit, so we're -// stuck with #define macros (I'm TRULY sorry). They're declared here instead -// of above to be closer to the actual invocation sites. Steps: -// 1.) Get the exact texel location. -// 2.) Sample the phosphor mask (already assumed encoded in linear RGB). -// 3.) Get the distance from the current pixel and sinc weight: -// sinc(dist) = sin(pi * dist)/(pi * dist) -// We can also use the slower/smoother Lanczos instead: -// L(x) = sinc(dist) * sinc(dist / lobes) -// 4.) Accumulate the weight sum in weights, and accumulate the weighted texels -// in pixel_color (we'll normalize outside the loop at the end). 
-// We vectorize the loop to help reduce the Lanczos window's cost. - - // The r coord is the coord in the dimension we're resizing along (u or v), - // and first_texel_tile_uv_rrrr is a vec4 of the first texel's u or v - // tile_uv coord in [0, 1]. tex_uv_r will contain the tile_uv u or v coord - // for four new texel samples. - #define CALCULATE_R_COORD_FOR_4_SAMPLES \ - const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \ - const vec4 tile_uv_r = fract( \ - first_texel_tile_uv_rrrr + true_i * tile_dr); \ - const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r; - - #ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW - #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ - const vec4 pi_dist_over_lobes = pi_over_lobes * dist; \ - const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\ - (pi_dist*pi_dist_over_lobes), vec4(1.0)); - #else - #define CALCULATE_SINC_RESAMPLE_WEIGHTS \ - const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0)); - #endif - - #define UPDATE_COLOR_AND_WEIGHT_SUMS \ - const vec4 dist = magnification_scale * \ - abs(first_dist_unscaled - true_i); \ - const vec4 pi_dist = pi * dist; \ - CALCULATE_SINC_RESAMPLE_WEIGHTS; \ - pixel_color += new_sample0 * weights.xxx; \ - pixel_color += new_sample1 * weights.yyy; \ - pixel_color += new_sample2 * weights.zzz; \ - pixel_color += new_sample3 * weights.www; \ - weight_sum += weights; - - #define VERTICAL_SINC_RESAMPLE_LOOP_BODY \ - CALCULATE_R_COORD_FOR_4_SAMPLES; \ - const vec3 new_sample0 = tex2Dlod0try(texture, \ - vec2(tex_uv.x, tex_uv_r.x)).rgb; \ - const vec3 new_sample1 = tex2Dlod0try(texture, \ - vec2(tex_uv.x, tex_uv_r.y)).rgb; \ - const vec3 new_sample2 = tex2Dlod0try(texture, \ - vec2(tex_uv.x, tex_uv_r.z)).rgb; \ - const vec3 new_sample3 = tex2Dlod0try(texture, \ - vec2(tex_uv.x, tex_uv_r.w)).rgb; \ - UPDATE_COLOR_AND_WEIGHT_SUMS; - - #define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY \ - CALCULATE_R_COORD_FOR_4_SAMPLES; \ - const vec3 new_sample0 = tex2Dlod0try(texture, \ - vec2(tex_uv_r.x, 
tex_uv.y)).rgb; \ - const vec3 new_sample1 = tex2Dlod0try(texture, \ - vec2(tex_uv_r.y, tex_uv.y)).rgb; \ - const vec3 new_sample2 = tex2Dlod0try(texture, \ - vec2(tex_uv_r.z, tex_uv.y)).rgb; \ - const vec3 new_sample3 = tex2Dlod0try(texture, \ - vec2(tex_uv_r.w, tex_uv.y)).rgb; \ - UPDATE_COLOR_AND_WEIGHT_SUMS; - - -//////////////////////////// RESAMPLING FUNCTIONS //////////////////////////// - + //////////////////////////// TILE SIZE CALCULATION /////////////////////////// vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size, @@ -317,7 +282,115 @@ vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size, return final_resized_tile_size; } +//////////////////////////// RESAMPLING FUNCTIONS //////////////////////////// +vec3 downsample_vertical_sinc_tiled(const sampler2D texture, + const vec2 tex_uv, const vec2 texture_size, const float dr, + const float magnification_scale, const float tile_size_uv_r) +{ + // Requires: 1.) dr == du == 1.0/texture_size.x or + // dr == dv == 1.0/texture_size.y + // (whichever direction we're resampling in). + // It's a scalar to save register space. + // 2.) tile_size_uv_r is the number of texels an input tile + // takes up in the input texture, in the direction we're + // resampling this pass. + // 3.) magnification_scale must be <= 1.0. + // Returns: Return a [Lanczos] sinc-resampled pixel of a vertically + // downsized input tile embedded in an input texture. (The + // vertical version is special-cased though: It assumes the + // tile size equals the [static] texture size, since it's used + // on an LUT texture input containing one tile. For more + // generic use, eliminate the "static" in the parameters.) + // The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension + // we're resizing along, e.g. "dy" in this case. + #ifdef USE_SINGLE_STATIC_LOOP + // A static loop can be faster, but it might blur too much from using + // more samples than it should. 
+ const int samples = int(max_sinc_resize_samples_m4); + #else + const int samples = int(get_dynamic_loop_size(magnification_scale)); + #endif + + // Get the first sample location (scalar tile uv coord along the resized + // dimension) and distance from the output location (in texels): + const float input_tiles_per_texture_r = 1.0/tile_size_uv_r; + // true = vertical resize: + const vec2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist( + tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true); + const vec4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx; + const vec4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy; + // Get the tile sample offset: + const float tile_dr = dr * input_tiles_per_texture_r; + + // Sum up each weight and weighted sample color, varying the looping + // strategy based on our expected dynamic loop capabilities. See the + // loop body macros above. + int i_base = 0; + vec4 weight_sum = vec4(0.0); + vec3 pixel_color = vec3(0.0); + const int i_step = 4; + #ifdef BREAK_LOOPS_INTO_PIECES + if(samples - i_base >= 64) + { + for(int i = 0; i < 64; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + i_base += 64; + } + if(samples - i_base >= 32) + { + for(int i = 0; i < 32; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + i_base += 32; + } + if(samples - i_base >= 16) + { + for(int i = 0; i < 16; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + i_base += 16; + } + if(samples - i_base >= 8) + { + for(int i = 0; i < 8; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + i_base += 8; + } + if(samples - i_base >= 4) + { + for(int i = 0; i < 4; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + i_base += 4; + } + // Do another 4-sample block for a total of 128 max samples. 
+ if(samples - i_base > 0) + { + for(int i = 0; i < 4; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + } + #else + for(int i = 0; i < samples; i += i_step) + { + VERTICAL_SINC_RESAMPLE_LOOP_BODY; + } + #endif + // Normalize so the weight_sum == 1.0, and return: + const vec2 weight_sum_reduce = weight_sum.xy + weight_sum.zw; + const vec3 scalar_weight_sum = vec3(weight_sum_reduce.x + + weight_sum_reduce.y); + return (pixel_color/scalar_weight_sum); +} #endif // PHOSPHOR_MASK_RESIZING_H