another royale pass down...

2025-02-19 08:17:43 +11:00 · 2016-08-23 16:07:26 -05:00 · 2016-08-23 16:07:26 -05:00 · f82445a2a7
commit f82445a2a7
parent 7182761964
7 changed files with 175 additions and 100 deletions
--- a/crt/crt-royale.slangp
+++ b/crt/crt-royale.slangp
@ -16,7 +16,7 @@
 # calculate scale_y5 (among other values):
 # 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y5)

-shaders = "5"//"12"
+shaders = "6"//"12"

 # Set an identifier, filename, and sampling traits for the phosphor mask texture.
 # Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
--- a/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-bloom-approx.slang
@ -345,5 +345,5 @@ void main()
        color = vec3(color_r.r, color_g.g, color_b.b);
    }
    //  Encode and output the blurred image:
-   FragColor = vec4(color, 1.0);
+   FragColor = vec4(1.0);//vec4(color, 1.0);
 }
--- a/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.slang
@ -6,6 +6,14 @@ layout(push_constant) uniform Push
 	uint FrameCount;
 } registers;

+layout(std140, set = 0, binding = 0) uniform UBO
+{
+	mat4 MVP;  //  presumably the vertex transform matrix -- TODO confirm against this pass's vertex stage
+    float interlace_bff;  //  value of the user parameter declared by the #pragma below
+} params;
+//  User parameter: identifier, label, then 0.0 0.0 1.0 1.0 (presumably default, min, max, step -- confirm against the slang spec).
+#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0
+
 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

 //  crt-royale: A full-featured CRT shader, with cheese.
@ -34,7 +42,7 @@ layout(push_constant) uniform Push
 #define FIRST_PASS
 #define SIMULATE_CRT_ON_LCD

-#include "params.inc"
+//#include "params.inc"

 //////////////////////////////////  INCLUDES  //////////////////////////////////

--- a/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang
+++ b/crt/shaders/crt-royale/src/crt-royale-mask-resize-vertical.slang
@ -6,12 +6,9 @@ layout(push_constant) uniform Push
 	vec4 OriginalSize;
 	vec4 OutputSize;
 	uint FrameCount;
-} params;
+} registers;

-layout(std140, set = 0, binding = 0) uniform UBO
-{
-	mat4 MVP;
-} global;
+#include "params.inc"

 /////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

@ -40,7 +37,6 @@ layout(std140, set = 0, binding = 0) uniform UBO

 //////////////////////////////////  INCLUDES  //////////////////////////////////

-#include "includes.h"
 #include "phosphor-mask-resizing.h"

 #pragma stage vertex
@ -52,12 +48,12 @@ layout(location = 2) out vec2 resize_magnification_scale;

 void main()
 {
-   gl_Position = global.MVP * Position;
+   gl_Position = params.MVP * Position;
   tex_uv = TexCoord;
   
    //  First estimate the viewport size (the user will get the wrong number of
    //  triads if it's wrong and mask_specify_num_triads is 1.0/true).
-    const float viewport_y = params.OutputSize.y / mask_resize_viewport_scale.y;
+    const float viewport_y = registers.OutputSize.y / mask_resize_viewport_scale.y;
    const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y;
    const vec2 estimated_viewport_size =
        vec2(viewport_y * aspect_ratio, viewport_y);
@ -66,23 +62,23 @@ void main()
    //  we're not swearing it's correct (if we did, the x result would influence
    //  the y result to maintain the tile aspect ratio).
    const vec2 estimated_mask_resize_output_size =
-        vec2(params.OutputSize.y * aspect_ratio, params.OutputSize.y);
+        vec2(registers.OutputSize.y * aspect_ratio, registers.OutputSize.y);
    //  Find the final intended [y] size of our resized phosphor mask tiles,
    //  then the tile size for the current pass (resize y only):
    const vec2 mask_resize_tile_size = get_resized_mask_tile_size(
        estimated_viewport_size, estimated_mask_resize_output_size, false);
    const vec2 pass_output_tile_size = vec2(min(
-        mask_resize_src_lut_size.x, params.OutputSize.x), mask_resize_tile_size.y);
+        mask_resize_src_lut_size.x, registers.OutputSize.x), mask_resize_tile_size.y);

    //  We'll render resized tiles until filling the output FBO or meeting a
    //  limit, so compute [wrapped] tile uv coords based on the output uv coords
    //  and the number of tiles that will fit in the FBO.
-    const vec2 output_tiles_this_pass = params.OutputSize.xy / pass_output_tile_size;
+    const vec2 output_tiles_this_pass = registers.OutputSize.xy / pass_output_tile_size;
    const vec2 output_video_uv = tex_uv;
    const vec2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;

    //  The input LUT is just a single mask tile, so texture uv coords are the
-    //  same as tile uv coords (save frac() for the fragment shader).  The
+    //  same as tile uv coords (save fract() for the fragment shader).  The
    //  magnification scale is also straightforward:
    src_tex_uv_wrap = tile_uv_wrap;
    resize_magnification_scale =
@ -116,7 +112,7 @@ void main()
            tile_uv_wrap.y <= mask_resize_num_tiles)
        {
            const float src_dy = 1.0/mask_resize_src_lut_size.y;
-            const vec2 src_tex_uv = frac(src_tex_uv_wrap);
+            const vec2 src_tex_uv = fract(src_tex_uv_wrap);
            vec3 pixel_color;
            //  If mask_type is static, this branch will be resolved statically.
            if(mask_type < 0.5)
@ -170,7 +166,7 @@ void main()
            tile_uv_wrap.y <= mask_resize_num_tiles)
        {
            const float src_dy = 1.0/mask_resize_src_lut_size.y;
-            const vec2 src_tex_uv = frac(src_tex_uv_wrap);
+            const vec2 src_tex_uv = fract(src_tex_uv_wrap);
            vec3 pixel_color;
            //  If mask_type is static, this branch will be resolved statically.
            if(mask_type < 0.5)
--- a/crt/shaders/crt-royale/src/includes.h
+++ b/crt/shaders/crt-royale/src/includes.h
@ -8,4 +8,4 @@
 #include "blur-functions.h" //#include "../../../../include/blur-functions.h" <-move includes into crt-royale's src directory until it's actually working
 #include "scanline-functions.h"
 #include "bloom-functions.h"
-#include "phosphor-mask-resizing.h"
+//#include "phosphor-mask-resizing.h"
--- a/crt/shaders/crt-royale/src/params.inc
+++ b/crt/shaders/crt-royale/src/params.inc
@ -44,7 +44,6 @@ layout(std140, set = 0, binding = 0) uniform UBO
    float border_size;
    float border_darkness;
    float border_compress;
-    float interlace_bff;
    float interlace_1080i;
 } params;

@ -92,5 +91,4 @@ layout(std140, set = 0, binding = 0) uniform UBO
 #pragma parameter border_size "border_size" 0.015 0.0000001 0.5 0.005
 #pragma parameter border_darkness "border_darkness" 2.0 0.0 16.0 0.0625
 #pragma parameter border_compress "border_compress" 2.5 1.0 64.0 0.0625
-#pragma parameter interlace_bff "interlace_bff" 0.0 0.0 1.0 1.0
 #pragma parameter interlace_1080i "interlace_1080i" 0.0 0.0 1.0 1.0
--- a/crt/shaders/crt-royale/src/phosphor-mask-resizing.h
+++ b/crt/shaders/crt-royale/src/phosphor-mask-resizing.h
@ -41,6 +41,45 @@
 #endif  //  No else needed: Dynamic loops assumed.


+    #define CALCULATE_R_COORD_FOR_4_SAMPLES                                    \
+        const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \
+        const vec4 tile_uv_r = fract(                                         \
+            first_texel_tile_uv_rrrr + true_i * tile_dr);                      \
+        const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r;
+    //  Loop body: fetch four texels sharing tex_uv.x (second coord from tex_uv_r), then weight and accumulate them:
+    #define VERTICAL_SINC_RESAMPLE_LOOP_BODY                                   \
+        CALCULATE_R_COORD_FOR_4_SAMPLES;                                       \
+        const vec3 new_sample0 = tex2Dlod0try(texture,                       \
+            vec2(tex_uv.x, tex_uv_r.x)).rgb;                                 \
+        const vec3 new_sample1 = tex2Dlod0try(texture,                       \
+            vec2(tex_uv.x, tex_uv_r.y)).rgb;                                 \
+        const vec3 new_sample2 = tex2Dlod0try(texture,                       \
+            vec2(tex_uv.x, tex_uv_r.z)).rgb;                                 \
+        const vec3 new_sample3 = tex2Dlod0try(texture,                       \
+            vec2(tex_uv.x, tex_uv_r.w)).rgb;                                 \
+        UPDATE_COLOR_AND_WEIGHT_SUMS;
+    //  Scale each sample's texel distance, compute its weight, and add into pixel_color and weight_sum:
+	#define UPDATE_COLOR_AND_WEIGHT_SUMS                                       \
+        const vec4 dist = magnification_scale *                              \
+            abs(first_dist_unscaled - true_i);                                 \
+        const vec4 pi_dist = pi * dist;                                      \
+        CALCULATE_SINC_RESAMPLE_WEIGHTS;                                       \
+        pixel_color += new_sample0 * weights.xxx;                              \
+        pixel_color += new_sample1 * weights.yyy;                              \
+        pixel_color += new_sample2 * weights.zzz;                              \
+        pixel_color += new_sample3 * weights.www;                              \
+        weight_sum += weights;
+    //  Weights are sin(pi_dist)/pi_dist, also windowed by sin(x)/x of pi_dist_over_lobes for Lanczos; capped at 1.0:
+	#ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
+        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
+            const vec4 pi_dist_over_lobes = pi_over_lobes * dist;            \
+            const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
+                (pi_dist*pi_dist_over_lobes), vec4(1.0));
+    #else
+        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
+            const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0));
+    #endif
+
 //////////////////////////////////  CONSTANTS  /////////////////////////////////

 //  The larger the resized tile, the fewer samples we'll need for downsizing.
@ -57,8 +96,7 @@ const float max_sinc_resize_samples_float = 2.0 * mask_sinc_lobes *
 const float max_sinc_resize_samples_m4 = ceil(
    max_sinc_resize_samples_float * 0.25) * 4.0;
 	
-
-/////////////////////////  RESAMPLING FUNCTION HELPERS  ////////////////////////
+	/////////////////////////  RESAMPLING FUNCTION HELPERS  ////////////////////////

 float get_dynamic_loop_size(const float magnification_scale)
 {
@ -113,8 +151,9 @@ vec2 get_first_texel_tile_uv_and_dist(const vec2 tex_uv,
        first_texel_uv_wrap_2D * input_tiles_per_texture_r;
    //  Project wrapped coordinates to the [0, 1] range.  We'll do this with all
    //  samples, but the first texel is special, since it might be negative.
-    const vec2 coord_negative =
-        vec2(first_texel_tile_uv_wrap_2D < vec2(0.0));
+    //  1.0 for each negative component (x and y tested independently), else 0.0:
+    const vec2 coord_negative =
+        vec2(lessThan(first_texel_tile_uv_wrap_2D, vec2(0.0)));
    const vec2 first_texel_tile_uv_2D =
        fract(first_texel_tile_uv_wrap_2D) + coord_negative;
    //  Pack the first texel's tile_uv coord and texel distance in 1D:
@ -141,80 +180,6 @@ vec4 tex2Dlod0try(const sampler2D tex, const vec2 tex_uv)
    #endif
 }
 	
-
-//////////////////////////////  LOOP BODY MACROS  //////////////////////////////
-
-//  Using inline functions can exceed the temporary register limit, so we're
-//  stuck with #define macros (I'm TRULY sorry).  They're declared here instead
-//  of above to be closer to the actual invocation sites.  Steps:
-//  1.) Get the exact texel location.
-//  2.) Sample the phosphor mask (already assumed encoded in linear RGB).
-//  3.) Get the distance from the current pixel and sinc weight:
-//          sinc(dist) = sin(pi * dist)/(pi * dist)
-//      We can also use the slower/smoother Lanczos instead:
-//          L(x) = sinc(dist) * sinc(dist / lobes)
-//  4.) Accumulate the weight sum in weights, and accumulate the weighted texels
-//      in pixel_color (we'll normalize outside the loop at the end).
-//  We vectorize the loop to help reduce the Lanczos window's cost.
-
-    //  The r coord is the coord in the dimension we're resizing along (u or v),
-    //  and first_texel_tile_uv_rrrr is a vec4 of the first texel's u or v
-    //  tile_uv coord in [0, 1].  tex_uv_r will contain the tile_uv u or v coord
-    //  for four new texel samples.
-    #define CALCULATE_R_COORD_FOR_4_SAMPLES                                    \
-        const vec4 true_i = vec4(i_base + i) + vec4(0.0, 1.0, 2.0, 3.0); \
-        const vec4 tile_uv_r = fract(                                         \
-            first_texel_tile_uv_rrrr + true_i * tile_dr);                      \
-        const vec4 tex_uv_r = tile_uv_r * tile_size_uv_r;
-
-    #ifdef PHOSPHOR_MASK_RESIZE_LANCZOS_WINDOW
-        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
-            const vec4 pi_dist_over_lobes = pi_over_lobes * dist;            \
-            const vec4 weights = min(sin(pi_dist) * sin(pi_dist_over_lobes) /\
-                (pi_dist*pi_dist_over_lobes), vec4(1.0));
-    #else
-        #define CALCULATE_SINC_RESAMPLE_WEIGHTS                                \
-            const vec4 weights = min(sin(pi_dist)/pi_dist, vec4(1.0));
-    #endif
-
-    #define UPDATE_COLOR_AND_WEIGHT_SUMS                                       \
-        const vec4 dist = magnification_scale *                              \
-            abs(first_dist_unscaled - true_i);                                 \
-        const vec4 pi_dist = pi * dist;                                      \
-        CALCULATE_SINC_RESAMPLE_WEIGHTS;                                       \
-        pixel_color += new_sample0 * weights.xxx;                              \
-        pixel_color += new_sample1 * weights.yyy;                              \
-        pixel_color += new_sample2 * weights.zzz;                              \
-        pixel_color += new_sample3 * weights.www;                              \
-        weight_sum += weights;
-
-    #define VERTICAL_SINC_RESAMPLE_LOOP_BODY                                   \
-        CALCULATE_R_COORD_FOR_4_SAMPLES;                                       \
-        const vec3 new_sample0 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv.x, tex_uv_r.x)).rgb;                                 \
-        const vec3 new_sample1 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv.x, tex_uv_r.y)).rgb;                                 \
-        const vec3 new_sample2 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv.x, tex_uv_r.z)).rgb;                                 \
-        const vec3 new_sample3 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv.x, tex_uv_r.w)).rgb;                                 \
-        UPDATE_COLOR_AND_WEIGHT_SUMS;
-
-    #define HORIZONTAL_SINC_RESAMPLE_LOOP_BODY                                 \
-        CALCULATE_R_COORD_FOR_4_SAMPLES;                                       \
-        const vec3 new_sample0 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv_r.x, tex_uv.y)).rgb;                                 \
-        const vec3 new_sample1 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv_r.y, tex_uv.y)).rgb;                                 \
-        const vec3 new_sample2 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv_r.z, tex_uv.y)).rgb;                                 \
-        const vec3 new_sample3 = tex2Dlod0try(texture,                       \
-            vec2(tex_uv_r.w, tex_uv.y)).rgb;                                 \
-        UPDATE_COLOR_AND_WEIGHT_SUMS;
-
-
-////////////////////////////  RESAMPLING FUNCTIONS  ////////////////////////////
-
 ////////////////////////////  TILE SIZE CALCULATION  ///////////////////////////

 vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size,
@ -317,7 +282,115 @@ vec2 get_resized_mask_tile_size(const vec2 estimated_viewport_size,
    return final_resized_tile_size;
 }

+////////////////////////////  RESAMPLING FUNCTIONS  ////////////////////////////

+vec3 downsample_vertical_sinc_tiled(const sampler2D texture,
+    const vec2 tex_uv, const vec2 texture_size, const float dr,
+    const float magnification_scale, const float tile_size_uv_r)
+{
+    //  Requires:   1.) dr == du == 1.0/texture_size.x or
+    //                  dr == dv == 1.0/texture_size.y
+    //                  (whichever direction we're resampling in).
+    //                  It's a scalar to save register space.
+    //              2.) tile_size_uv_r is the input tile's size along the
+    //                  resampled dimension as a fraction of the input texture
+    //                  (its reciprocal is used below as tiles per texture).
+    //              3.) magnification_scale must be <= 1.0.
+    //  Returns:    Return a [Lanczos] sinc-resampled pixel of a vertically
+    //              downsized input tile embedded in an input texture.  (The
+    //              vertical version is special-cased though: It assumes the
+    //              tile size equals the [static] texture size, since it's used
+    //              on an LUT texture input containing one tile.  For more
+    //              generic use, eliminate the "static" in the parameters.)
+    //  The "r" in "dr," "tile_size_uv_r," etc. refers to the dimension
+    //  we're resizing along, e.g. "dy" in this case.
+    #ifdef USE_SINGLE_STATIC_LOOP
+        //  A static loop can be faster, but it might blur too much from using
+        //  more samples than it should.
+        const int samples = int(max_sinc_resize_samples_m4);
+    #else
+        const int samples = int(get_dynamic_loop_size(magnification_scale));
+    #endif

+    //  Get the first sample location (scalar tile uv coord along the resized
+    //  dimension) and distance from the output location (in texels):
+    const float input_tiles_per_texture_r = 1.0/tile_size_uv_r;
+    //  true = vertical resize:
+    const vec2 first_texel_tile_r_and_dist = get_first_texel_tile_uv_and_dist(
+        tex_uv, texture_size, dr, input_tiles_per_texture_r, samples, true);
+    const vec4 first_texel_tile_uv_rrrr = first_texel_tile_r_and_dist.xxxx;
+    const vec4 first_dist_unscaled = first_texel_tile_r_and_dist.yyyy;
+    //  Get the tile sample offset:
+    const float tile_dr = dr * input_tiles_per_texture_r;

+    //  Sum up each weight and weighted sample color, varying the looping
+    //  strategy based on our expected dynamic loop capabilities.  The loop
+    //  body macros read i, i_base, and the surrounding locals by name.
+    int i_base = 0;  //  samples already consumed by earlier loop pieces
+    vec4 weight_sum = vec4(0.0);
+    vec3 pixel_color = vec3(0.0);
+    const int i_step = 4;  //  each loop body invocation handles four samples
+    #ifdef BREAK_LOOPS_INTO_PIECES
+        if(samples - i_base >= 64)
+        {
+            for(int i = 0; i < 64; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+            i_base += 64;
+        }
+        if(samples - i_base >= 32)
+        {
+            for(int i = 0; i < 32; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+            i_base += 32;
+        }
+        if(samples - i_base >= 16)
+        {
+            for(int i = 0; i < 16; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+            i_base += 16;
+        }
+        if(samples - i_base >= 8)
+        {
+            for(int i = 0; i < 8; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+            i_base += 8;
+        }
+        if(samples - i_base >= 4)
+        {
+            for(int i = 0; i < 4; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+            i_base += 4;
+        }
+        //  Do another 4-sample block for a total of 128 max samples.
+        if(samples - i_base > 0)
+        {
+            for(int i = 0; i < 4; i += i_step)
+            {
+                VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+            }
+        }
+    #else
+        for(int i = 0; i < samples; i += i_step)
+        {
+            VERTICAL_SINC_RESAMPLE_LOOP_BODY;
+        }
+    #endif
+    //  Normalize so the weight_sum == 1.0, and return:
+    const vec2 weight_sum_reduce = weight_sum.xy + weight_sum.zw;
+    const vec3 scalar_weight_sum = vec3(weight_sum_reduce.x + 
+        weight_sum_reduce.y);
+    return (pixel_color/scalar_weight_sum);
+}

 #endif  //  PHOSPHOR_MASK_RESIZING_H