#version 450

/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////

//  crt-royale: A full-featured CRT shader, with cheese.
//  Copyright (C) 2014 TroggleMonkey
//
//  This program is free software; you can redistribute it and/or modify it
//  under the terms of the GNU General Public License as published by the Free
//  Software Foundation; either version 2 of the License, or any later version.
//
//  This program is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
//  more details.
//
//  You should have received a copy of the GNU General Public License along with
//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
//  Place, Suite 330, Boston, MA 02111-1307 USA

layout(push_constant) uniform Push
{
    vec4 SourceSize;
    vec4 OriginalSize;
    vec4 OutputSize;
    uint FrameCount;
} params;

layout(std140, set = 0, binding = 0) uniform UBO
{
    mat4 MVP;
    float crt_gamma;
    float lcd_gamma;
    float levels_contrast;
    float halation_weight;
    float diffusion_weight;
    float bloom_underestimate_levels;
    float bloom_excess;
    float beam_min_sigma;
    float beam_max_sigma;
    float beam_spot_power;
    float beam_min_shape;
    float beam_max_shape;
    float beam_shape_power;
    float beam_horiz_filter;
    float beam_horiz_sigma;
    float beam_horiz_linear_rgb_weight;
    float convergence_offset_x_r;
    float convergence_offset_x_g;
    float convergence_offset_x_b;
    float convergence_offset_y_r;
    float convergence_offset_y_g;
    float convergence_offset_y_b;
    float mask_type;
    float mask_sample_mode_desired;
    float mask_num_triads_desired;
    float aa_subpixel_r_offset_x_runtime;
    float aa_subpixel_r_offset_y_runtime;
    float aa_cubic_c;
    float aa_gauss_sigma;
    float geom_mode_runtime;
    float geom_radius;
    float geom_view_dist;
    float geom_tilt_angle_x;
    float geom_tilt_angle_y;
    float geom_aspect_ratio_x;
    float geom_aspect_ratio_y;
    float geom_overscan_x;
    float geom_overscan_y;
    float border_size;
    float border_darkness;
    float border_compress;
    float interlace_bff;
    float interlace_1080i;
} global;

//////////////////////////////////  INCLUDES  //////////////////////////////////

#include "params.inc"
#include "../../../../include/compat_macros.inc"
#include "../user-settings.h"
#include "derived-settings-and-constants.h"
#include "bind-shader-params.h"
#include "scanline-functions.h"
#include "../../../../include/gamma-management.h"

#pragma stage vertex
layout(location = 0) in vec4 Position;
layout(location = 1) in vec2 TexCoord;
layout(location = 0) out vec2 tex_uv;
layout(location = 1) out vec2 uv_step;                  // uv size of a texel (x) and scanline (y)
layout(location = 2) out vec2 il_step_multiple;         // (1, 1) = progressive, (1, 2) = interlaced
layout(location = 3) out float pixel_height_in_scanlines; // Height of an output pixel in scanlines

//  Vertex stage: transforms the vertex by the MVP matrix and precomputes the
//  per-pass stepping values that the fragment stage reads as varyings.
void main()
{
    gl_Position = global.MVP * Position;
    //  NOTE(review): the 1.00001 scale looks like a precision nudge on the
    //  texture coordinate; its exact purpose is not visible here -- confirm.
    tex_uv = TexCoord * 1.00001;

    //  Interlacing check: the vertical step multiple between lines is 1 for
    //  progressive sources and 2 for interlaced sources; x is always 1.
    float2 src_dims = IN.video_size.xy;
    const float field_step = 1.0 + float(is_interlaced(src_dims.y));
    il_step_multiple = float2(1.0, field_step);

    //  uv distance spanned by one texel (x) and one scanline (y):
    uv_step = il_step_multiple / IN.texture_size;

    //  The beam {sigma, shape} ranges are intentionally not computed in this
    //  stage; the fragment stage derives them itself.

    //  Output pixel height, in scanlines, for antialiased/integral sampling:
    pixel_height_in_scanlines =
        (src_dims.y / IN.output_size.y) / il_step_multiple.y;
}

#pragma stage fragment
#pragma format R8G8B8A8_SRGB
layout(location = 0) in vec2 tex_uv;
layout(location = 1) in vec2 uv_step;                  // uv size of a texel (x) and scanline (y)
layout(location = 2) in vec2 il_step_multiple;         // (1, 1) = progressive, (1, 2) = interlaced
layout(location = 3) in float pixel_height_in_scanlines; // Height of an output pixel in scanlines
layout(location = 0) out vec4 FragColor;
layout(set = 0, binding = 2) uniform sampler2D Source;
#define input_texture Source

//  Fragment stage: samples 2 to 6 (possibly misconverged) scanlines around the
//  current position, sums their beam contributions to reach the final vertical
//  resolution, and temporarily auto-dims the result to avoid clipping.
void main()
{
    //  Copy the attributes this pass needs into locals:
    float2 tex_dims = IN.texture_size;
    float2 tex_dims_inv = 1.0/tex_dims;
    float frame_idx = float(IN.frame_count);
    const float px_height = pixel_height_in_scanlines;

    //  uv of the previous scanline in this field, plus this sample's distance
    //  from that scanline (in scanlines), returned through line_dist.
    float line_dist;
    const float2 base_uv = get_last_scanline_uv(tex_uv, tex_dims,
        tex_dims_inv, il_step_multiple, frame_idx, line_dist);
    const float2 line_step = float2(0.0, uv_step.y);

    //  Per-channel positive distance from scanline 2, in current-field
    //  scanlines.  Vertical convergence offsets shift each channel separately.
    float3 rgb_dist = float3(line_dist);
    if(beam_misconvergence)
    {
        rgb_dist = float3(line_dist) - get_convergence_offsets_y_vector();
    }

    //  Work out the {sigma, shape} ranges once per pixel rather than once per
    //  scanline_contrib() call.  They are computed in this stage (not passed
    //  from the vertex stage) so static settings can be constant-folded.
    const float sigma_span =
        max(beam_max_sigma, beam_min_sigma) - beam_min_sigma;
    const float shape_span =
        max(beam_max_shape, beam_min_shape) - beam_min_shape;

    //  Scanlines are numbered 0-5, with 2 = previous and 3 = next.  Horizontal
    //  sampling is ignored, since IN.output_size.x = video_size.x.
    //  NOTE: Anisotropic filtering creates interlacing artifacts, which is why
    //  ORIG_LINEARIZED bobbed any interlaced input before this pass.
    //  Contributions are summed without normalization: each scanline acts as
    //  an additive light source rather than a sample of a continuous signal.
    const float3 line2_rgb =
        tex2D_linearize(input_texture, base_uv).rgb;
    const float3 line3_rgb =
        tex2D_linearize(input_texture, base_uv + line_step).rgb;
    const float3 line2_part = scanline_contrib(rgb_dist,
        line2_rgb, px_height, sigma_span, shape_span);
    const float3 line3_part = scanline_contrib(abs(float3(1.0) - rgb_dist),
        line3_rgb, px_height, sigma_span, shape_span);
    float3 beam_sum = line2_part + line3_part;

    if(beam_num_scanlines > 5.5)
    {
        //  6 scanlines in total: add lines 0, 1, 4, and 5.
        const float3 line1_rgb = tex2D_linearize(input_texture,
            base_uv - line_step).rgb;
        const float3 line4_rgb = tex2D_linearize(input_texture,
            base_uv + 2.0 * line_step).rgb;
        const float3 line0_rgb = tex2D_linearize(input_texture,
            base_uv - 2.0 * line_step).rgb;
        const float3 line5_rgb = tex2D_linearize(input_texture,
            base_uv + 3.0 * line_step).rgb;

        const float3 line0_part = scanline_contrib(rgb_dist + float3(2.0),
            line0_rgb, px_height, sigma_span, shape_span);
        const float3 line1_part = scanline_contrib(rgb_dist + float3(1.0),
            line1_rgb, px_height, sigma_span, shape_span);
        const float3 line4_part = scanline_contrib(abs(float3(2.0) - rgb_dist),
            line4_rgb, px_height, sigma_span, shape_span);
        const float3 line5_part = scanline_contrib(abs(float3(3.0) - rgb_dist),
            line5_rgb, px_height, sigma_span, shape_span);
        beam_sum += line0_part + line1_part + line4_part + line5_part;
    }
    else if(beam_num_scanlines > 4.5)
    {
        //  5 scanlines in total: add lines 1 and 4, plus either 0 or 5.
        const float3 line1_rgb = tex2D_linearize(input_texture,
            base_uv - line_step).rgb;
        const float3 line4_rgb = tex2D_linearize(input_texture,
            base_uv + 2.0 * line_step).rgb;

        //  line_dist is in [0, 1], so rounding it gives a 0/1 selector:
        //  0 picks scanline 0 and 1 picks scanline 5.
        const float pick_far = round(line_dist);
        const float2 outer_uv_off =
            lerp(-2.0 * line_step, 3.0 * line_step, pick_far);
        const float3 outer_rgb = tex2D_linearize(input_texture,
            base_uv + outer_uv_off).rgb;

        const float3 line1_part = scanline_contrib(rgb_dist + float3(1.0),
            line1_rgb, px_height, sigma_span, shape_span);
        const float3 line4_part = scanline_contrib(abs(float3(2.0) - rgb_dist),
            line4_rgb, px_height, sigma_span, shape_span);
        const float3 outer_dist = lerp(
            rgb_dist + float3(2.0), float3(3.0) - rgb_dist, pick_far);
        const float3 outer_part = scanline_contrib(outer_dist,
            outer_rgb, px_height, sigma_span, shape_span);
        beam_sum += line1_part + line4_part + outer_part;
    }
    else if(beam_num_scanlines > 3.5)
    {
        //  4 scanlines in total: add lines 1 and 4.
        const float3 line1_rgb = tex2D_linearize(input_texture,
            base_uv - line_step).rgb;
        const float3 line4_rgb = tex2D_linearize(input_texture,
            base_uv + 2.0 * line_step).rgb;

        const float3 line1_part = scanline_contrib(rgb_dist + float3(1.0),
            line1_rgb, px_height, sigma_span, shape_span);
        const float3 line4_part = scanline_contrib(abs(float3(2.0) - rgb_dist),
            line4_rgb, px_height, sigma_span, shape_span);
        beam_sum += line1_part + line4_part;
    }
    else if(beam_num_scanlines > 2.5)
    {
        //  3 scanlines in total: add either line 1 or line 4.
        //  line_dist is in [0, 1], so rounding it gives a 0/1 selector:
        //  0 picks scanline 1 and 1 picks scanline 4.
        const float pick_far = round(line_dist);
        const float2 outer_uv_off =
            lerp(-line_step, 2.0 * line_step, pick_far);
        const float3 outer_rgb = tex2D_linearize(input_texture,
            base_uv + outer_uv_off).rgb;

        const float3 outer_dist = lerp(
            rgb_dist + float3(1.0), float3(2.0) - rgb_dist, pick_far);
        const float3 outer_part = scanline_contrib(outer_dist,
            outer_rgb, px_height, sigma_span, shape_span);
        beam_sum += outer_part;
    }

    //  Auto-dim the image to avoid clipping, encode if necessary, and output.
    //  The original plan was a minimal auto-dim factor stored in the alpha
    //  channel, but that did not work reliably.  A fixed factor is faster
    //  anyway, and levels_autodim_temp = 0.5 isn't causing banding.
    FragColor = encode_output(float4(beam_sum * levels_autodim_temp, 1.0));
}