mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-23 18:06:34 +11:00
replace branches with chained selects
This trades the per-pixel branching for additional ALU work and chained selects. My expectation is that this will be faster, but that may be hardware- and driver-dependent, and confirming it likely requires profiling and examining the generated code. The original branching code is kept in comments, with notes explaining the less readable select-based version.
This commit is contained in:
parent
b103a55301
commit
5e1188f968
1 changed files with 89 additions and 36 deletions
125
shader/fine.wgsl
125
shader/fine.wgsl
|
@ -115,22 +115,33 @@ fn read_end_clip(cmd_ix: u32) -> CmdEndClip {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extend_mode(t: f32, mode: u32) -> f32 {
|
fn extend_mode(t: f32, mode: u32) -> f32 {
|
||||||
// This can be replaced with two selects, exchanging the cost
|
let EXTEND_PAD = 0u;
|
||||||
// of a branch for additional ALU
|
let EXTEND_REPEAT = 1u;
|
||||||
switch mode {
|
let EXTEND_REFLECT = 2u;
|
||||||
// PAD
|
// Branching version of the code below:
|
||||||
case 0u: {
|
//
|
||||||
return clamp(t, 0.0, 1.0);
|
// switch mode {
|
||||||
}
|
// // EXTEND_PAD
|
||||||
// REPEAT
|
// case 0u: {
|
||||||
case 1u: {
|
// return clamp(t, 0.0, 1.0);
|
||||||
return fract(t);
|
// }
|
||||||
}
|
// // EXTEND_REPEAT
|
||||||
// REFLECT (2)
|
// case 1u: {
|
||||||
default: {
|
// return fract(t);
|
||||||
return abs(t - 2.0 * round(0.5 * t));
|
// }
|
||||||
}
|
// // EXTEND_REFLECT
|
||||||
}
|
// default: {
|
||||||
|
// return abs(t - 2.0 * round(0.5 * t));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
let pad = clamp(t, 0.0, 1.0);
|
||||||
|
let repeat = fract(t);
|
||||||
|
let reflect = abs(t - 2.0 * round(0.5 * t));
|
||||||
|
return select(
|
||||||
|
select(pad, repeat, mode == EXTEND_REPEAT),
|
||||||
|
reflect,
|
||||||
|
mode == EXTEND_REFLECT
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -304,9 +315,9 @@ fn main(
|
||||||
let is_circular = rad.kind == RAD_GRAD_KIND_CIRCULAR;
|
let is_circular = rad.kind == RAD_GRAD_KIND_CIRCULAR;
|
||||||
let is_focal_on_circle = rad.kind == RAD_GRAD_KIND_FOCAL_ON_CIRCLE;
|
let is_focal_on_circle = rad.kind == RAD_GRAD_KIND_FOCAL_ON_CIRCLE;
|
||||||
let is_swapped = (rad.flags & RAD_GRAD_SWAPPED) != 0u;
|
let is_swapped = (rad.flags & RAD_GRAD_SWAPPED) != 0u;
|
||||||
|
let is_greater = radius > 1.0;
|
||||||
let inv_r1 = select(1.0 / radius, 0.0, is_circular);
|
let inv_r1 = select(1.0 / radius, 0.0, is_circular);
|
||||||
let root_f = select(1.0, -1.0, is_swapped || one_minus_focal_x < 0.0);
|
let less_scale = select(1.0, -1.0, is_swapped || one_minus_focal_x < 0.0);
|
||||||
let t_base_scale = select(vec2(0.0, -1.0), vec2(1.0, 1.0), is_swapped);
|
|
||||||
let t_sign = sign(one_minus_focal_x);
|
let t_sign = sign(one_minus_focal_x);
|
||||||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
let my_xy = vec2(xy.x + f32(i), xy.y);
|
let my_xy = vec2(xy.x + f32(i), xy.y);
|
||||||
|
@ -316,25 +327,67 @@ fn main(
|
||||||
let xx = x * x;
|
let xx = x * x;
|
||||||
let yy = y * y;
|
let yy = y * y;
|
||||||
let x_inv_r1 = x * inv_r1;
|
let x_inv_r1 = x * inv_r1;
|
||||||
var t = 0.0;
|
// This is the branching version of the code implemented
|
||||||
var valid = true;
|
// by the chained selects below:
|
||||||
if is_strip {
|
//
|
||||||
let a = radius - yy;
|
// var t = 0.0;
|
||||||
t = sqrt(a) + x;
|
// var is_valid = true;
|
||||||
valid = a >= 0.0;
|
// if is_strip {
|
||||||
} else if is_focal_on_circle {
|
// let a = radius - yy;
|
||||||
t = (xx + yy) / x;
|
// t = sqrt(a) + x;
|
||||||
valid = t >= 0.0;
|
// is_valid = a >= 0.0;
|
||||||
} else if radius > 1.0 {
|
// } else if is_focal_on_circle {
|
||||||
t = sqrt(xx + yy) - x_inv_r1;
|
// t = (xx + yy) / x;
|
||||||
} else {
|
// is_valid = t >= 0.0;
|
||||||
let a = xx - yy;
|
// } else if radius > 1.0 {
|
||||||
t = root_f * sqrt(a) - x_inv_r1;
|
// t = sqrt(xx + yy) - x_inv_r1;
|
||||||
valid = a >= 0.0 && t >= 0.0;
|
// } else {
|
||||||
}
|
// let a = xx - yy;
|
||||||
if valid {
|
// t = root_f * sqrt(a) - x_inv_r1;
|
||||||
|
// is_valid = a >= 0.0 && t >= 0.0;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The pattern is that these can all be computed with
|
||||||
|
// the expression: a * sqrt(b) + c
|
||||||
|
//
|
||||||
|
// The parameters to the expression are computed up front
|
||||||
|
// and chosen with chained selects based on their
|
||||||
|
// respective conditions. The same process is done
|
||||||
|
// for determining the validity of the resulting value.
|
||||||
|
var strip_params = vec3(1.0, radius - yy, x);
|
||||||
|
var foc_params = vec3(1.0, 0.0, (xx + yy) / x);
|
||||||
|
var greater_params = vec3(1.0, xx + yy, -x_inv_r1);
|
||||||
|
var less_params = vec3(less_scale, xx - yy, -x_inv_r1);
|
||||||
|
var params = select(
|
||||||
|
select(
|
||||||
|
select(
|
||||||
|
less_params,
|
||||||
|
greater_params,
|
||||||
|
is_greater,
|
||||||
|
),
|
||||||
|
foc_params,
|
||||||
|
is_focal_on_circle,
|
||||||
|
),
|
||||||
|
strip_params,
|
||||||
|
is_strip,
|
||||||
|
);
|
||||||
|
var t = params.x * sqrt(params.y) + params.z;
|
||||||
|
let is_valid = select(
|
||||||
|
select(
|
||||||
|
select(
|
||||||
|
params.y >= 0.0 && t >= 0.0,
|
||||||
|
true,
|
||||||
|
is_greater
|
||||||
|
),
|
||||||
|
t >= 0.0 && x != 0.0,
|
||||||
|
is_focal_on_circle,
|
||||||
|
),
|
||||||
|
params.y >= 0.0,
|
||||||
|
is_strip,
|
||||||
|
);
|
||||||
|
if is_valid {
|
||||||
t = extend_mode(focal_x + t_sign * t, rad.extend_mode);
|
t = extend_mode(focal_x + t_sign * t, rad.extend_mode);
|
||||||
t = (t_base_scale.x - t) * t_base_scale.y;
|
t = select(t, 1.0 - t, is_swapped);
|
||||||
let x = i32(round(t * f32(GRADIENT_WIDTH - 1)));
|
let x = i32(round(t * f32(GRADIENT_WIDTH - 1)));
|
||||||
let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0);
|
let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0);
|
||||||
let fg_i = fg_rgba * area[i];
|
let fg_i = fg_rgba * area[i];
|
||||||
|
|
Loading…
Add table
Reference in a new issue