2020-12-12 01:01:48 +11:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
|
2020-06-03 10:10:20 +10:00
|
|
|
// Coarse rasterization of path segments.
|
|
|
|
|
|
|
|
// Flattens each cubic path segment into lines and writes tile segments for the tiles each line covers.
|
|
|
|
|
|
|
|
#version 450
|
|
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
#include "mem.h"
|
2020-12-24 22:00:53 +11:00
|
|
|
#include "setup.h"
|
2020-06-03 10:10:20 +10:00
|
|
|
|
2020-06-05 08:58:38 +10:00
|
|
|
// Workgroup width, expressed as a power of two so the log2 is available too.
#define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG)

// One-dimensional workgroup; main() indexes path segments by gl_GlobalInvocationID.x.
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
|
2020-06-03 10:10:20 +10:00
|
|
|
|
2020-12-12 04:30:20 +11:00
|
|
|
// Read-only configuration block (set 0, binding 1). main() reads the path
// segment and tile allocations, n_pathseg and mem_size from `conf`.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};
|
|
|
|
|
|
|
|
#include "pathseg.h"
|
|
|
|
#include "tile.h"
|
|
|
|
|
|
|
|
// Multiplying a pixel coordinate by SX / SY yields a (fractional) tile coordinate.
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))

// Flattening tolerance budget. ACCURACY is split into a part used when
// choosing the number of quadratics (Q_ACCURACY) and the remainder used when
// subdividing those quadratics into lines (REM_ACCURACY).
#define ACCURACY 0.25
#define Q_ACCURACY (ACCURACY * 0.1)
#define REM_ACCURACY (ACCURACY - Q_ACCURACY)
// Divisor applied to the squared cubic error in main() before taking the
// sixth root to obtain the quadratic count.
#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
// Upper bound on quadratics per cubic; also sizes the keep_params array in main().
#define MAX_QUADS 16
|
2020-06-10 10:20:58 +10:00
|
|
|
|
2020-06-10 13:35:27 +10:00
|
|
|
// Evaluate the quadratic Bezier with control points p0, p1, p2 at parameter t.
vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
    float s = 1.0 - t;
    // Horner-style nesting: p0*s^2 + (2*s*p1 + t*p2)*t.
    vec2 inner = p1 * (s * 2.0) + p2 * t;
    return p0 * (s * s) + inner * t;
}
|
|
|
|
|
2020-06-10 10:20:58 +10:00
|
|
|
// Evaluate the cubic Bezier with control points p0..p3 at parameter t.
vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
    float s = 1.0 - t;
    // Nested form of p0*s^3 + 3*s^2*t*p1 + 3*s*t^2*p2 + t^3*p3.
    vec2 tail = p2 * (s * 3.0) + p3 * t;
    vec2 middle = p1 * (s * s * 3.0) + tail * t;
    return p0 * (s * s * s) + middle * t;
}
|
|
|
|
|
2020-06-10 13:35:27 +10:00
|
|
|
// Result of estimate_subdiv() for one quadratic.
// Field order matters: the struct is built positionally as SubdivResult(val, a0, a2).
struct SubdivResult {
    // Subdivision estimate; main() sums these across quadratics to pick the line count.
    float val;
    // approx_parabola_integral() evaluated at the quadratic's start parameter.
    float a0;
    // approx_parabola_integral() evaluated at the quadratic's end parameter.
    float a2;
};
|
|
|
|
|
|
|
|
/// Approximates $\int (1 + 4x^2)^{-0.25} dx$.
///
/// Used when flattening curves (see estimate_subdiv).
#define D 0.67
float approx_parabola_integral(float x) {
    float radicand = 1.0 - D + (D * D * D * D + 0.25 * x * x);
    // inversesqrt(sqrt(r)) is r^(-1/4).
    return x * inversesqrt(sqrt(radicand));
}
|
|
|
|
|
|
|
|
/// Approximates the inverse of the parabola integral above.
#define B 0.39
float approx_parabola_inv_integral(float x) {
    float radicand = 1.0 - B + (B * B + 0.25 * x * x);
    return x * sqrt(radicand);
}
|
|
|
|
|
|
|
|
// Estimate how finely the quadratic (p0, p1, p2) needs to be subdivided.
//
// sqrt_tol is the square root of the flattening tolerance. The returned
// SubdivResult carries the estimate in `val` and the parabola-integral values
// at both ends in `a0` / `a2`. `val` stays 0 whenever `scale < 1e9` does not
// hold, which includes a NaN scale.
SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
    vec2 leg_a = p1 - p0;
    vec2 leg_b = p2 - p1;
    vec2 second_diff = leg_a - leg_b;
    // 2D cross product of the chord (p2 - p0) with the second difference.
    float det = (p2.x - p0.x) * second_diff.y - (p2.y - p0.y) * second_diff.x;
    float x_start = (leg_a.x * second_diff.x + leg_a.y * second_diff.y) / det;
    float x_end = (leg_b.x * second_diff.x + leg_b.y * second_diff.y) / det;
    float scale = abs(det / (length(second_diff) * (x_end - x_start)));

    float int_start = approx_parabola_integral(x_start);
    float int_end = approx_parabola_integral(x_end);

    // Written as a negated '<' so that a NaN scale also takes this exit.
    if (!(scale < 1e9)) {
        return SubdivResult(0.0, int_start, int_end);
    }

    float span = abs(int_end - int_start);
    float root_scale = sqrt(scale);
    float estimate;
    if (sign(x_start) != sign(x_end)) {
        // End parameters have different signs: use the tolerance-limited form.
        float x_floor = sqrt_tol / root_scale;
        estimate = sqrt_tol * span / approx_parabola_integral(x_floor);
    } else {
        estimate = span * root_scale;
    }
    return SubdivResult(estimate, int_start, int_end);
}
|
|
|
|
|
2022-06-24 01:48:26 +10:00
|
|
|
// All writes to the output must be gated by mem_ok.
|
|
|
|
// Starts true; main() clears it when malloc_stage() returns MALLOC_FAILED and
// then skips TileSeg_write while it is false.
bool mem_ok = true;
|
|
|
|
|
2020-06-03 10:10:20 +10:00
|
|
|
// Entry point: one invocation per path segment.
//
// For a cubic segment this
//   1. picks a number of quadratics (at most MAX_QUADS) from the cubic's error
//      term and estimates, per quadratic, how many lines are needed;
//   2. walks the quadratics again, emitting `n` line segments spaced evenly in
//      the estimate's parameter space, with the last one ending at cubic.p3;
//   3. for every emitted line, allocates TileSeg storage for the tiles in its
//      clamped bounding box, links a TileSeg into each tile it visits, and for
//      non-stroke segments bumps a per-row backdrop counter.
// Segments with any other tag fall through the switch and do nothing.
void main() {
    // NOTE(review): check_deps is defined outside this file; presumably it
    // reports whether the listed stages may run - confirm in mem.h.
    if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
        return;
    }
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);

    // Invocations past the end of the segment list keep the Nop tag.
    PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
    if (element_ix < conf.n_pathseg) {
        tag = PathSeg_tag(conf.pathseg_alloc, ref);
    }
    switch (tag.tag) {
    case PathSeg_Cubic:
        PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);

        // Squared length of (3*(p2 - p1) + p0 - p3), used as the error measure.
        vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
        float err = err_v.x * err_v.x + err_v.y * err_v.y;
        // The number of quadratics.
        uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
        n_quads = min(n_quads, MAX_QUADS);
        // Per-quadratic estimates, cached so the second pass need not recompute them.
        SubdivResult keep_params[MAX_QUADS];
        // Iterate over quadratics and tote up the estimated number of segments.
        float val = 0.0;
        vec2 qp0 = cubic.p0;
        float step = 1.0 / float(n_quads);
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            // Control point chosen so the quadratic passes through the cubic's midpoint sample.
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
            keep_params[i] = params;
            val += params.val;

            qp0 = qp2;
        }
        // Total number of line segments to emit (at least one).
        uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);

        bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
        uint path_ix = cubic.path_ix;
        Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
        // NOTE(review): path_alloc is not referenced again in this function;
        // kept because new_alloc is defined elsewhere.
        Alloc path_alloc =
            new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
        ivec4 bbox = ivec4(path.bbox);
        vec2 p0 = cubic.p0;
        qp0 = cubic.p0;
        float v_step = val / float(n);
        int n_out = 1;
        float val_sum = 0.0;
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = keep_params[i];
            float u0 = approx_parabola_inv_integral(params.a0);
            float u2 = approx_parabola_inv_integral(params.a2);
            float uscale = 1.0 / (u2 - u0);
            float target = float(n_out) * v_step;
            // Emit every line whose end point falls inside this quadratic; the
            // n-th (final) line is always emitted and ends exactly at cubic.p3.
            while (n_out == n || target < val_sum + params.val) {
                vec2 p1;
                if (n_out == n) {
                    p1 = cubic.p3;
                } else {
                    // Map the target position back to a parameter on this quadratic.
                    float u = (target - val_sum) / params.val;
                    float a = mix(params.a0, params.a2, u);
                    float au = approx_parabola_inv_integral(a);
                    float quad_t = (au - u0) * uscale;
                    p1 = eval_quad(qp0, qp1, qp2, quad_t);
                }

                // Output line segment

                // Bounding box of element in pixel coordinates.
                float xmin = min(p0.x, p1.x) - cubic.stroke.x;
                float xmax = max(p0.x, p1.x) + cubic.stroke.x;
                float ymin = min(p0.y, p1.y) - cubic.stroke.y;
                float ymax = max(p0.y, p1.y) + cubic.stroke.y;
                float dx = p1.x - p0.x;
                float dy = p1.y - p0.y;
                // Set up for per-scanline coverage formula, below.
                // Near-horizontal lines get a large sentinel instead of dividing by ~0.
                float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
                // c: half-width of the covered span in tiles; a + b*y: its centre for tile row y.
                float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
                float b = invslope; // Note: assumes square tiles, otherwise scale.
                float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

                // Tile-space bounding box (x1/y1 exclusive), clamped to the path's bbox.
                int x0 = int(floor(xmin * SX));
                int x1 = int(floor(xmax * SX) + 1);
                int y0 = int(floor(ymin * SY));
                int y1 = int(floor(ymax * SY) + 1);

                x0 = clamp(x0, bbox.x, bbox.z);
                y0 = clamp(y0, bbox.y, bbox.w);
                x1 = clamp(x1, bbox.x, bbox.z);
                y1 = clamp(y1, bbox.y, bbox.w);
                float xc = a + b * float(y0);
                // `base + x` is the index of tile (x, y) within the path's tile rectangle.
                int stride = bbox.z - bbox.x;
                int base = (y0 - bbox.y) * stride - bbox.x;
                // TODO: can be tighter, use c to bound width
                uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
                // Consider using subgroups to aggregate atomic add.
                uint malloc_size = n_tile_alloc * TileSeg_size;
                uint tile_offset = malloc_stage(malloc_size, conf.mem_size, STAGE_PATH_COARSE);
                if (tile_offset == MALLOC_FAILED) {
                    // Remember the failure; TileSeg_write below is skipped from now on.
                    mem_ok = false;
                }
                Alloc tile_alloc = new_alloc(tile_offset, malloc_size, true);

                TileSeg tile_seg;

                // Tile columns of the line's end points, ordered so that xray
                // belongs to the end point with the smaller y.
                int xray = int(floor(p0.x * SX));
                int last_xray = int(floor(p1.x * SX));
                if (p0.y > p1.y) {
                    int tmp = xray;
                    xray = last_xray;
                    last_xray = tmp;
                }
                for (int y = y0; y < y1; y++) {
                    float tile_y0 = float(y * TILE_HEIGHT_PX);
                    int xbackdrop = max(xray + 1, bbox.x);
                    // Fill segments that start above this tile row add a signed
                    // count to the tile just right of the crossing column.
                    if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) {
                        int backdrop = p1.y < p0.y ? 1 : -1;
                        TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
                        uint tile_el = tile_ref.offset >> 2;
                        // Second 32-bit word of the tile record; presumably the
                        // backdrop field - confirm against tile.h.
                        atomicAdd(memory[tile_el + 1], backdrop);
                    }

                    // next_xray is the xray for the next scanline; the line segment intersects
                    // all tiles between xray and next_xray.
                    int next_xray = last_xray;
                    if (y < y1 - 1) {
                        float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
                        // NOTE(review): unlike invslope above, this division is not guarded against dy == 0.
                        float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
                        next_xray = int(floor(x_edge * SX));
                    }

                    int min_xray = min(xray, next_xray);
                    int max_xray = max(xray, next_xray);
                    // Widen the coverage span so it always includes [min_xray, max_xray],
                    // then clamp so it never exceeds the allocated rectangle.
                    int xx0 = min(int(floor(xc - c)), min_xray);
                    int xx1 = max(int(ceil(xc + c)), max_xray + 1);
                    xx0 = clamp(xx0, x0, x1);
                    xx1 = clamp(xx1, x0, x1);

                    for (int x = xx0; x < xx1; x++) {
                        float tile_x0 = float(x * TILE_WIDTH_PX);
                        TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
                        uint tile_el = tile_ref.offset >> 2;
                        // Swap the new segment's offset into the tile's first word; the
                        // previous value becomes this segment's `next`, forming a linked
                        // list. This exchange is not gated by mem_ok.
                        uint old = atomicExchange(memory[tile_el], tile_offset);
                        tile_seg.origin = p0;
                        tile_seg.vector = p1 - p0;
                        float y_edge = 0.0;
                        if (!is_stroke) {
                            // y where the line meets the tile's left edge.
                            // NOTE(review): division by dx is not guarded against dx == 0.
                            y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
                            if (min(p0.x, p1.x) < tile_x0) {
                                // Clip the part of the segment left of the tile edge.
                                vec2 p = vec2(tile_x0, y_edge);
                                if (p0.x > p1.x) {
                                    tile_seg.vector = p - p0;
                                } else {
                                    tile_seg.origin = p;
                                    tile_seg.vector = p1 - p;
                                }
                                // kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
                                // Nudge zeroes towards the intended sign.
                                if (tile_seg.vector.x == 0) {
                                    tile_seg.vector.x = sign(p1.x - p0.x) * 1e-9;
                                }
                            }
                            if (x <= min_xray || max_xray < x) {
                                // Reject inconsistent intersections.
                                y_edge = 1e9;
                            }
                        }
                        tile_seg.y_edge = y_edge;
                        tile_seg.next.offset = old;
                        if (mem_ok) {
                            TileSeg_write(tile_alloc, TileSegRef(tile_offset), tile_seg);
                        }
                        tile_offset += TileSeg_size;
                    }
                    // Advance the per-row state to the next tile row.
                    xc += b;
                    base += stride;
                    xray = next_xray;
                }

                n_out += 1;
                target += v_step;
                p0 = p1;
            }
            val_sum += params.val;

            qp0 = qp2;
        }

        break;
    }
}
|