// Coarse rasterization of path segments. // Allocation and initialization of tiles for paths. #version 450 #extension GL_GOOGLE_include_directive : enable #include "setup.h" #define LG_COARSE_WG 5 #define COARSE_WG (1 << LG_COARSE_WG) layout(local_size_x = COARSE_WG, local_size_y = 1) in; layout(set = 0, binding = 0) buffer PathSegBuf { uint[] pathseg; }; layout(set = 0, binding = 1) buffer AllocBuf { uint n_paths; uint n_pathseg; uint alloc; }; layout(set = 0, binding = 2) buffer TileBuf { uint[] tile; }; #include "pathseg.h" #include "tile.h" // scale factors useful for converting coordinates to tiles #define SX (1.0 / float(TILE_WIDTH_PX)) #define SY (1.0 / float(TILE_HEIGHT_PX)) #define ACCURACY 0.25 #define Q_ACCURACY (ACCURACY * 0.1) #define REM_ACCURACY (ACCURACY - Q_ACCURACY) #define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY) vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) { float mt = 1.0 - t; return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t; } vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) { float mt = 1.0 - t; return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t; } struct SubdivResult { float val; float a0; float a2; }; /// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$ /// /// This is used for flattening curves. #define D 0.67 float approx_parabola_integral(float x) { return x * inversesqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x))); } /// An approximation to the inverse parabola integral. #define B 0.39 float approx_parabola_inv_integral(float x) { return x * sqrt(1.0 - B + (B * B + 0.25 * x * x)); } SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) { vec2 d01 = p1 - p0; vec2 d12 = p2 - p1; vec2 dd = d01 - d12; float cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x; float x0 = (d01.x * dd.x + d01.y * dd.y) / cross; float x2 = (d12.x * dd.x + d12.y * dd.y) / cross; float scale = abs(cross / (length(dd) * (x2 - x0))); float a0 = approx_parabola_integral(x0); float a2 = approx_parabola_integral(x2); float val = 0.0; if (scale < 1e9) { float da = abs(a2 - a0); float sqrt_scale = sqrt(scale); if (sign(x0) == sign(x2)) { val = da * sqrt_scale; } else { float xmin = sqrt_tol / sqrt_scale; val = sqrt_tol * da / approx_parabola_integral(xmin); } } return SubdivResult(val, a0, a2); } void main() { uint element_ix = gl_GlobalInvocationID.x; PathSegRef ref = PathSegRef(element_ix * PathSeg_size); uint tag = PathSeg_Nop; if (element_ix < n_pathseg) { tag = PathSeg_tag(ref); } // Setup for coverage algorithm. float a, b, c; // Bounding box of element in pixel coordinates. float xmin, xmax, ymin, ymax; PathStrokeLine line; float dx; switch (tag) { case PathSeg_FillCubic: case PathSeg_StrokeCubic: PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref); vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; float err = err_v.x * err_v.x + err_v.y * err_v.y; // The number of quadratics. uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1); // Iterate over quadratics and tote up the estimated number of segments. float val = 0.0; vec2 qp0 = cubic.p0; float step = 1.0 / float(n_quads); for (uint i = 0; i < n_quads; i++) { float t = float(i + 1) * step; vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t); vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); val += params.val; qp0 = qp2; } uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1); uint path_ix = cubic.path_ix; Path path = Path_read(PathRef(path_ix * Path_size)); ivec4 bbox = ivec4(path.bbox); vec2 p0 = cubic.p0; qp0 = cubic.p0; float v_step = val / float(n); int n_out = 1; float val_sum = 0.0; for (uint i = 0; i < n_quads; i++) { float t = float(i + 1) * step; vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t); vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); float u0 = approx_parabola_inv_integral(params.a0); float u2 = approx_parabola_inv_integral(params.a2); float uscale = 1.0 / (u2 - u0); float target = float(n_out) * v_step; while (n_out == n || target < val_sum + params.val) { vec2 p1; if (n_out == n) { p1 = cubic.p3; } else { float u = (target - val_sum) / params.val; float a = mix(params.a0, params.a2, u); float au = approx_parabola_inv_integral(a); float t = (au - u0) * uscale; p1 = eval_quad(qp0, qp1, qp2, t); } // Output line segment xmin = min(p0.x, p1.x) - cubic.stroke.x; xmax = max(p0.x, p1.x) + cubic.stroke.x; ymin = min(p0.y, p1.y) - cubic.stroke.y; ymax = max(p0.y, p1.y) + cubic.stroke.y; float dx = p1.x - p0.x; float dy = p1.y - p0.y; // Set up for per-scanline coverage formula, below. float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy; c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX; b = invslope; // Note: assumes square tiles, otherwise scale. a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX; int x0 = int(floor((xmin) * SX)); int x1 = int(ceil((xmax) * SX)); int y0 = int(floor((ymin) * SY)); int y1 = int(ceil((ymax) * SY)); x0 = clamp(x0, bbox.x, bbox.z); y0 = clamp(y0, bbox.y, bbox.w); x1 = clamp(x1, bbox.x, bbox.z); y1 = clamp(y1, bbox.y, bbox.w); float xc = a + b * float(y0); int stride = bbox.z - bbox.x; int base = (y0 - bbox.y) * stride - bbox.x; // TODO: can be tighter, use c to bound width uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); // Consider using subgroups to aggregate atomic add. uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size); TileSeg tile_seg; for (int y = y0; y < y1; y++) { float tile_y0 = float(y * TILE_HEIGHT_PX); if (tag == PathSeg_FillCubic && min(p0.y, p1.y) <= tile_y0) { int xray = max(int(ceil(xc - 0.5 * b)), bbox.x); if (xray < bbox.z) { int backdrop = p1.y < p0.y ? 1 : -1; TileRef tile_ref = Tile_index(path.tiles, uint(base + xray)); uint tile_el = tile_ref.offset >> 2; atomicAdd(tile[tile_el + 1], backdrop); } } int xx0 = clamp(int(floor(xc - c)), x0, x1); int xx1 = clamp(int(ceil(xc + c)), x0, x1); for (int x = xx0; x < xx1; x++) { float tile_x0 = float(x * TILE_WIDTH_PX); TileRef tile_ref = Tile_index(path.tiles, uint(base + x)); uint tile_el = tile_ref.offset >> 2; uint old = atomicExchange(tile[tile_el], tile_offset); tile_seg.start = p0; tile_seg.end = p1; float y_edge = 0.0; if (tag == PathSeg_FillCubic) { y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx); if (min(p0.x, p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) { if (p0.x > p1.x) { tile_seg.end = vec2(tile_x0, y_edge); } else { tile_seg.start = vec2(tile_x0, y_edge); } } else { y_edge = 1e9; } } tile_seg.y_edge = y_edge; tile_seg.next.offset = old; TileSeg_write(TileSegRef(tile_offset), tile_seg); tile_offset += TileSeg_size; } xc += b; base += stride; } n_out += 1; target += v_step; p0 = p1; } val_sum += params.val; qp0 = qp2; } break; } }