diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp
index 0855219..ad39af2 100644
--- a/piet-gpu/shader/elements.comp
+++ b/piet-gpu/shader/elements.comp
@@ -318,29 +318,34 @@ void main() {
         case Element_StrokeLine:
             LineSeg line = Element_StrokeLine_read(this_ref);
             PathStrokeLine path_line;
-            path_line.p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
-            path_line.p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
-            path_line.path_ix = st.path_count;
+            vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
+            vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
+            PathStrokeCubic path_cubic;
+            path_cubic.p0 = p0;
+            path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
+            path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
+            path_cubic.p3 = p1;
+            path_cubic.path_ix = st.path_count;
             if (tag == Element_StrokeLine) {
-                path_line.stroke = get_linewidth(st);
+                path_cubic.stroke = get_linewidth(st);
             } else {
-                path_line.stroke = vec2(0.0);
+                path_cubic.stroke = vec2(0.0);
             }
             // We do encoding a bit by hand to minimize divergence. Another approach
             // would be to have a fill/stroke bool.
             PathSegRef path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
-            uint out_tag = tag == Element_FillLine ? PathSeg_FillLine : PathSeg_StrokeLine;
+            uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
             pathseg[path_out_ref.offset >> 2] = out_tag;
-            PathStrokeLine_write(PathStrokeLineRef(path_out_ref.offset + 4), path_line);
+            PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
             break;
         case Element_FillCubic:
         case Element_StrokeCubic:
             CubicSeg cubic = Element_StrokeCubic_read(this_ref);
-            PathStrokeCubic path_cubic;
+            path_cubic;
             path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
             path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
-            path_cubic.p1 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
-            path_cubic.p1 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
+            path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
+            path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
             path_cubic.path_ix = st.path_count;
             if (tag == Element_StrokeCubic) {
                 path_cubic.stroke = get_linewidth(st);
@@ -350,7 +355,7 @@ void main() {
             // We do encoding a bit by hand to minimize divergence. Another approach
             // would be to have a fill/stroke bool.
             path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
-            out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
+            out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic;
             pathseg[path_out_ref.offset >> 2] = out_tag;
             PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
             break;
diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv
index 55a43f2..e6bd773 100644
Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ
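Note on the elements.comp hunks above: the StrokeLine/FillLine case now degree-elevates each line segment to an exact cubic, placing the inner control points at the thirds of the chord (mix(p1, p0, 1.0 / 3.0) is the point two thirds of the way from p0 to p1), so the downstream path stage only has to handle cubic segments. A minimal CPU-side sketch of the same elevation, in Rust with an illustrative Vec2 type that is not part of this patch:

    #[derive(Clone, Copy, Debug, PartialEq)]
    struct Vec2 { x: f32, y: f32 }

    // Same as GLSL mix(a, b, t): linear interpolation from a toward b.
    fn lerp(a: Vec2, b: Vec2, t: f32) -> Vec2 {
        Vec2 { x: a.x + (b.x - a.x) * t, y: a.y + (b.y - a.y) * t }
    }

    // Degree-elevate the line p0..p1 to a cubic Bezier with identical geometry.
    fn line_to_cubic(p0: Vec2, p1: Vec2) -> [Vec2; 4] {
        [p0, lerp(p0, p1, 1.0 / 3.0), lerp(p1, p0, 1.0 / 3.0), p1]
    }

    fn main() {
        let (p0, p1) = (Vec2 { x: 0.0, y: 0.0 }, Vec2 { x: 3.0, y: 6.0 });
        // Control points land at the thirds of the segment: (1, 2) and (2, 4).
        println!("{:?}", line_to_cubic(p0, p1));
    }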
diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp
index 693082e..1bbad42 100644
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@@ -33,6 +33,14 @@ layout(set = 0, binding = 2) buffer TileBuf {
 #define SX (1.0 / float(TILE_WIDTH_PX))
 #define SY (1.0 / float(TILE_HEIGHT_PX))
 
+#define Q_ACCURACY 0.025
+#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
+
+vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
+    float mt = 1.0 - t;
+    return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t;
+}
+
 void main() {
     uint element_ix = gl_GlobalInvocationID.x;
     PathSegRef ref = PathSegRef(element_ix * PathSeg_size);
@@ -48,6 +56,7 @@ void main() {
     PathStrokeLine line;
     float dx;
     switch (tag) {
+    /*
     case PathSeg_FillLine:
     case PathSeg_StrokeLine:
         line = PathSeg_StrokeLine_read(ref);
@@ -63,66 +72,101 @@ void main() {
         b = invslope; // Note: assumes square tiles, otherwise scale.
         a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
         break;
-    }
-    int x0 = int(floor((xmin) * SX));
-    int x1 = int(ceil((xmax) * SX));
-    int y0 = int(floor((ymin) * SY));
-    int y1 = int(ceil((ymax) * SY));
-
-    uint path_ix = line.path_ix;
-    Path path = Path_read(PathRef(path_ix * Path_size));
-    ivec4 bbox = ivec4(path.bbox);
-    x0 = clamp(x0, bbox.x, bbox.z);
-    y0 = clamp(y0, bbox.y, bbox.w);
-    x1 = clamp(x1, bbox.x, bbox.z);
-    y1 = clamp(y1, bbox.y, bbox.w);
-    float t = a + b * float(y0);
-    int stride = bbox.z - bbox.x;
-    int base = (y0 - bbox.y) * stride - bbox.x;
-    // TODO: can be tighter, use c to bound width
-    uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
-    // Consider using subgroups to aggregate atomic add.
-    uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
-    TileSeg tile_seg;
-    for (int y = y0; y < y1; y++) {
-        float tile_y0 = float(y * TILE_HEIGHT_PX);
-        if (tag == PathSeg_FillLine && min(line.p0.y, line.p1.y) <= tile_y0) {
-            int xray = max(int(ceil(t - 0.5 * b)), bbox.x);
-            if (xray < bbox.z) {
-                int backdrop = line.p1.y < line.p0.y ? 1 : -1;
-                TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
-                uint tile_el = tile_ref.offset >> 2;
-                atomicAdd(tile[tile_el + 1], backdrop);
-            }
-        }
-        int xx0 = clamp(int(floor(t - c)), x0, x1);
-        int xx1 = clamp(int(ceil(t + c)), x0, x1);
-        for (int x = xx0; x < xx1; x++) {
-            float tile_x0 = float(x * TILE_WIDTH_PX);
-            TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
-            uint tile_el = tile_ref.offset >> 2;
-            uint old = atomicExchange(tile[tile_el], tile_offset);
-            tile_seg.start = line.p0;
-            tile_seg.end = line.p1;
-            float y_edge = 0.0;
-            if (tag == PathSeg_FillLine) {
-                y_edge = mix(line.p0.y, line.p1.y, (tile_x0 - line.p0.x) / dx);
-                if (min(line.p0.x, line.p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
-                    if (line.p0.x > line.p1.x) {
-                        tile_seg.end = vec2(tile_x0, y_edge);
-                    } else {
-                        tile_seg.start = vec2(tile_x0, y_edge);
+    */
+    case PathSeg_FillCubic:
+    case PathSeg_StrokeCubic:
+        PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
+        vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
+        float err = err_v.x * err_v.x + err_v.y * err_v.y;
+        // The number of quadratics.
+        uint n = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
+        vec2 p0 = cubic.p0;
+        float step = 1.0 / float(n);
+        uint path_ix = cubic.path_ix;
+        Path path = Path_read(PathRef(path_ix * Path_size));
+        ivec4 bbox = ivec4(path.bbox);
+        for (int i = 0; i < n; i++) {
+            // TODO: probably need special logic to make sure it's manifold
+            float t = float(i + 1) * step;
+            vec2 p2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
+            /*
+            vec2 p1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
+            p1 = 2.0 * p1 - 0.5 * (p0 + p2);
+            */
+
+            xmin = min(p0.x, p2.x) - cubic.stroke.x;
+            xmax = max(p0.x, p2.x) + cubic.stroke.x;
+            ymin = min(p0.y, p2.y) - cubic.stroke.y;
+            ymax = max(p0.y, p2.y) + cubic.stroke.y;
+            float dx = p2.x - p0.x;
+            float dy = p2.y - p0.y;
+            // Set up for per-scanline coverage formula, below.
+            float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
+            c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
+            b = invslope; // Note: assumes square tiles, otherwise scale.
+            a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
+
+            int x0 = int(floor((xmin) * SX));
+            int x1 = int(ceil((xmax) * SX));
+            int y0 = int(floor((ymin) * SY));
+            int y1 = int(ceil((ymax) * SY));
+
+            x0 = clamp(x0, bbox.x, bbox.z);
+            y0 = clamp(y0, bbox.y, bbox.w);
+            x1 = clamp(x1, bbox.x, bbox.z);
+            y1 = clamp(y1, bbox.y, bbox.w);
+            float xc = a + b * float(y0);
+            int stride = bbox.z - bbox.x;
+            int base = (y0 - bbox.y) * stride - bbox.x;
+            // TODO: can be tighter, use c to bound width
+            uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
+            // Consider using subgroups to aggregate atomic add.
+            uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
+            TileSeg tile_seg;
+            for (int y = y0; y < y1; y++) {
+                float tile_y0 = float(y * TILE_HEIGHT_PX);
+                if (tag == PathSeg_FillCubic && min(p0.y, p2.y) <= tile_y0) {
+                    int xray = max(int(ceil(xc - 0.5 * b)), bbox.x);
+                    if (xray < bbox.z) {
+                        int backdrop = p2.y < p0.y ? 1 : -1;
+                        TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
+                        uint tile_el = tile_ref.offset >> 2;
+                        atomicAdd(tile[tile_el + 1], backdrop);
                     }
-                } else {
-                    y_edge = 1e9;
                 }
+                int xx0 = clamp(int(floor(xc - c)), x0, x1);
+                int xx1 = clamp(int(ceil(xc + c)), x0, x1);
+                for (int x = xx0; x < xx1; x++) {
+                    float tile_x0 = float(x * TILE_WIDTH_PX);
+                    TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
+                    uint tile_el = tile_ref.offset >> 2;
+                    uint old = atomicExchange(tile[tile_el], tile_offset);
+                    tile_seg.start = p0;
+                    tile_seg.end = p2;
+                    float y_edge = 0.0;
+                    if (tag == PathSeg_FillCubic) {
+                        y_edge = mix(p0.y, p2.y, (tile_x0 - p0.x) / dx);
+                        if (min(p0.x, p2.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
+                            if (p0.x > p2.x) {
+                                tile_seg.end = vec2(tile_x0, y_edge);
+                            } else {
+                                tile_seg.start = vec2(tile_x0, y_edge);
+                            }
+                        } else {
+                            y_edge = 1e9;
+                        }
+                    }
+                    tile_seg.y_edge = y_edge;
+                    tile_seg.next.offset = old;
+                    TileSeg_write(TileSegRef(tile_offset), tile_seg);
+                    tile_offset += TileSeg_size;
+                }
+                xc += b;
+                base += stride;
             }
-            tile_seg.y_edge = y_edge;
-            tile_seg.next.offset = old;
-            TileSeg_write(TileSegRef(tile_offset), tile_seg);
-            tile_offset += TileSeg_size;
+
+            p0 = p2;
         }
-        t += b;
-        base += stride;
+        break;
     }
 }
diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv
index 7098a63..0d10ea2 100644
Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index 221b737..f914a2e 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -242,7 +242,7 @@ impl PietGpuRenderContext {
     }
 
     fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
-        let flatten = true;
+        let flatten = false;
         if flatten {
             let mut start_pt = None;
             let mut last_pt = None;
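Note on the path_coarse.comp hunks above: err_v = 3.0 * (p2 - p1) + p0 - p3 measures the cubic's deviation from a single quadratic (it vanishes exactly when the cubic is a degree-elevated quadratic), and n = ceil((err / MAX_HYPOT2)^(1/6)) with MAX_HYPOT2 = 432 * Q_ACCURACY^2 is the piece count; since the quadratic midpoint computation is still commented out, each piece is currently handled as the chord between consecutive eval_cubic samples. A self-contained Rust sketch of the same arithmetic, with an illustrative Vec2 type that is not part of this patch:

    #[derive(Clone, Copy, Debug)]
    struct Vec2 { x: f32, y: f32 }

    const Q_ACCURACY: f32 = 0.025;
    const MAX_HYPOT2: f32 = 432.0 * Q_ACCURACY * Q_ACCURACY;

    // Evaluate a cubic Bezier at t (same weights as eval_cubic in the shader).
    fn eval_cubic(p0: Vec2, p1: Vec2, p2: Vec2, p3: Vec2, t: f32) -> Vec2 {
        let mt = 1.0 - t;
        let (c0, c1, c2, c3) = (mt * mt * mt, 3.0 * mt * mt * t, 3.0 * mt * t * t, t * t * t);
        Vec2 {
            x: p0.x * c0 + p1.x * c1 + p2.x * c2 + p3.x * c3,
            y: p0.y * c0 + p1.y * c1 + p2.y * c2 + p3.y * c3,
        }
    }

    // Number of pieces the shader subdivides the cubic into before tiling.
    fn num_quads(p0: Vec2, p1: Vec2, p2: Vec2, p3: Vec2) -> u32 {
        let err_x = 3.0 * (p2.x - p1.x) + p0.x - p3.x;
        let err_y = 3.0 * (p2.y - p1.y) + p0.y - p3.y;
        let err = err_x * err_x + err_y * err_y;
        ((err / MAX_HYPOT2).powf(1.0 / 6.0).ceil() as u32).max(1)
    }

    fn main() {
        let (p0, p1, p2, p3) = (
            Vec2 { x: 0.0, y: 0.0 },
            Vec2 { x: 100.0, y: 0.0 },
            Vec2 { x: 100.0, y: 100.0 },
            Vec2 { x: 0.0, y: 100.0 },
        );
        let n = num_quads(p0, p1, p2, p3);
        // Walk the chord endpoints the same way the shader's i loop does.
        let step = 1.0 / n as f32;
        for i in 0..n {
            let p = eval_cubic(p0, p1, p2, p3, (i + 1) as f32 * step);
            println!("chord endpoint {}: ({}, {})", i, p.x, p.y);
        }
    }

Because n grows with the cube root of the deviation from a quadratic, gentle curves get only a handful of pieces at Q_ACCURACY = 0.025, while strongly curved cubics subdivide more heavily.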