Make fills work

The backdrop propagation is slow but it does work.
2025-01-09 20:31:29 +11:00 · 2020-06-05 15:07:02 -07:00 · 2020-06-05 15:07:02 -07:00 · af0a1af8e1
parent f9f5961428
commit af0a1af8e1
16 changed files with 174 additions and 90 deletions
--- a/piet-gpu-types/src/ptcl.rs
+++ b/piet-gpu-types/src/ptcl.rs
@ -20,7 +20,8 @@ piet_gpu! {
            rgba_color: u32,
        }
        struct CmdFill {
-            seg_ref: Ref<SegChunk>,
+            // As above, really Ref<TileSeg> (head of the tile's segment list)
            tile_ref: u32,
            backdrop: i32,
            rgba_color: u32,
        }
--- a/piet-gpu-types/src/tile.rs
+++ b/piet-gpu-types/src/tile.rs
@ -15,6 +15,7 @@ piet_gpu! {
        // One line segment in a tile's linked list of segments.
        struct TileSeg {
            // Segment endpoints as written by path_coarse.comp. For fill lines
            // one endpoint may be replaced by the crossing point on the tile's
            // left edge.
            start: [f32; 2],
            end: [f32; 2],
            // y at which a fill line crosses the tile's left edge within the
            // tile's vertical extent. path_coarse.comp writes 1e9 when there is
            // no such crossing and 0.0 for segments that are not fill lines.
            y_edge: f32,
            // Next segment in the same tile's list; an offset of 0 ends the list.
            next: Ref<TileSeg>,
        }
    }
--- a/piet-gpu/bin/cli.rs
+++ b/piet-gpu/bin/cli.rs
@ -171,7 +171,7 @@ fn main() -> Result<(), Error> {
        let fence = device.create_fence(false)?;
        let mut cmd_buf = device.create_cmd_buf()?;
-        let query_pool = device.create_query_pool(7)?;
+        let query_pool = device.create_query_pool(8)?;
        let mut ctx = PietGpuRenderContext::new();
        if let Some(input) = matches.value_of("INPUT") {
@ -204,9 +204,10 @@ fn main() -> Result<(), Error> {
        println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
        println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
        println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
-        println!("Binning kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
+        println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
-        println!("Coarse raster kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
+        println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
-        println!("Render kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
+        println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
        println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
        /*
        let mut data: Vec<u32> = Default::default();
--- a/piet-gpu/shader/backdrop.comp
+++ b/piet-gpu/shader/backdrop.comp
@ -0,0 +1,56 @@
 // Propagation of tile backdrop for filling.
 #version 450
 #extension GL_GOOGLE_include_directive : enable
 #include "setup.h"
 #define BACKDROP_WG 256
 layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
 layout(set = 0, binding = 0) buffer AnnotatedBuf {
    uint[] annotated;
 };
 // This is really only used for n_elements; maybe we can handle that
 // a different way, but it's convenient to have the same signature as
 // tile allocation.
 layout(set = 0, binding = 1) buffer AllocBuf {
    uint n_elements;
    uint n_pathseg;
    uint alloc;
 };
 layout(set = 0, binding = 2) buffer TileBuf {
    uint[] tile;
 };
 #include "annotated.h"
 #include "tile.h"
 void main() {
    // One invocation per annotated element. Invocations past n_elements are
    // treated as Nop so they never touch the tile buffer.
    uint element_ix = gl_GlobalInvocationID.x;
    AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
    uint tag = element_ix < n_elements ? Annotated_tag(ref) : Annotated_Nop;
    if (tag != Annotated_Fill) {
        return;
    }
    // The path record for an element is looked up by the element's own index.
    Path path = Path_read(PathRef(element_ix * Path_size));
    uint n_cols = path.bbox.z - path.bbox.x;
    uint n_rows = path.bbox.w - path.bbox.y;
    // slightly handrolling the tile structure here: step through the tiles
    // two words at a time, starting one word past the first tile's offset.
    uint backdrop_ix = (path.tiles.offset >> 2) + 1;
    for (uint row = 0; row < n_rows; row++) {
        // Running sum restarts on every row, giving an inclusive
        // left-to-right prefix sum per row.
        uint running = 0;
        for (uint col = 0; col < n_cols; col++) {
            running += tile[backdrop_ix];
            tile[backdrop_ix] = running;
            backdrop_ix += 2;
        }
    }
 }
--- a/piet-gpu/shader/backdrop.spv
+++ b/piet-gpu/shader/backdrop.spv
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@ -18,6 +18,8 @@ build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
 build path_coarse.spv: glsl path_coarse.comp | annotated.h tile.h setup.h
 build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
 build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
 build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@ -226,7 +226,7 @@ void main() {
            uint x = sh_tile_x0[el_ix] + seq_ix % width;
            uint y = sh_tile_y0[el_ix] + seq_ix / width;
            Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
-            if (tile.tile.offset != 0) {
+            if (tile.tile.offset != 0 || tile.backdrop != 0) {
                uint el_slice = el_ix / 32;
                uint el_mask = 1 << (el_ix & 31);
                atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
@ -357,52 +357,26 @@ void main() {
            tag = Annotated_tag(ref);
            switch (tag) {
            /*
            case Annotated_Fill:
                if (last_chunk_n > 0 || seg_count > 0) {
                    SegChunkRef chunk_ref = SegChunkRef(0);
                    if (seg_count > 0) {
                        chunk_ref = alloc_seg_chunk();
                        SegChunk chunk;
                        chunk.n = seg_count;
                        chunk.next = SegChunkRef(0);
                        uint seg_offset = seg_alloc + seg_start * Segment_size;
                        chunk.segs = SegmentRef(seg_offset);
                        SegChunk_write(chunk_ref, chunk);
                    }
                    if (last_chunk_n > 0) {
                        SegChunk chunk;
                        chunk.n = last_chunk_n;
                        chunk.next = chunk_ref;
                        chunk.segs = last_chunk_segs;
                        SegChunk_write(last_chunk_ref, chunk);
                    } else {
                        first_seg_chunk = chunk_ref;
                    }
                    AnnoFill fill = Annotated_Fill_read(ref);
                    CmdFill cmd_fill;
                    cmd_fill.seg_ref = first_seg_chunk;
                    cmd_fill.backdrop = backdrop;
                    cmd_fill.rgba_color = fill.rgba_color;
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Fill_write(cmd_ref, cmd_fill);
                    cmd_ref.offset += Cmd_size;
                    last_chunk_n = 0;
                } else if (backdrop != 0) {
                    AnnoFill fill = Annotated_Fill_read(ref);
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
                    cmd_ref.offset += Cmd_size;
                }
                seg_start += seg_count;
                seg_count = 0;
                backdrop = 0;
                break;
            */
            case Annotated_Stroke:
                Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
                    + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                AnnoFill fill = Annotated_Fill_read(ref);
                alloc_cmd(cmd_ref, cmd_limit);
                if (tile.tile.offset != 0) {
                    CmdFill cmd_fill;
                    cmd_fill.tile_ref = tile.tile.offset;
                    cmd_fill.backdrop = tile.backdrop;
                    cmd_fill.rgba_color = fill.rgba_color;
                    Cmd_Fill_write(cmd_ref, cmd_fill);
                } else {
                    AnnoFill fill = Annotated_Fill_read(ref);
                    Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
                }
                cmd_ref.offset += Cmd_size;
                break;
            case Annotated_Stroke:
                tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
                    + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                AnnoStroke stroke = Annotated_Stroke_read(ref);
                CmdStroke cmd_stroke;
                cmd_stroke.tile_ref = tile.tile.offset;
--- a/piet-gpu/shader/coarse.spv
+++ b/piet-gpu/shader/coarse.spv
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@ -80,46 +80,40 @@ void main() {
                rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
            }
            break;
        /*
        case Cmd_Fill:
            CmdFill fill = Cmd_Fill_read(cmd_ref);
            // Probably better to store as float, but conversion is no doubt cheap.
            float area[CHUNK];
            for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
-            SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
+            tile_seg_ref = TileSegRef(fill.tile_ref);
            do {
-                SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
+                TileSeg seg = TileSeg_read(tile_seg_ref);
-                SegmentRef segs = seg_chunk.segs;
+                for (uint k = 0; k < CHUNK; k++) {
-                for (int i = 0; i < seg_chunk.n; i++) {
+                    vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
-                    Segment seg = Segment_read(Segment_index(segs, i));
+                    vec2 start = seg.start - my_xy;
-                    for (uint k = 0; k < CHUNK; k++) {
+                    vec2 end = seg.end - my_xy;
-                        vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
+                    vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
-                        vec2 start = seg.start - my_xy;
+                    if (window.x != window.y) {
-                        vec2 end = seg.end - my_xy;
+                        vec2 t = (window - start.y) / (end.y - start.y);
-                        vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
+                        vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
-                        if (window.x != window.y) {
+                        float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
-                            vec2 t = (window - start.y) / (end.y - start.y);
+                        float xmax = max(xs.x, xs.y);
-                            vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
+                        float b = min(xmax, 1.0);
-                            float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
+                        float c = max(b, 0.0);
-                            float xmax = max(xs.x, xs.y);
+                        float d = max(xmin, 0.0);
-                            float b = min(xmax, 1.0);
+                        float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
-                            float c = max(b, 0.0);
+                        area[k] += a * (window.x - window.y);
                            float d = max(xmin, 0.0);
                            float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
                            area[k] += a * (window.x - window.y);
                        }
                        area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
                    }
                    area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
                }
-                fill_seg_chunk_ref = seg_chunk.next;
+                tile_seg_ref = seg.next;
-            } while (fill_seg_chunk_ref.offset != 0);
+            } while (tile_seg_ref.offset != 0);
            fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
            for (uint k = 0; k < CHUNK; k++) {
                float alpha = min(abs(area[k]), 1.0);
                rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
            }
            break;
        */
        case Cmd_Solid:
            CmdSolid solid = Cmd_Solid_read(cmd_ref);
            fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
--- a/piet-gpu/shader/kernel4.spv
+++ b/piet-gpu/shader/kernel4.spv
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@ -36,9 +36,11 @@ layout(set = 0, binding = 2) buffer TileBuf {
 shared uint sh_tile_count[COARSE_WG];
 shared uint sh_width[COARSE_WG];
 shared uint sh_draw_width[COARSE_WG];
 shared uint sh_tag[COARSE_WG];
 shared vec2 sh_p0[COARSE_WG];
 shared vec2 sh_p1[COARSE_WG];
 shared int sh_x0[COARSE_WG];
 shared int sh_bbox_x1[COARSE_WG];
 shared int sh_y0[COARSE_WG];
 shared float sh_a[COARSE_WG];
 shared float sh_b[COARSE_WG];
@ -56,6 +58,7 @@ void main() {
    if (element_ix < n_pathseg) {
        tag = PathSeg_tag(ref);
    }
    sh_tag[th_ix] = tag;
    // Setup for coverage algorithm.
    float a, b, c;
    // Bounding box of element in pixel coordinates.
@ -96,6 +99,7 @@ void main() {
    x1 = clamp(x1, bbox.x, bbox.z);
    y1 = clamp(y1, bbox.y, bbox.w);
    sh_x0[th_ix] = x0;
    sh_bbox_x1[th_ix] = bbox.z;
    // TODO: can get rid of this (fold into base), with care (also need to update `a`)
    sh_y0[th_ix] = y0;
    int stride = bbox.z - bbox.x;
@ -138,7 +142,8 @@ void main() {
        int x1 = x0 + int(sh_width[el_ix]);
        int dx = int(seq_ix % draw_width);
        uint y = sh_y0[el_ix] + seq_ix / draw_width;
-        float t = sh_a[el_ix] + sh_b[el_ix] * float(y);
+        float b = sh_b[el_ix];
        float t = sh_a[el_ix] + b * float(y);
        float c = sh_c[el_ix];
        int xx0 = clamp(int(floor(t - c)), x0, x1);
        int xx1 = clamp(int(ceil(t + c)), x0, x1);
@ -148,8 +153,34 @@ void main() {
            uint tile_el = (sh_base[el_ix] + uint(y * sh_stride[el_ix] + x) * Tile_size) >> 2;
            uint old = atomicExchange(tile[tile_el], tile_offset);
            TileSeg tile_seg;
-            tile_seg.start = sh_p0[el_ix];
+            vec2 p0 = sh_p0[el_ix];
-            tile_seg.end = sh_p1[el_ix];
+            vec2 p1 = sh_p1[el_ix];
            float y_edge = 0.0;
            if (sh_tag[el_ix] == PathSeg_FillLine) {
                vec2 tile_xy = vec2(x * TILE_WIDTH_PX, y * TILE_HEIGHT_PX);
                if (dx == 0 && min(p0.y, p1.y) <= tile_xy.y) {
                    // TODO: need a little more work to make sure this triggers even
                    // when line is to the left of bbox.
                    int xray = max(int(ceil(t - 0.5 * b)), x0);
                    if (xray < sh_bbox_x1[el_ix]) {
                        int backdrop = p1.y < p0.y ? 1 : -1;
                        atomicAdd(tile[tile_el + 1 + 2 * (xray - x)], backdrop);
                    }
                }
                y_edge = mix(p0.y, p1.y, (tile_xy.x - p0.x) / (p1.x - p0.x));
                if (min(p0.x, p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
                    if (p0.x > p1.x) {
                        p1 = vec2(tile_xy.x, y_edge);
                    } else {
                        p0 = vec2(tile_xy.x, y_edge);
                    }
                } else {
                    y_edge = 1e9;
                }
            }
            tile_seg.start = p0;
            tile_seg.end = p1;
            tile_seg.y_edge = y_edge;
            tile_seg.next.offset = old;
            TileSeg_write(TileSegRef(tile_offset), tile_seg);
        }
--- a/piet-gpu/shader/path_coarse.spv
+++ b/piet-gpu/shader/path_coarse.spv
--- a/piet-gpu/shader/ptcl.h
+++ b/piet-gpu/shader/ptcl.h
@ -80,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
 }
 struct CmdFill {
-    SegChunkRef seg_ref;
+    uint tile_ref;
    int backdrop;
    uint rgba_color;
 };
@ -239,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
    uint raw1 = ptcl[ix + 1];
    uint raw2 = ptcl[ix + 2];
    CmdFill s;
-    s.seg_ref = SegChunkRef(raw0);
+    s.tile_ref = raw0;
    s.backdrop = int(raw1);
    s.rgba_color = raw2;
    return s;
@ -247,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
 void CmdFill_write(CmdFillRef ref, CmdFill s) {
    uint ix = ref.offset >> 2;
-    ptcl[ix + 0] = s.seg_ref.offset;
+    ptcl[ix + 0] = s.tile_ref;
    ptcl[ix + 1] = uint(s.backdrop);
    ptcl[ix + 2] = s.rgba_color;
 }
--- a/piet-gpu/shader/tile.h
+++ b/piet-gpu/shader/tile.h
@ -37,10 +37,11 @@ TileRef Tile_index(TileRef ref, uint index) {
 // One line segment in a tile's linked list of segments.
 struct TileSeg {
    // Segment endpoints as written by path_coarse.comp. For fill lines one
    // endpoint may be replaced by the crossing point on the tile's left edge.
    vec2 start;
    vec2 end;
    // y at which a fill line crosses the tile's left edge within the tile's
    // vertical extent. path_coarse.comp writes 1e9 when there is no such
    // crossing and 0.0 for segments that are not fill lines.
    float y_edge;
    // Next segment in the same tile's list; an offset of 0 ends the list.
    TileSegRef next;
 };
-#define TileSeg_size 20
+#define TileSeg_size 24
 TileSegRef TileSeg_index(TileSegRef ref, uint index) {
    return TileSegRef(ref.offset + index * TileSeg_size);
@ -87,10 +88,12 @@ TileSeg TileSeg_read(TileSegRef ref) {
    uint raw2 = tile[ix + 2];
    uint raw3 = tile[ix + 3];
    uint raw4 = tile[ix + 4];
    uint raw5 = tile[ix + 5];
    TileSeg s;
    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
-    s.next = TileSegRef(raw4);
+    s.y_edge = uintBitsToFloat(raw4);
    s.next = TileSegRef(raw5);
    return s;
 }
@ -100,6 +103,7 @@ void TileSeg_write(TileSegRef ref, TileSeg s) {
    tile[ix + 1] = floatBitsToUint(s.start.y);
    tile[ix + 2] = floatBitsToUint(s.end.x);
    tile[ix + 3] = floatBitsToUint(s.end.y);
-    tile[ix + 4] = s.next.offset;
+    tile[ix + 4] = floatBitsToUint(s.y_edge);
    tile[ix + 5] = s.next.offset;
 }
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@ -57,8 +57,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
        let circle = Circle::new(center, radius);
        rc.fill(circle, &color);
    }
    /*
    let mut path = BezPath::new();
    /*
    path.move_to((100.0, 1150.0));
    path.line_to((200.0, 1200.0));
    path.line_to((150.0, 1250.0));
@ -143,6 +143,9 @@ pub struct Renderer<D: Device> {
    path_pipeline: D::Pipeline,
    path_ds: D::DescriptorSet,
    backdrop_pipeline: D::Pipeline,
    backdrop_ds: D::DescriptorSet,
    tile_alloc_buf_host: D::Buffer,
    tile_alloc_buf_dev: D::Buffer,
@ -224,6 +227,14 @@ impl<D: Device> Renderer<D> {
            &[],
        )?;
        let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
        let backdrop_pipeline = device.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
        let backdrop_ds = device.create_descriptor_set(
            &backdrop_pipeline,
            &[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
            &[],
        )?;
        let bin_alloc_buf_host = device.create_buffer(12, host)?;
        let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
@ -275,6 +286,8 @@ impl<D: Device> Renderer<D> {
            tile_ds,
            path_pipeline,
            path_ds,
            backdrop_pipeline,
            backdrop_ds,
            bin_pipeline,
            bin_ds,
            coarse_pipeline,
@ -333,6 +346,13 @@ impl<D: Device> Renderer<D> {
            (((self.n_pathseg + 31) / 32) as u32, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 3);
        cmd_buf.memory_barrier();
        cmd_buf.dispatch(
            &self.backdrop_pipeline,
            &self.backdrop_ds,
            (((self.n_paths + 255) / 256) as u32, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 4);
        // Note: this barrier is not needed as an actual dependency between
        // pipeline stages, but I am keeping it in so that timer queries are
        // easier to interpret.
@ -342,21 +362,21 @@ impl<D: Device> Renderer<D> {
            &self.bin_ds,
            (((self.n_paths + 255) / 256) as u32, 1, 1),
        );
-        cmd_buf.write_timestamp(&query_pool, 4);
+        cmd_buf.write_timestamp(&query_pool, 5);
        cmd_buf.memory_barrier();
        cmd_buf.dispatch(
            &self.coarse_pipeline,
            &self.coarse_ds,
            (WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
        );
-        cmd_buf.write_timestamp(&query_pool, 5);
+        cmd_buf.write_timestamp(&query_pool, 6);
        cmd_buf.memory_barrier();
        cmd_buf.dispatch(
            &self.k4_pipeline,
            &self.k4_ds,
            ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
        );
-        cmd_buf.write_timestamp(&query_pool, 6);
+        cmd_buf.write_timestamp(&query_pool, 7);
        cmd_buf.memory_barrier();
        cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
    }
--- a/piet-gpu/src/pico_svg.rs
+++ b/piet-gpu/src/pico_svg.rs
@ -49,8 +49,8 @@ impl PicoSvg {
        for item in &self.items {
            match item {
                Item::Fill(fill_item) => {
-                    //rc.fill(&fill_item.path, &fill_item.color);
+                    rc.fill(&fill_item.path, &fill_item.color);
-                    rc.stroke(&fill_item.path, &fill_item.color, 1.0);
+                    //rc.stroke(&fill_item.path, &fill_item.color, 1.0);
                }
                Item::Stroke(stroke_item) => {
                    rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);