diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs
index 98c4d44..96c0ecc 100644
--- a/piet-gpu-types/src/ptcl.rs
+++ b/piet-gpu-types/src/ptcl.rs
@@ -20,7 +20,8 @@ piet_gpu! {
             rgba_color: u32,
         }
         struct CmdFill {
-            seg_ref: Ref<SegChunk>,
+            // As above; the value is really a Ref<TileSeg> (head of the tile's segment list).
+            tile_ref: u32,
             backdrop: i32,
             rgba_color: u32,
         }
diff --git a/piet-gpu-types/src/tile.rs b/piet-gpu-types/src/tile.rs
index 5a28037..18318e3 100644
--- a/piet-gpu-types/src/tile.rs
+++ b/piet-gpu-types/src/tile.rs
@@ -15,6 +15,7 @@ piet_gpu! {
         struct TileSeg {
             start: [f32; 2],
             end: [f32; 2],
+            y_edge: f32, // fills: y where the line crosses the tile's left edge, 1e9 if none (see path_coarse.comp); 0.0 for other segments
             next: Ref<TileSeg>,
         }
     }
diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs
index 5b293d3..df2f894 100644
--- a/piet-gpu/bin/cli.rs
+++ b/piet-gpu/bin/cli.rs
@@ -171,7 +171,7 @@ fn main() -> Result<(), Error> {
 
         let fence = device.create_fence(false)?;
         let mut cmd_buf = device.create_cmd_buf()?;
-        let query_pool = device.create_query_pool(7)?;
+        let query_pool = device.create_query_pool(8)?;
 
         let mut ctx = PietGpuRenderContext::new();
         if let Some(input) = matches.value_of("INPUT") {
@@ -204,9 +204,10 @@ fn main() -> Result<(), Error> {
         println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
         println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
         println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
-        println!("Binning kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
-        println!("Coarse raster kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
-        println!("Render kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
+        println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
+        println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
+        println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
+        println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
 
         /*
         let mut data: Vec<u32> = Default::default();
diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp
new file mode 100644
index 0000000..c0f58c4
--- /dev/null
+++ b/piet-gpu/shader/backdrop.comp
@@ -0,0 +1,56 @@
+// Propagation of tile backdrop for filling.
+
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+#include "setup.h"
+
+#define BACKDROP_WG 256
+
+layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
+
+layout(set = 0, binding = 0) buffer AnnotatedBuf {
+    uint[] annotated;
+};
+
+// This is really only used for n_elements; maybe we can handle that
+// a different way, but it's convenient to have the same signature as
+// tile allocation.
+layout(set = 0, binding = 1) buffer AllocBuf {
+    uint n_elements;
+    uint n_pathseg;
+    uint alloc;
+};
+
+layout(set = 0, binding = 2) buffer TileBuf {
+    uint[] tile;
+};
+
+#include "annotated.h"
+#include "tile.h"
+
+void main() {  // Turns the per-tile backdrop deltas of every fill path into running sums along each tile row.
+    uint element_ix = gl_GlobalInvocationID.x;  // one invocation per annotated element
+    AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
+
+    uint tag = Annotated_Nop;  // invocations at or past n_elements keep Nop and do nothing
+    if (element_ix < n_elements) {
+        tag = Annotated_tag(ref);
+    }
+    if (tag == Annotated_Fill) {  // only fill elements get their backdrop propagated
+        PathRef path_ref = PathRef(element_ix * Path_size);  // Path record at the same index as the element
+        Path path = Path_read(path_ref);
+        uint width = path.bbox.z - path.bbox.x;  // bbox extent; presumably in tile units -- TODO confirm
+        uint height = path.bbox.w - path.bbox.y;
+        // Hand-rolled Tile indexing (no Tile_read): work on the raw uint array directly.
+        uint tile_el_ix = (path.tiles.offset >> 2) + 1;  // byte offset -> uint index; +1 is the word path_coarse.comp adds backdrop deltas to
+        for (uint y = 0; y < height; y++) {  // index is never rewound, so rows are visited back to back
+            uint sum = 0;  // running total restarts at the start of every row
+            for (uint x = 0; x < width; x++) {
+                sum += tile[tile_el_ix];
+                tile[tile_el_ix] = sum;  // in place: each slot becomes the sum of its row up to and including itself
+                tile_el_ix += 2;  // step two uints to the same word of the next tile (assumes a Tile is two uints, as path_coarse.comp also does)
+            }
+        }
+    }
+}
diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv
new file mode 100644
index 0000000..0b4828e
Binary files /dev/null and b/piet-gpu/shader/backdrop.spv differ
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 27fcfe2..4f6e07f 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -18,6 +18,8 @@ build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
 
 build path_coarse.spv: glsl path_coarse.comp | annotated.h tile.h setup.h
 
+build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
+
 build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
 
 build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index e488fbf..eec0bfe 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -226,7 +226,7 @@ void main() {
             uint x = sh_tile_x0[el_ix] + seq_ix % width;
             uint y = sh_tile_y0[el_ix] + seq_ix / width;
             Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
-            if (tile.tile.offset != 0) {
+            if (tile.tile.offset != 0 || tile.backdrop != 0) {
                 uint el_slice = el_ix / 32;
                 uint el_mask = 1 << (el_ix & 31);
                 atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
@@ -357,52 +357,26 @@ void main() {
             tag = Annotated_tag(ref);
 
             switch (tag) {
-            /*
             case Annotated_Fill:
-                if (last_chunk_n > 0 || seg_count > 0) {
-                    SegChunkRef chunk_ref = SegChunkRef(0);
-                    if (seg_count > 0) {
-                        chunk_ref = alloc_seg_chunk();
-                        SegChunk chunk;
-                        chunk.n = seg_count;
-                        chunk.next = SegChunkRef(0);
-                        uint seg_offset = seg_alloc + seg_start * Segment_size;
-                        chunk.segs = SegmentRef(seg_offset);
-                        SegChunk_write(chunk_ref, chunk);
-                    }
-                    if (last_chunk_n > 0) {
-                        SegChunk chunk;
-                        chunk.n = last_chunk_n;
-                        chunk.next = chunk_ref;
-                        chunk.segs = last_chunk_segs;
-                        SegChunk_write(last_chunk_ref, chunk);
-                    } else {
-                        first_seg_chunk = chunk_ref;
-                    }
-
-                    AnnoFill fill = Annotated_Fill_read(ref);
-                    CmdFill cmd_fill;
-                    cmd_fill.seg_ref = first_seg_chunk;
-                    cmd_fill.backdrop = backdrop;
-                    cmd_fill.rgba_color = fill.rgba_color;
-                    alloc_cmd(cmd_ref, cmd_limit);
-                    Cmd_Fill_write(cmd_ref, cmd_fill);
-                    cmd_ref.offset += Cmd_size;
-                    last_chunk_n = 0;
-                } else if (backdrop != 0) {
-                    AnnoFill fill = Annotated_Fill_read(ref);
-                    alloc_cmd(cmd_ref, cmd_limit);
-                    Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
-                    cmd_ref.offset += Cmd_size;
-                }
-                seg_start += seg_count;
-                seg_count = 0;
-                backdrop = 0;
-                break;
-            */
-            case Annotated_Stroke:
                 Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
                     + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
+                AnnoFill fill = Annotated_Fill_read(ref);  // read once; both branches below need the color
+                alloc_cmd(cmd_ref, cmd_limit);
+                if (tile.tile.offset != 0) {  // tile has segments: emit a fill that carries the segment list and backdrop
+                    CmdFill cmd_fill;
+                    cmd_fill.tile_ref = tile.tile.offset;
+                    cmd_fill.backdrop = tile.backdrop;
+                    cmd_fill.rgba_color = fill.rgba_color;
+                    Cmd_Fill_write(cmd_ref, cmd_fill);
+                } else {
+                    // No segments in this tile: emit a solid fill, reusing `fill` rather than re-reading and shadowing it.
+                    Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
+                }
+                cmd_ref.offset += Cmd_size;
+                break;
+            case Annotated_Stroke:
+                tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
+                    + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                 AnnoStroke stroke = Annotated_Stroke_read(ref);
                 CmdStroke cmd_stroke;
                 cmd_stroke.tile_ref = tile.tile.offset;
diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv
index c5a304b..ad24e6b 100644
Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
index 0ecda68..e00320b 100644
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -80,46 +80,40 @@ void main() {
                 rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
             }
             break;
-        /*
         case Cmd_Fill:
             CmdFill fill = Cmd_Fill_read(cmd_ref);
             // Probably better to store as float, but conversion is no doubt cheap.
             float area[CHUNK];
             for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
-            SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
+            tile_seg_ref = TileSegRef(fill.tile_ref);
             do {
-                SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
-                SegmentRef segs = seg_chunk.segs;
-                for (int i = 0; i < seg_chunk.n; i++) {
-                    Segment seg = Segment_read(Segment_index(segs, i));
-                    for (uint k = 0; k < CHUNK; k++) {
-                        vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
-                        vec2 start = seg.start - my_xy;
-                        vec2 end = seg.end - my_xy;
-                        vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
-                        if (window.x != window.y) {
-                            vec2 t = (window - start.y) / (end.y - start.y);
-                            vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
-                            float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
-                            float xmax = max(xs.x, xs.y);
-                            float b = min(xmax, 1.0);
-                            float c = max(b, 0.0);
-                            float d = max(xmin, 0.0);
-                            float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
-                            area[k] += a * (window.x - window.y);
-                        }
-                        area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
+                TileSeg seg = TileSeg_read(tile_seg_ref);
+                for (uint k = 0; k < CHUNK; k++) {
+                    vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
+                    vec2 start = seg.start - my_xy;
+                    vec2 end = seg.end - my_xy;
+                    vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
+                    if (window.x != window.y) {
+                        vec2 t = (window - start.y) / (end.y - start.y);
+                        vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
+                        float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
+                        float xmax = max(xs.x, xs.y);
+                        float b = min(xmax, 1.0);
+                        float c = max(b, 0.0);
+                        float d = max(xmin, 0.0);
+                        float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
+                        area[k] += a * (window.x - window.y);
                     }
+                    area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
                 }
-                fill_seg_chunk_ref = seg_chunk.next;
-            } while (fill_seg_chunk_ref.offset != 0);
+                tile_seg_ref = seg.next;
+            } while (tile_seg_ref.offset != 0);
             fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
             for (uint k = 0; k < CHUNK; k++) {
                 float alpha = min(abs(area[k]), 1.0);
                 rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
             }
             break;
-        */
         case Cmd_Solid:
             CmdSolid solid = Cmd_Solid_read(cmd_ref);
             fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
index cb27407..52ba572 100644
Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ
diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp
index 9abcbf0..0b3bc8f 100644
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@@ -36,9 +36,11 @@ layout(set = 0, binding = 2) buffer TileBuf {
 shared uint sh_tile_count[COARSE_WG];
 shared uint sh_width[COARSE_WG];
 shared uint sh_draw_width[COARSE_WG];
+shared uint sh_tag[COARSE_WG];
 shared vec2 sh_p0[COARSE_WG];
 shared vec2 sh_p1[COARSE_WG];
 shared int sh_x0[COARSE_WG];
+shared int sh_bbox_x1[COARSE_WG];
 shared int sh_y0[COARSE_WG];
 shared float sh_a[COARSE_WG];
 shared float sh_b[COARSE_WG];
@@ -56,6 +58,7 @@ void main() {
     if (element_ix < n_pathseg) {
         tag = PathSeg_tag(ref);
     }
+    sh_tag[th_ix] = tag;
     // Setup for coverage algorithm.
     float a, b, c;
     // Bounding box of element in pixel coordinates.
@@ -96,6 +99,7 @@ void main() {
     x1 = clamp(x1, bbox.x, bbox.z);
     y1 = clamp(y1, bbox.y, bbox.w);
     sh_x0[th_ix] = x0;
+    sh_bbox_x1[th_ix] = bbox.z;
     // TODO: can get rid of this (fold into base), with care (also need to update `a`)
     sh_y0[th_ix] = y0;
     int stride = bbox.z - bbox.x;
@@ -138,7 +142,8 @@ void main() {
         int x1 = x0 + int(sh_width[el_ix]);
         int dx = int(seq_ix % draw_width);
         uint y = sh_y0[el_ix] + seq_ix / draw_width;
-        float t = sh_a[el_ix] + sh_b[el_ix] * float(y);
+        float b = sh_b[el_ix];
+        float t = sh_a[el_ix] + b * float(y);
         float c = sh_c[el_ix];
         int xx0 = clamp(int(floor(t - c)), x0, x1);
         int xx1 = clamp(int(ceil(t + c)), x0, x1);
@@ -148,8 +153,34 @@ void main() {
             uint tile_el = (sh_base[el_ix] + uint(y * sh_stride[el_ix] + x) * Tile_size) >> 2;
             uint old = atomicExchange(tile[tile_el], tile_offset);
             TileSeg tile_seg;
-            tile_seg.start = sh_p0[el_ix];
-            tile_seg.end = sh_p1[el_ix];
+            vec2 p0 = sh_p0[el_ix];
+            vec2 p1 = sh_p1[el_ix];
+            float y_edge = 0.0;
+            if (sh_tag[el_ix] == PathSeg_FillLine) {
+                vec2 tile_xy = vec2(x * TILE_WIDTH_PX, y * TILE_HEIGHT_PX);
+                if (dx == 0 && min(p0.y, p1.y) <= tile_xy.y) {
+                    // TODO: need a little more work to make sure this triggers even
+                    // when line is to the left of bbox.
+                    int xray = max(int(ceil(t - 0.5 * b)), x0);
+                    if (xray < sh_bbox_x1[el_ix]) {
+                        int backdrop = p1.y < p0.y ? 1 : -1;
+                        atomicAdd(tile[tile_el + 1 + 2 * (xray - x)], backdrop);
+                    }
+                }
+                y_edge = mix(p0.y, p1.y, (tile_xy.x - p0.x) / (p1.x - p0.x));
+                if (min(p0.x, p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
+                    if (p0.x > p1.x) {
+                        p1 = vec2(tile_xy.x, y_edge);
+                    } else {
+                        p0 = vec2(tile_xy.x, y_edge);
+                    }
+                } else {
+                    y_edge = 1e9;
+                }
+            }
+            tile_seg.start = p0;
+            tile_seg.end = p1;
+            tile_seg.y_edge = y_edge;
             tile_seg.next.offset = old;
             TileSeg_write(TileSegRef(tile_offset), tile_seg);
         }
diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv
index 53cb759..8c4801b 100644
Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ
diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h
index d337598..0c20a89 100644
--- a/piet-gpu/shader/ptcl.h
+++ b/piet-gpu/shader/ptcl.h
@@ -80,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
 }
 
 struct CmdFill {
-    SegChunkRef seg_ref;
+    uint tile_ref;
     int backdrop;
     uint rgba_color;
 };
@@ -239,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
     uint raw1 = ptcl[ix + 1];
     uint raw2 = ptcl[ix + 2];
     CmdFill s;
-    s.seg_ref = SegChunkRef(raw0);
+    s.tile_ref = raw0;
     s.backdrop = int(raw1);
     s.rgba_color = raw2;
     return s;
@@ -247,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
 
 void CmdFill_write(CmdFillRef ref, CmdFill s) {
     uint ix = ref.offset >> 2;
-    ptcl[ix + 0] = s.seg_ref.offset;
+    ptcl[ix + 0] = s.tile_ref;
     ptcl[ix + 1] = uint(s.backdrop);
     ptcl[ix + 2] = s.rgba_color;
 }
diff --git a/piet-gpu/shader/tile.h b/piet-gpu/shader/tile.h
index b4a8c9b..d7659ff 100644
--- a/piet-gpu/shader/tile.h
+++ b/piet-gpu/shader/tile.h
@@ -37,10 +37,11 @@ TileRef Tile_index(TileRef ref, uint index) {
 struct TileSeg {
     vec2 start;
     vec2 end;
+    float y_edge;
     TileSegRef next;
 };
 
-#define TileSeg_size 20
+#define TileSeg_size 24
 
 TileSegRef TileSeg_index(TileSegRef ref, uint index) {
     return TileSegRef(ref.offset + index * TileSeg_size);
@@ -87,10 +88,12 @@ TileSeg TileSeg_read(TileSegRef ref) {
     uint raw2 = tile[ix + 2];
     uint raw3 = tile[ix + 3];
     uint raw4 = tile[ix + 4];
+    uint raw5 = tile[ix + 5];
     TileSeg s;
     s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
     s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
-    s.next = TileSegRef(raw4);
+    s.y_edge = uintBitsToFloat(raw4);
+    s.next = TileSegRef(raw5);
     return s;
 }
 
@@ -100,6 +103,7 @@ void TileSeg_write(TileSegRef ref, TileSeg s) {
     tile[ix + 1] = floatBitsToUint(s.start.y);
     tile[ix + 2] = floatBitsToUint(s.end.x);
     tile[ix + 3] = floatBitsToUint(s.end.y);
-    tile[ix + 4] = s.next.offset;
+    tile[ix + 4] = floatBitsToUint(s.y_edge);
+    tile[ix + 5] = s.next.offset;
 }
 
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index 63d80fa..1e839d2 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -57,8 +57,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
         let circle = Circle::new(center, radius);
         rc.fill(circle, &color);
     }
-    /*
     let mut path = BezPath::new();
+    /*
     path.move_to((100.0, 1150.0));
     path.line_to((200.0, 1200.0));
     path.line_to((150.0, 1250.0));
@@ -143,6 +143,9 @@ pub struct Renderer<D: Device> {
     path_pipeline: D::Pipeline,
     path_ds: D::DescriptorSet,
 
+    backdrop_pipeline: D::Pipeline,
+    backdrop_ds: D::DescriptorSet,
+
     tile_alloc_buf_host: D::Buffer,
     tile_alloc_buf_dev: D::Buffer,
 
@@ -224,6 +227,14 @@ impl<D: Device> Renderer<D> {
             &[],
         )?;
 
+        let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
+        let backdrop_pipeline = device.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
+        let backdrop_ds = device.create_descriptor_set(
+            &backdrop_pipeline,
+            &[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
+            &[],
+        )?;
+
         let bin_alloc_buf_host = device.create_buffer(12, host)?;
         let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
 
@@ -275,6 +286,8 @@ impl<D: Device> Renderer<D> {
             tile_ds,
             path_pipeline,
             path_ds,
+            backdrop_pipeline,
+            backdrop_ds,
             bin_pipeline,
             bin_ds,
             coarse_pipeline,
@@ -333,6 +346,13 @@ impl<D: Device> Renderer<D> {
             (((self.n_pathseg + 31) / 32) as u32, 1, 1),
         );
         cmd_buf.write_timestamp(&query_pool, 3);
+        cmd_buf.memory_barrier();
+        cmd_buf.dispatch(
+            &self.backdrop_pipeline,
+            &self.backdrop_ds,
+            (((self.n_paths + 255) / 256) as u32, 1, 1),
+        );
+        cmd_buf.write_timestamp(&query_pool, 4);
         // Note: this barrier is not needed as an actual dependency between
         // pipeline stages, but I am keeping it in so that timer queries are
         // easier to interpret.
@@ -342,21 +362,21 @@ impl<D: Device> Renderer<D> {
             &self.bin_ds,
             (((self.n_paths + 255) / 256) as u32, 1, 1),
         );
-        cmd_buf.write_timestamp(&query_pool, 4);
+        cmd_buf.write_timestamp(&query_pool, 5);
         cmd_buf.memory_barrier();
         cmd_buf.dispatch(
             &self.coarse_pipeline,
             &self.coarse_ds,
             (WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
         );
-        cmd_buf.write_timestamp(&query_pool, 5);
+        cmd_buf.write_timestamp(&query_pool, 6);
         cmd_buf.memory_barrier();
         cmd_buf.dispatch(
             &self.k4_pipeline,
             &self.k4_ds,
             ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
         );
-        cmd_buf.write_timestamp(&query_pool, 6);
+        cmd_buf.write_timestamp(&query_pool, 7);
         cmd_buf.memory_barrier();
         cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
     }
diff --git a/piet-gpu/src/pico_svg.rs b/piet-gpu/src/pico_svg.rs
index 0aac61a..140c42d 100644
--- a/piet-gpu/src/pico_svg.rs
+++ b/piet-gpu/src/pico_svg.rs
@@ -49,8 +49,8 @@ impl PicoSvg {
         for item in &self.items {
             match item {
                 Item::Fill(fill_item) => {
-                    //rc.fill(&fill_item.path, &fill_item.color);
-                    rc.stroke(&fill_item.path, &fill_item.color, 1.0);
+                    rc.fill(&fill_item.path, &fill_item.color);
+                    //rc.stroke(&fill_item.path, &fill_item.color, 1.0);
                 }
                 Item::Stroke(stroke_item) => {
                     rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);