diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs
index f5e42af..b6df77d 100644
--- a/piet-gpu-types/src/ptcl.rs
+++ b/piet-gpu-types/src/ptcl.rs
@@ -4,8 +4,9 @@ piet_gpu! {
     #[gpu_write]
     mod ptcl {
         struct CmdCircle {
-            // In existing code, this is packed; we might need an annotation for this.
-            bbox: [u16; 4],
+            center: [f32; 2], // kernel3.comp stores the scene center plus the instance offset
+            radius: f32,
+            rgba_color: u32, // kernel3.comp copies the scene circle's rgba_color unchanged
         }
         struct CmdLine {
             start: [f32; 2],
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 5befa7f..ada8694 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -10,3 +10,5 @@ rule glsl
 build image.spv: glsl image.comp | scene.h
 
 build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
+
+build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h
diff --git a/piet-gpu/shader/image.comp b/piet-gpu/shader/image.comp
index 60739d5..6d84eb5 100644
--- a/piet-gpu/shader/image.comp
+++ b/piet-gpu/shader/image.comp
@@ -40,7 +40,7 @@ void main() {
         if (tag == PietItem_Circle) {
             PietCircle circle = PietItem_Circle_read(item_ref);
             float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
-            float alpha = clamp(circle.radius - r, 0.0, 1.0);
+            float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
             vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
             // TODO: sRGB
             rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
diff --git a/piet-gpu/shader/image.spv b/piet-gpu/shader/image.spv
index 527c9ae..097add1 100644
Binary files a/piet-gpu/shader/image.spv and b/piet-gpu/shader/image.spv differ
diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp
index 436b8bd..3a4156c 100644
--- a/piet-gpu/shader/kernel1.comp
+++ b/piet-gpu/shader/kernel1.comp
@@ -1,3 +1,16 @@
+// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
+// and outputs "instances" (references to item + translation) for each item
+// that intersects the tilegroup.
+//
+// This implementation is simplistic and leaves a lot of performance on the
+// table. A fancier implementation would use threadgroup shared memory or
+// subgroups (or possibly both) to parallelize the reading of the input and
+// the computation of tilegroup intersection.
+//
+// In addition, there are some features currently missing. One is the use of
+// a bump allocator to extend the current fixed allocation. Another is support
+// for clipping.
+
 #version 450
 #extension GL_GOOGLE_include_directive : enable
 
@@ -18,10 +31,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf {
 // TODO: compute this
 #define WIDTH_IN_TILEGROUPS 4
 
-#define TILEGROUP_WIDTH 512
-#define TILEGROUP_HEIGHT 16
+#define TILEGROUP_WIDTH_PX 512
+#define TILEGROUP_HEIGHT_PX 16
 
-#define INITIAL_ALLOC 1024
+#define TILEGROUP_INITIAL_ALLOC 1024
 
 #define MAX_STACK 8
 
@@ -35,8 +48,8 @@ void main() {
     StackElement stack[MAX_STACK];
     uint stack_ix = 0;
     uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
-    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);
-    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
+    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
+    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
     PietItemRef root = PietItemRef(0);
     SimpleGroup group = PietItem_Group_read(root);
     StackElement tos = StackElement(root, 0, group.offset.xy);
@@ -45,8 +58,8 @@ void main() {
         if (tos.index < group.n_items) {
             Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
             vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
-            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH))
-                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT));
+            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
+                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
             bool is_group = false;
             if (hit) {
                 PietItemRef item_ref = PietItem_index(group.items, tos.index);
diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp
new file mode 100644
index 0000000..f9f9362
--- /dev/null
+++ b/piet-gpu/shader/kernel3.comp
@@ -0,0 +1,72 @@
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+layout(local_size_x = 32, local_size_y = 1) in;
+
+layout(set = 0, binding = 0) readonly buffer SceneBuf {
+    uint[] scene;
+};
+
+// TODO: this should have a `readonly` qualifier, but then the generated writers
+// that store into `tilegroup` (presumably tilegroup.h, not ptcl.h) would fail.
+layout(set = 0, binding = 1) buffer TilegroupBuf {
+    uint[] tilegroup;
+};
+
+layout(set = 0, binding = 2) buffer PtclBuf {
+    uint[] ptcl;
+};
+
+#include "scene.h"
+#include "tilegroup.h"
+#include "ptcl.h"
+
+// TODO: compute all these. As hard-coded, one row is 128 tiles of 16 px
+// (2048 px), which is the same width as 4 tilegroups of 32 tiles each.
+#define WIDTH_IN_TILEGROUPS 4
+#define WIDTH_IN_TILES 128
+#define TILEGROUP_WIDTH_TILES 32
+#define TILE_WIDTH_PX 16
+#define TILE_HEIGHT_PX 16
+
+// Must be the same as kernel1. Might be a good idea to move these particular
+// constants to their own .h file.
+#define TILEGROUP_INITIAL_ALLOC 1024
+// Byte stride between per-tile command lists (main() starts at tile_ix * this).
+#define PTCL_INITIAL_ALLOC 4096
+// One invocation per tile: turn the tilegroup's instances into this tile's command list.
+void main() {
+    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
+    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
+    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
+    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
+    CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
+    // Walk the tilegroup's entries in order until the End tag is reached.
+    while (true) {
+        uint tg_tag = TileGroup_tag(tg_ref);
+        if (tg_tag == TileGroup_End) {
+            break;
+        }
+        // Assume tg_tag is `Instance`, though there will be more cases.
+        Instance ins = TileGroup_Instance_read(tg_ref);
+        PietItemRef item_ref = PietItemRef(ins.item_ref);
+        uint item_tag = PietItem_tag(item_ref);
+        switch (item_tag) {
+        case PietItem_Circle:
+            PietCircle circle = PietItem_Circle_read(item_ref);
+            vec2 center = ins.offset + circle.center.xy;
+            float r = circle.radius;
+            if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
+                && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
+            { // the circle's bounding box overlaps this tile
+                CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
+                Cmd_Circle_write(cmd_ref, cmd);
+                cmd_ref.offset += Cmd_size; // NOTE(review): never checked against PTCL_INITIAL_ALLOC
+            }
+            break;
+        }
+        tg_ref.offset += TileGroup_size;
+    }
+    Cmd_End_write(cmd_ref);
+}
diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv
new file mode 100644
index 0000000..23a7c3e
Binary files /dev/null and b/piet-gpu/shader/kernel3.spv differ
diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h
new file mode 100644
index 0000000..583cc10
--- /dev/null
+++ b/piet-gpu/shader/ptcl.h
@@ -0,0 +1,323 @@
+// Code auto-generated by piet-gpu-derive
+
+struct CmdCircleRef {
+    uint offset;
+};
+
+struct CmdLineRef {
+    uint offset;
+};
+
+struct CmdStrokeRef {
+    uint offset;
+};
+
+struct CmdFillRef {
+    uint offset;
+};
+
+struct CmdFillEdgeRef {
+    uint offset;
+};
+
+struct CmdDrawFillRef {
+    uint offset;
+};
+
+struct CmdSolidRef {
+    uint offset;
+};
+
+struct CmdRef {
+    uint offset;
+};
+
+struct CmdCircle {
+    vec2 center;
+    float radius;
+    uint rgba_color;
+};
+
+#define CmdCircle_size 16
+
+CmdCircleRef CmdCircle_index(CmdCircleRef ref, uint index) {
+    return CmdCircleRef(ref.offset + index * CmdCircle_size);
+}
+
+struct CmdLine {
+    vec2 start;
+    vec2 end;
+};
+
+#define CmdLine_size 16
+
+CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
+    return CmdLineRef(ref.offset + index * CmdLine_size);
+}
+
+struct CmdStroke {
+    float halfWidth;
+    uint rgba_color;
+};
+
+#define CmdStroke_size 8
+
+CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
+    return CmdStrokeRef(ref.offset + index * CmdStroke_size);
+}
+
+struct CmdFill {
+    vec2 start;
+    vec2 end;
+};
+
+#define CmdFill_size 16
+
+CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
+    return CmdFillRef(ref.offset + index * CmdFill_size);
+}
+
+struct CmdFillEdge {
+    int sign;
+    float y;
+};
+
+#define CmdFillEdge_size 8
+
+CmdFillEdgeRef CmdFillEdge_index(CmdFillEdgeRef ref, uint index) {
+    return CmdFillEdgeRef(ref.offset + index * CmdFillEdge_size);
+}
+
+struct CmdDrawFill {
+    int backdrop;
+    uint rgba_color;
+};
+
+#define CmdDrawFill_size 8
+
+CmdDrawFillRef CmdDrawFill_index(CmdDrawFillRef ref, uint index) {
+    return CmdDrawFillRef(ref.offset + index * CmdDrawFill_size);
+}
+
+struct CmdSolid {
+    uint rgba_color;
+};
+
+#define CmdSolid_size 4
+
+CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
+    return CmdSolidRef(ref.offset + index * CmdSolid_size);
+}
+
+#define Cmd_End 0
+#define Cmd_Circle 1
+#define Cmd_Line 2
+#define Cmd_Fill 3
+#define Cmd_Stroke 4
+#define Cmd_FillEdge 5
+#define Cmd_DrawFill 6
+#define Cmd_Solid 7
+#define Cmd_Bail 8
+#define Cmd_size 20
+
+CmdRef Cmd_index(CmdRef ref, uint index) {
+    return CmdRef(ref.offset + index * Cmd_size);
+}
+
+CmdCircle CmdCircle_read(CmdCircleRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    uint raw2 = ptcl[ix + 2];
+    uint raw3 = ptcl[ix + 3];
+    CmdCircle s;
+    s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+    s.radius = uintBitsToFloat(raw2);
+    s.rgba_color = raw3;
+    return s;
+}
+
+void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = floatBitsToUint(s.center.x);
+    ptcl[ix + 1] = floatBitsToUint(s.center.y);
+    ptcl[ix + 2] = floatBitsToUint(s.radius);
+    ptcl[ix + 3] = s.rgba_color;
+}
+
+CmdLine CmdLine_read(CmdLineRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    uint raw2 = ptcl[ix + 2];
+    uint raw3 = ptcl[ix + 3];
+    CmdLine s;
+    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+    return s;
+}
+
+void CmdLine_write(CmdLineRef ref, CmdLine s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = floatBitsToUint(s.start.x);
+    ptcl[ix + 1] = floatBitsToUint(s.start.y);
+    ptcl[ix + 2] = floatBitsToUint(s.end.x);
+    ptcl[ix + 3] = floatBitsToUint(s.end.y);
+}
+
+CmdStroke CmdStroke_read(CmdStrokeRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    CmdStroke s;
+    s.halfWidth = uintBitsToFloat(raw0);
+    s.rgba_color = raw1;
+    return s;
+}
+
+void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = floatBitsToUint(s.halfWidth);
+    ptcl[ix + 1] = s.rgba_color;
+}
+
+CmdFill CmdFill_read(CmdFillRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    uint raw2 = ptcl[ix + 2];
+    uint raw3 = ptcl[ix + 3];
+    CmdFill s;
+    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+    return s;
+}
+
+void CmdFill_write(CmdFillRef ref, CmdFill s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = floatBitsToUint(s.start.x);
+    ptcl[ix + 1] = floatBitsToUint(s.start.y);
+    ptcl[ix + 2] = floatBitsToUint(s.end.x);
+    ptcl[ix + 3] = floatBitsToUint(s.end.y);
+}
+
+CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    CmdFillEdge s;
+    s.sign = int(raw0);
+    s.y = uintBitsToFloat(raw1);
+    return s;
+}
+
+void CmdFillEdge_write(CmdFillEdgeRef ref, CmdFillEdge s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = uint(s.sign);
+    ptcl[ix + 1] = floatBitsToUint(s.y);
+}
+
+CmdDrawFill CmdDrawFill_read(CmdDrawFillRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    CmdDrawFill s;
+    s.backdrop = int(raw0);
+    s.rgba_color = raw1;
+    return s;
+}
+
+void CmdDrawFill_write(CmdDrawFillRef ref, CmdDrawFill s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = uint(s.backdrop);
+    ptcl[ix + 1] = s.rgba_color;
+}
+
+CmdSolid CmdSolid_read(CmdSolidRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    CmdSolid s;
+    s.rgba_color = raw0;
+    return s;
+}
+
+void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = s.rgba_color;
+}
+
+uint Cmd_tag(CmdRef ref) {
+    return ptcl[ref.offset >> 2];
+}
+
+CmdCircle Cmd_Circle_read(CmdRef ref) {
+    return CmdCircle_read(CmdCircleRef(ref.offset + 4));
+}
+
+CmdLine Cmd_Line_read(CmdRef ref) {
+    return CmdLine_read(CmdLineRef(ref.offset + 4));
+}
+
+CmdFill Cmd_Fill_read(CmdRef ref) {
+    return CmdFill_read(CmdFillRef(ref.offset + 4));
+}
+
+CmdStroke Cmd_Stroke_read(CmdRef ref) {
+    return CmdStroke_read(CmdStrokeRef(ref.offset + 4));
+}
+
+CmdFillEdge Cmd_FillEdge_read(CmdRef ref) {
+    return CmdFillEdge_read(CmdFillEdgeRef(ref.offset + 4));
+}
+
+CmdDrawFill Cmd_DrawFill_read(CmdRef ref) {
+    return CmdDrawFill_read(CmdDrawFillRef(ref.offset + 4));
+}
+
+CmdSolid Cmd_Solid_read(CmdRef ref) {
+    return CmdSolid_read(CmdSolidRef(ref.offset + 4));
+}
+
+void Cmd_End_write(CmdRef ref) {
+    ptcl[ref.offset >> 2] = Cmd_End;
+}
+
+void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
+    ptcl[ref.offset >> 2] = Cmd_Circle;
+    CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
+}
+
+void Cmd_Line_write(CmdRef ref, CmdLine s) {
+    ptcl[ref.offset >> 2] = Cmd_Line;
+    CmdLine_write(CmdLineRef(ref.offset + 4), s);
+}
+
+void Cmd_Fill_write(CmdRef ref, CmdFill s) {
+    ptcl[ref.offset >> 2] = Cmd_Fill;
+    CmdFill_write(CmdFillRef(ref.offset + 4), s);
+}
+
+void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
+    ptcl[ref.offset >> 2] = Cmd_Stroke;
+    CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
+}
+
+void Cmd_FillEdge_write(CmdRef ref, CmdFillEdge s) {
+    ptcl[ref.offset >> 2] = Cmd_FillEdge;
+    CmdFillEdge_write(CmdFillEdgeRef(ref.offset + 4), s);
+}
+
+void Cmd_DrawFill_write(CmdRef ref, CmdDrawFill s) {
+    ptcl[ref.offset >> 2] = Cmd_DrawFill;
+    CmdDrawFill_write(CmdDrawFillRef(ref.offset + 4), s);
+}
+
+void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
+    ptcl[ref.offset >> 2] = Cmd_Solid;
+    CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
+}
+
+void Cmd_Bail_write(CmdRef ref) {
+    ptcl[ref.offset >> 2] = Cmd_Bail;
+}
+
diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs
index 72f0d3c..56b73ca 100644
--- a/piet-gpu/src/main.rs
+++ b/piet-gpu/src/main.rs
@@ -73,6 +73,7 @@ fn dump_scene(buf: &[u8]) {
     }
 }
 
+#[allow(unused)]
 fn dump_k1_data(k1_buf: &[u32]) {
     for i in 0..k1_buf.len() {
         if k1_buf[i] != 0 {
@@ -96,7 +97,9 @@ fn main() {
             .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
             .unwrap();
         device.write_buffer(&scene_buf, &scene).unwrap();
+        // These should only be on the host if we're going to examine them from Rust.
         let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
+        let ptcl_buf = device.create_buffer(12 * 1024 * 4096, host).unwrap();
         let image_buf = device
             .create_buffer((WIDTH * HEIGHT * 4) as u64, host)
             .unwrap();
@@ -110,16 +113,23 @@ fn main() {
             .create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
             .unwrap();
 
+        let k3_code = include_bytes!("../shader/kernel3.spv");
+        let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap();
+        let k3_ds = device
+            .create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf])
+            .unwrap();
+
         let code = include_bytes!("../shader/image.spv");
         let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
         let descriptor_set = device
             .create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
             .unwrap();
-        let query_pool = device.create_query_pool(3).unwrap();
+        let query_pool = device.create_query_pool(4).unwrap();
         let mut cmd_buf = device.create_cmd_buf().unwrap();
         cmd_buf.begin();
         cmd_buf.copy_buffer(&scene_buf, &scene_dev);
         cmd_buf.clear_buffer(&tilegroup_buf);
+        cmd_buf.clear_buffer(&ptcl_buf);
         cmd_buf.memory_barrier();
         cmd_buf.write_timestamp(&query_pool, 0);
         cmd_buf.dispatch(
@@ -129,22 +139,36 @@ fn main() {
         );
         cmd_buf.write_timestamp(&query_pool, 1);
         cmd_buf.memory_barrier();
+        cmd_buf.dispatch(
+            &k3_pipeline,
+            &k3_ds,
+            ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
+        );
+        cmd_buf.write_timestamp(&query_pool, 2);
+        cmd_buf.memory_barrier();
         cmd_buf.dispatch(
             &pipeline,
             &descriptor_set,
             ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
         );
-        cmd_buf.write_timestamp(&query_pool, 2);
+        cmd_buf.write_timestamp(&query_pool, 3);
         cmd_buf.memory_barrier();
         cmd_buf.copy_buffer(&image_dev, &image_buf);
         cmd_buf.finish();
         device.run_cmd_buf(&cmd_buf).unwrap();
         let timestamps = device.reap_query_pool(query_pool).unwrap();
         println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
-        println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
+        println!(
+            "Kernel 3 time: {:.3}ms",
+            (timestamps[1] - timestamps[0]) * 1e3
+        );
+        println!(
+            "Render time: {:.3}ms",
+            (timestamps[2] - timestamps[1]) * 1e3
+        );
 
         let mut k1_data: Vec<u32> = Default::default();
-        device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
+        device.read_buffer(&ptcl_buf, &mut k1_data).unwrap();
         dump_k1_data(&k1_data);
 
         let mut img_data: Vec<u8> = Default::default();