diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp
index afe4d62..beba683 100644
--- a/piet-gpu/shader/backdrop.comp
+++ b/piet-gpu/shader/backdrop.comp
@@ -15,7 +15,7 @@
 
 #include "setup.h"
 
-#define LG_BACKDROP_WG 8
+#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
 #define BACKDROP_WG (1 << LG_BACKDROP_WG)
 
 layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp
index 3ed507b..046c4fb 100644
--- a/piet-gpu/shader/binning.comp
+++ b/piet-gpu/shader/binning.comp
@@ -17,8 +17,6 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf {
 
 layout(set = 0, binding = 1) buffer AllocBuf {
     uint n_elements; // paths
-    // Will be incremented atomically to claim tiles
-    uint tile_ix;
     uint alloc;
 };
 
@@ -42,8 +40,6 @@ shared uint bitmaps[N_SLICE][N_TILE];
 shared uint count[N_SLICE][N_TILE];
 shared uint sh_chunk_start[N_TILE];
 
-shared float sh_right_edge[N_TILE];
-
 void main() {
     uint my_n_elements = n_elements;
     uint my_partition = gl_WorkGroupID.x;
diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv
index 1b31cd1..98f1d4e 100644
Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index 56b3c07..57cbc8b 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
 #include "tile.h"
 #include "ptcl.h"
 
-#define LG_N_PART_READ 8
+#define LG_N_PART_READ (7 + LG_WG_FACTOR)
 #define N_PART_READ (1 << LG_N_PART_READ)
 
 shared uint sh_elements[N_TILE];
diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv
index 692b6c1..757b8b2 100644
Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ
diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv
index 5bd0650..54c5cc0 100644
Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
index 52ba572..6f50746 100644
Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ
diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp
index b17f3e3..658af0e 100644
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@@ -108,27 +108,9 @@ void main() {
     PathStrokeLine line;
     float dx;
     switch (tag) {
-    /*
-    case PathSeg_FillLine:
-    case PathSeg_StrokeLine:
-        line = PathSeg_StrokeLine_read(ref);
-        xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
-        xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
-        ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
-        ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
-        dx = line.p1.x - line.p0.x;
-        float dy = line.p1.y - line.p0.y;
-        // Set up for per-scanline coverage formula, below.
-        float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
-        c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
-        b = invslope; // Note: assumes square tiles, otherwise scale.
-        a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
-        break;
-    */
     case PathSeg_FillCubic:
     case PathSeg_StrokeCubic:
         PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
-        // Commented out code is for computing error bound on conversion to quadratics
         vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
         float err = err_v.x * err_v.x + err_v.y * err_v.y;
         // The number of quadratics.
diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h
index 75bed8e..6bdde03 100644
--- a/piet-gpu/shader/setup.h
+++ b/piet-gpu/shader/setup.h
@@ -3,6 +3,11 @@
 // Much of this will be made dynamic in various ways, but for now it's easiest
 // to hardcode and keep all in one place.
 
+// Setting LG_WG_FACTOR to n scales workgroup sizes by 2^n. Use 0 for a
+// maximum workgroup size of 128, or 1 for a maximum workgroup size of 256.
+#define LG_WG_FACTOR 1
+#define WG_FACTOR (1<<LG_WG_FACTOR)
+
 // TODO: compute all these
 
 #define WIDTH_IN_TILES 128
@@ -10,21 +15,14 @@
 #define TILE_WIDTH_PX 16
 #define TILE_HEIGHT_PX 16
 
-// TODO: make the image size dynamic.
-#define IMAGE_WIDTH (WIDTH_IN_TILES*TILE_WIDTH_PX)
-#define IMAGE_HEIGHT (HEIGHT_IN_TILES*TILE_HEIGHT_PX)
-
 #define PTCL_INITIAL_ALLOC 1024
 
-// Stuff for new algorithm follows; some of the above should get
-// deleted.
-
 // These should probably be renamed and/or reworked. In the binning
 // kernel, they represent the number of bins. Also, the workgroup size
 // of that kernel is equal to the number of bins, but should probably
 // be more flexible (it's 512 in the K&L paper).
 #define N_TILE_X 16
-#define N_TILE_Y 16
+#define N_TILE_Y (8 * WG_FACTOR)
 #define N_TILE (N_TILE_X * N_TILE_Y)
-#define LG_N_TILE 8
+#define LG_N_TILE (7 + LG_WG_FACTOR)
 #define N_SLICE (N_TILE / 32)
diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp
index ee5d1db..b0c99a5 100644
--- a/piet-gpu/shader/tile_alloc.comp
+++ b/piet-gpu/shader/tile_alloc.comp
@@ -5,7 +5,7 @@
 
 #include "setup.h"
 
-#define LG_TILE_ALLOC_WG 8
+#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
 #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
 
 layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv
index 81b3607..4eae6b8 100644
Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index 324df71..6177b3e 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -227,14 +227,14 @@ impl<D: Device> Renderer<D> {
             &[],
         )?;
 
-        let bin_alloc_buf_host = device.create_buffer(12, host)?;
-        let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
+        let bin_alloc_buf_host = device.create_buffer(8, host)?;
+        let bin_alloc_buf_dev = device.create_buffer(8, dev)?;
 
         // TODO: constants
         let bin_alloc_start = ((n_paths + 255) & !255) * 8;
         device.write_buffer(
             &bin_alloc_buf_host,
-            &[n_paths as u32, 0, bin_alloc_start as u32],
+            &[n_paths as u32, bin_alloc_start as u32],
         )?;
         let bin_code = include_bytes!("../shader/binning.spv");
         let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;