diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp
index b640d14..12ae5b1 100644
--- a/piet-gpu/shader/backdrop.comp
+++ b/piet-gpu/shader/backdrop.comp
@@ -35,16 +35,13 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
 shared uint sh_row_width[BACKDROP_WG];
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint th_ix = gl_LocalInvocationID.x;
     uint element_ix = gl_GlobalInvocationID.x;
     AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
 
     // Work assignment: 1 thread : 1 path element
     uint row_count = 0;
+    bool mem_ok = mem_error == NO_ERROR;
     if (element_ix < conf.n_elements) {
         AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
         switch (tag.tag) {
@@ -67,7 +64,7 @@ void main() {
                 // long as it doesn't cross the left edge.
                 row_count = 0;
             }
-            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
             sh_row_alloc[th_ix] = path_alloc;
         }
     }
@@ -95,7 +92,7 @@ void main() {
             }
         }
         uint width = sh_row_width[el_ix];
-        if (width > 0) {
+        if (width > 0 && mem_ok) {
             // Process one row sequentially
             // Read backdrop value per tile and prefix sum it
             Alloc tiles_alloc = sh_row_alloc[el_ix];
diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv
index 26aeb4c..4b5ffdd 100644
Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ
diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp
index a43722d..b5e2dab 100644
--- a/piet-gpu/shader/binning.comp
+++ b/piet-gpu/shader/binning.comp
@@ -36,10 +36,6 @@ shared Alloc sh_chunk_alloc[N_TILE];
 shared bool sh_alloc_failed;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint my_n_elements = conf.n_elements;
     uint my_partition = gl_WorkGroupID.x;
 
@@ -105,7 +101,7 @@ void main() {
         count[i][gl_LocalInvocationID.x] = element_count;
     }
     // element_count is number of elements covering bin for this invocation.
-    Alloc chunk_alloc = new_alloc(0, 0);
+    Alloc chunk_alloc = new_alloc(0, 0, true);
     if (element_count != 0) {
         // TODO: aggregate atomic adds (subgroup is probably fastest)
         MallocResult chunk = malloc(element_count * BinInstance_size);
@@ -122,7 +118,7 @@ void main() {
     write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
 
     barrier();
-    if (sh_alloc_failed) {
+    if (sh_alloc_failed || mem_error != NO_ERROR) {
         return;
     }
 
diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv
index 46f1e47..05abf0c 100644
Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index 76d7fc6..731da97 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -56,7 +56,7 @@ void write_tile_alloc(uint el_ix, Alloc a) {
     sh_tile_alloc[el_ix] = a;
 }
 
-Alloc read_tile_alloc(uint el_ix) {
+Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
     return sh_tile_alloc[el_ix];
 }
 #else
@@ -64,9 +64,9 @@ void write_tile_alloc(uint el_ix, Alloc a) {
     // No-op
 }
 
-Alloc read_tile_alloc(uint el_ix) {
+Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
     // All memory.
-    return new_alloc(0, memory.length()*4);
+    return new_alloc(0, memory.length()*4, mem_ok);
 }
 #endif
 
@@ -109,10 +109,6 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     // Could use either linear or 2d layouts for both dispatch and
     // invocations within the workgroup. We'll use variables to abstract.
     uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
@@ -158,6 +154,7 @@ void main() {
 
     uint num_begin_slots = 0;
     uint begin_slot = 0;
+    bool mem_ok = mem_error == NO_ERROR;
     while (true) {
         for (uint i = 0; i < N_SLICE; i++) {
             sh_bitmaps[i][th_ix] = 0;
@@ -172,7 +169,7 @@ void main() {
                     uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
                     count = read_mem(conf.bin_alloc, in_ix);
                     uint offset = read_mem(conf.bin_alloc, in_ix + 1);
-                    sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size);
+                    sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
                 }
                 // prefix sum of counts
                 for (uint i = 0; i < LG_N_PART_READ; i++) {
@@ -196,7 +193,7 @@ void main() {
             }
             // use binary search to find element to read
             uint ix = rd_ix + th_ix;
-            if (ix >= wr_ix && ix < ready_ix) {
+            if (ix >= wr_ix && ix < ready_ix && mem_ok) {
                 uint part_ix = 0;
                 for (uint i = 0; i < LG_N_PART_READ; i++) {
                     uint probe = part_ix + ((N_PART_READ / 2) >> i);
@@ -253,7 +250,7 @@ void main() {
             // base relative to bin
             uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
             sh_tile_base[th_ix] = base;
-            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
             write_tile_alloc(th_ix, path_alloc);
             break;
         default:
@@ -288,11 +285,11 @@ void main() {
             uint width = sh_tile_width[el_ix];
             uint x = sh_tile_x0[el_ix] + seq_ix % width;
             uint y = sh_tile_y0[el_ix] + seq_ix / width;
-            bool include_tile;
+            bool include_tile = false;
             if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
                 include_tile = true;
-            } else {
-                Tile tile = Tile_read(read_tile_alloc(el_ix), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
+            } else if (mem_ok) {
+                Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
                 // Include the path in the tile if
                 // - the tile contains at least a segment (tile offset non-zero)
                 // - the tile is completely covered (backdrop non-zero)
@@ -311,7 +308,7 @@ void main() {
         // through the non-segment elements.
         uint slice_ix = 0;
         uint bitmap = sh_bitmaps[0][th_ix];
-        while (true) {
+        while (mem_ok) {
             if (bitmap == 0) {
                 slice_ix++;
                 if (slice_ix == N_SLICE) {
@@ -337,7 +334,7 @@ void main() {
             if (clip_zero_depth == 0) {
                 switch (tag.tag) {
                 case Annotated_Color:
-                    Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
                     if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
@@ -348,7 +345,7 @@ void main() {
                     cmd_ref.offset += 4 + CmdColor_size;
                     break;
                 case Annotated_Image:
-                    tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
                     if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
@@ -359,7 +356,7 @@ void main() {
                     cmd_ref.offset += 4 + CmdImage_size;
                     break;
                 case Annotated_BeginClip:
-                    tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     if (tile.tile.offset == 0 && tile.backdrop == 0) {
                         clip_zero_depth = clip_depth + 1;
diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv
index 7d0b629..5570675 100644
Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ
diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp
index 5b5f54b..b3773c4 100644
--- a/piet-gpu/shader/elements.comp
+++ b/piet-gpu/shader/elements.comp
@@ -176,10 +176,6 @@ shared uint sh_part_ix;
 shared State sh_prefix;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     State th_state[N_ROWS];
     // Determine partition to process by atomic counter (described in Section
     // 4.4 of prefix sum paper).
diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv
index 6a50c7e..8e51bdd 100644
Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
index 1d706a6..ea05228 100644
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -85,10 +85,6 @@ vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
     Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
     CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
@@ -118,7 +114,8 @@ void main() {
 
     float area[CHUNK];
     uint clip_depth = 0;
-    while (true) {
+    bool mem_ok = mem_error == NO_ERROR;
+    while (mem_ok) {
         uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
         if (tag == Cmd_End) {
             break;
@@ -131,7 +128,7 @@ void main() {
             for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
             TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
             do {
-                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
+                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                 vec2 line_vec = seg.vector;
                 for (uint k = 0; k < CHUNK; k++) {
                     vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
@@ -152,7 +149,7 @@ void main() {
             tile_seg_ref = TileSegRef(fill.tile_ref);
             // Calculate coverage based on backdrop + coverage of each line segment
             do {
-                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
+                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                 for (uint k = 0; k < CHUNK; k++) {
                     vec2 my_xy = xy + vec2(chunk_offset(k));
                     vec2 start = seg.origin - my_xy;
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
index bd14134..077640e 100644
Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ
diff --git a/piet-gpu/shader/kernel4_idx.spv b/piet-gpu/shader/kernel4_idx.spv
index 0443678..e2aa984 100644
Binary files a/piet-gpu/shader/kernel4_idx.spv and b/piet-gpu/shader/kernel4_idx.spv differ
diff --git a/piet-gpu/shader/mem.h b/piet-gpu/shader/mem.h
index 6e6e775..9e81f04 100644
--- a/piet-gpu/shader/mem.h
+++ b/piet-gpu/shader/mem.h
@@ -44,11 +44,15 @@ struct MallocResult {
 };
 
 // new_alloc synthesizes an Alloc from an offset and size.
-Alloc new_alloc(uint offset, uint size) {
+Alloc new_alloc(uint offset, uint size, bool mem_ok) {
     Alloc a;
     a.offset = offset;
 #ifdef MEM_DEBUG
-    a.size = size;
+    if (mem_ok) {
+        a.size = size;
+    } else {
+        a.size = 0;
+    }
 #endif
     return a;
 }
@@ -56,11 +60,10 @@ Alloc new_alloc(uint offset, uint size) {
 // malloc allocates size bytes of memory.
 MallocResult malloc(uint size) {
     MallocResult r;
-    r.failed = false;
     uint offset = atomicAdd(mem_offset, size);
-    r.alloc = new_alloc(offset, size);
-    if (offset + size > memory.length() * 4) {
-        r.failed = true;
+    r.failed = offset + size > memory.length() * 4;
+    r.alloc = new_alloc(offset, size, !r.failed);
+    if (r.failed) {
         atomicMax(mem_error, ERR_MALLOC_FAILED);
         return r;
     }
@@ -119,8 +122,10 @@ Alloc slice_mem(Alloc a, uint offset, uint size) {
         // but never written.
         return Alloc(0, 0);
     }
+    return Alloc(a.offset + offset, size);
+#else
+    return Alloc(a.offset + offset);
 #endif
-    return new_alloc(a.offset + offset, size);
 }
 
 // alloc_write writes alloc to memory at offset bytes.
diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp
index 51264bf..147c8a5 100644
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@@ -87,10 +87,6 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint element_ix = gl_GlobalInvocationID.x;
     PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
 
@@ -98,6 +94,7 @@ void main() {
     if (element_ix < conf.n_pathseg) {
         tag = PathSeg_tag(conf.pathseg_alloc, ref);
     }
+    bool mem_ok = mem_error == NO_ERROR;
     switch (tag.tag) {
     case PathSeg_Cubic:
         PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
@@ -135,7 +132,7 @@ void main() {
         bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
         uint path_ix = cubic.path_ix;
         Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
-        Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+        Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
         ivec4 bbox = ivec4(path.bbox);
         vec2 p0 = cubic.p0;
         qp0 = cubic.p0;
@@ -195,7 +192,7 @@ void main() {
                 uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
                 // Consider using subgroups to aggregate atomic add.
                 MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
-                if (tile_alloc.failed) {
+                if (tile_alloc.failed || !mem_ok) {
                     return;
                 }
                 uint tile_offset = tile_alloc.alloc.offset;
diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv
index b4cd985..08d61a4 100644
Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ
diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp
index 3a6e4ee..973ec14 100644
--- a/piet-gpu/shader/tile_alloc.comp
+++ b/piet-gpu/shader/tile_alloc.comp
@@ -28,10 +28,6 @@ shared uint sh_tile_count[TILE_ALLOC_WG];
 shared MallocResult sh_tile_alloc;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint th_ix = gl_LocalInvocationID.x;
     uint element_ix = gl_GlobalInvocationID.x;
     PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
@@ -86,7 +82,7 @@ void main() {
     }
     barrier();
     MallocResult alloc_start = sh_tile_alloc;
-    if (alloc_start.failed) {
+    if (alloc_start.failed || mem_error != NO_ERROR) {
         return;
     }
 
diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv
index 6d375bd..4a39f96 100644
Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ