diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp
index b640d14..12ae5b1 100644
--- a/piet-gpu/shader/backdrop.comp
+++ b/piet-gpu/shader/backdrop.comp
@@ -35,16 +35,13 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
 shared uint sh_row_width[BACKDROP_WG];
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint th_ix = gl_LocalInvocationID.x;
     uint element_ix = gl_GlobalInvocationID.x;
     AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
 
     // Work assignment: 1 thread : 1 path element
     uint row_count = 0;
+    bool mem_ok = mem_error == NO_ERROR;
     if (element_ix < conf.n_elements) {
         AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
         switch (tag.tag) {
@@ -67,7 +64,7 @@ void main() {
                 // long as it doesn't cross the left edge.
                 row_count = 0;
             }
-            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+            Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
             sh_row_alloc[th_ix] = path_alloc;
         }
     }
@@ -95,7 +92,7 @@ void main() {
             }
         }
        uint width = sh_row_width[el_ix];
-        if (width > 0) {
+        if (width > 0 && mem_ok) {
            // Process one row sequentially
            // Read backdrop value per tile and prefix sum it
            Alloc tiles_alloc = sh_row_alloc[el_ix];
diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv
index 26aeb4c..4b5ffdd 100644
Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ
diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp
index a43722d..b5e2dab 100644
--- a/piet-gpu/shader/binning.comp
+++ b/piet-gpu/shader/binning.comp
@@ -36,10 +36,6 @@ shared Alloc sh_chunk_alloc[N_TILE];
 shared bool sh_alloc_failed;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint my_n_elements = conf.n_elements;
     uint my_partition = gl_WorkGroupID.x;
 
@@ -105,7 +101,7 @@ void main() {
         count[i][gl_LocalInvocationID.x] = element_count;
     }
     // element_count is number of elements covering bin for this invocation.
-    Alloc chunk_alloc = new_alloc(0, 0);
+    Alloc chunk_alloc = new_alloc(0, 0, true);
     if (element_count != 0) {
         // TODO: aggregate atomic adds (subgroup is probably fastest)
         MallocResult chunk = malloc(element_count * BinInstance_size);
@@ -122,7 +118,7 @@ void main() {
     write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
 
     barrier();
-    if (sh_alloc_failed) {
+    if (sh_alloc_failed || mem_error != NO_ERROR) {
         return;
     }
 
diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv
index 46f1e47..05abf0c 100644
Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp
index 76d7fc6..731da97 100644
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -56,7 +56,7 @@ void write_tile_alloc(uint el_ix, Alloc a) {
     sh_tile_alloc[el_ix] = a;
 }
 
-Alloc read_tile_alloc(uint el_ix) {
+Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
     return sh_tile_alloc[el_ix];
 }
 #else
@@ -64,9 +64,9 @@ void write_tile_alloc(uint el_ix, Alloc a) {
     // No-op
 }
 
-Alloc read_tile_alloc(uint el_ix) {
+Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
     // All memory.
-    return new_alloc(0, memory.length()*4);
+    return new_alloc(0, memory.length()*4, mem_ok);
 }
 #endif
 
@@ -109,10 +109,6 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     // Could use either linear or 2d layouts for both dispatch and
     // invocations within the workgroup. We'll use variables to abstract.
     uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
@@ -158,6 +154,7 @@ void main() {
     uint num_begin_slots = 0;
     uint begin_slot = 0;
 
+    bool mem_ok = mem_error == NO_ERROR;
     while (true) {
         for (uint i = 0; i < N_SLICE; i++) {
             sh_bitmaps[i][th_ix] = 0;
@@ -172,7 +169,7 @@ void main() {
             uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
             count = read_mem(conf.bin_alloc, in_ix);
             uint offset = read_mem(conf.bin_alloc, in_ix + 1);
-            sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size);
+            sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
         }
         // prefix sum of counts
         for (uint i = 0; i < LG_N_PART_READ; i++) {
@@ -196,7 +193,7 @@ void main() {
             }
             // use binary search to find element to read
             uint ix = rd_ix + th_ix;
-            if (ix >= wr_ix && ix < ready_ix) {
+            if (ix >= wr_ix && ix < ready_ix && mem_ok) {
                 uint part_ix = 0;
                 for (uint i = 0; i < LG_N_PART_READ; i++) {
                     uint probe = part_ix + ((N_PART_READ / 2) >> i);
@@ -253,7 +250,7 @@ void main() {
                     // base relative to bin
                     uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
                     sh_tile_base[th_ix] = base;
-                    Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+                    Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
                     write_tile_alloc(th_ix, path_alloc);
                     break;
                 default:
@@ -288,11 +285,11 @@ void main() {
                 uint width = sh_tile_width[el_ix];
                 uint x = sh_tile_x0[el_ix] + seq_ix % width;
                 uint y = sh_tile_y0[el_ix] + seq_ix / width;
-                bool include_tile;
+                bool include_tile = false;
                 if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
                     include_tile = true;
-                } else {
-                    Tile tile = Tile_read(read_tile_alloc(el_ix), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
+                } else if (mem_ok) {
+                    Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
                     // Include the path in the tile if
                     // - the tile contains at least a segment (tile offset non-zero)
                     // - the tile is completely covered (backdrop non-zero)
@@ -311,7 +308,7 @@ void main() {
         // through the non-segment elements.
         uint slice_ix = 0;
         uint bitmap = sh_bitmaps[0][th_ix];
-        while (true) {
+        while (mem_ok) {
            if (bitmap == 0) {
                slice_ix++;
                if (slice_ix == N_SLICE) {
@@ -337,7 +334,7 @@ void main() {
             if (clip_zero_depth == 0) {
                 switch (tag.tag) {
                 case Annotated_Color:
-                    Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
                     if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
@@ -348,7 +345,7 @@ void main() {
                     cmd_ref.offset += 4 + CmdColor_size;
                     break;
                 case Annotated_Image:
-                    tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
                     if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
@@ -359,7 +356,7 @@ void main() {
                     cmd_ref.offset += 4 + CmdImage_size;
                     break;
                 case Annotated_BeginClip:
-                    tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+                    tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                         + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                     if (tile.tile.offset == 0 && tile.backdrop == 0) {
                         clip_zero_depth = clip_depth + 1;
diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv
index 7d0b629..5570675 100644
Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ
diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp
index 56ad6e2..b3773c4 100644
--- a/piet-gpu/shader/elements.comp
+++ b/piet-gpu/shader/elements.comp
@@ -176,10 +176,6 @@ shared uint sh_part_ix;
 shared State sh_prefix;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     State th_state[N_ROWS];
     // Determine partition to process by atomic counter (described in Section
     // 4.4 of prefix sum paper).
@@ -392,7 +388,6 @@ void main() {
                 vec2 lw = get_linewidth(st);
                 anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
             } else {
-                anno_begin_clip.bbox = begin_clip.bbox;
                 anno_fill.linewidth = 0.0;
             }
             out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv
index 6bd53b3..8e51bdd 100644
Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
index 1d706a6..c613b72 100644
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -22,45 +22,45 @@
 #define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
 layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
 
-layout(set = 0, binding = 1) readonly buffer ConfigBuf {
+layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
     Config conf;
 };
 
-layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
+layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
 
 #ifdef ENABLE_IMAGE_INDICES
-layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[];
+layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
 #else
-layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
+layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
 #endif
 
 #include "ptcl.h"
 #include "tile.h"
 
-vec3 tosRGB(vec3 rgb) {
+mediump vec3 tosRGB(mediump vec3 rgb) {
     bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
-    vec3 below = vec3(12.92)*rgb;
-    vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
+    mediump vec3 below = vec3(12.92)*rgb;
+    mediump vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
     return mix(below, above, cutoff);
 }
 
-vec3 fromsRGB(vec3 srgb) {
+mediump vec3 fromsRGB(mediump vec3 srgb) {
     // Formula from EXT_sRGB.
     bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
-    vec3 below = srgb/vec3(12.92);
-    vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
+    mediump vec3 below = srgb/vec3(12.92);
+    mediump vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
     return mix(below, above, cutoff);
 }
 
 // unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
 // space.
-vec4 unpacksRGB(uint srgba) {
-    vec4 color = unpackUnorm4x8(srgba).wzyx;
+mediump vec4 unpacksRGB(uint srgba) {
+    mediump vec4 color = unpackUnorm4x8(srgba).wzyx;
     return vec4(fromsRGB(color.rgb), color.a);
 }
 
 // packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
-uint packsRGB(vec4 rgba) {
+uint packsRGB(mediump vec4 rgba) {
     rgba = vec4(tosRGB(rgba.rgb), rgba.a);
     return packUnorm4x8(rgba.wzyx);
 }
@@ -69,14 +69,15 @@ uvec2 chunk_offset(uint i) {
     return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY);
 }
 
-vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
-    vec4 rgba[CHUNK];
+mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
+    mediump vec4 rgba[CHUNK];
     for (uint i = 0; i < CHUNK; i++) {
         ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
+        mediump vec4 fg_rgba;
 #ifdef ENABLE_IMAGE_INDICES
-        vec4 fg_rgba = imageLoad(images[cmd_img.index], uv);
+        fg_rgba = imageLoad(images[cmd_img.index], uv);
 #else
-        vec4 fg_rgba = imageLoad(images[0], uv);
+        fg_rgba = imageLoad(images[0], uv);
 #endif
         fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
         rgba[i] = fg_rgba;
@@ -85,10 +86,6 @@ vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
     Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
     CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
@@ -99,7 +96,7 @@ void main() {
     uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
     vec2 xy = vec2(xy_uint);
 
-    vec4 rgba[CHUNK];
+    mediump vec4 rgba[CHUNK];
     for (uint i = 0; i < CHUNK; i++) {
         rgba[i] = vec4(0.0);
         // TODO: remove this debug image support when the actual image method is plumbed.
@@ -116,9 +113,10 @@ void main() {
 #endif
     }
 
-    float area[CHUNK];
+    mediump float area[CHUNK];
     uint clip_depth = 0;
-    while (true) {
+    bool mem_ok = mem_error == NO_ERROR;
+    while (mem_ok) {
         uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
         if (tag == Cmd_End) {
             break;
@@ -127,11 +125,11 @@ void main() {
         case Cmd_Stroke:
             // Calculate distance field from all the line segments in this tile.
             CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
-            float df[CHUNK];
+            mediump float df[CHUNK];
             for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
             TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
             do {
-                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
+                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                 vec2 line_vec = seg.vector;
                 for (uint k = 0; k < CHUNK; k++) {
                     vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
@@ -152,7 +150,7 @@ void main() {
             tile_seg_ref = TileSegRef(fill.tile_ref);
             // Calculate coverage based on backdrop + coverage of each line segment
             do {
-                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
+                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                 for (uint k = 0; k < CHUNK; k++) {
                     vec2 my_xy = xy + vec2(chunk_offset(k));
                     vec2 start = seg.origin - my_xy;
@@ -193,18 +191,18 @@ void main() {
             break;
         case Cmd_Color:
             CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
-            vec4 fg = unpacksRGB(color.rgba_color);
+            mediump vec4 fg = unpacksRGB(color.rgba_color);
             for (uint k = 0; k < CHUNK; k++) {
-                vec4 fg_k = fg * area[k];
+                mediump vec4 fg_k = fg * area[k];
                 rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
             }
             cmd_ref.offset += 4 + CmdColor_size;
             break;
         case Cmd_Image:
             CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
-            vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
+            mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
             for (uint k = 0; k < CHUNK; k++) {
-                vec4 fg_k = img[k] * area[k];
+                mediump vec4 fg_k = img[k] * area[k];
                 rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
             }
             cmd_ref.offset += 4 + CmdImage_size;
@@ -215,7 +213,7 @@ void main() {
             for (uint k = 0; k < CHUNK; k++) {
                 uvec2 offset = chunk_offset(k);
                 uint srgb = packsRGB(vec4(rgba[k]));
-                float alpha = clamp(abs(area[k]), 0.0, 1.0);
+                mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
                 write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
                 write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
                 rgba[k] = vec4(0.0);
@@ -231,8 +229,8 @@ void main() {
                 uvec2 offset = chunk_offset(k);
                 uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
                 uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
-                vec4 bg = unpacksRGB(srgb);
-                vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
+                mediump vec4 bg = unpacksRGB(srgb);
+                mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
                 rgba[k] = bg * (1.0 - fg.a) + fg;
             }
             cmd_ref.offset += 4;
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
index bd14134..3e88494 100644
Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ
diff --git a/piet-gpu/shader/kernel4_idx.spv b/piet-gpu/shader/kernel4_idx.spv
index 0443678..17d2f47 100644
Binary files a/piet-gpu/shader/kernel4_idx.spv and b/piet-gpu/shader/kernel4_idx.spv differ
diff --git a/piet-gpu/shader/mem.h b/piet-gpu/shader/mem.h
index 6e6e775..9e81f04 100644
--- a/piet-gpu/shader/mem.h
+++ b/piet-gpu/shader/mem.h
@@ -44,11 +44,15 @@ struct MallocResult {
 };
 
 // new_alloc synthesizes an Alloc from an offset and size.
-Alloc new_alloc(uint offset, uint size) {
+Alloc new_alloc(uint offset, uint size, bool mem_ok) {
     Alloc a;
     a.offset = offset;
 #ifdef MEM_DEBUG
-    a.size = size;
+    if (mem_ok) {
+        a.size = size;
+    } else {
+        a.size = 0;
+    }
 #endif
     return a;
 }
@@ -56,11 +60,10 @@ Alloc new_alloc(uint offset, uint size) {
 // malloc allocates size bytes of memory.
 MallocResult malloc(uint size) {
     MallocResult r;
-    r.failed = false;
     uint offset = atomicAdd(mem_offset, size);
-    r.alloc = new_alloc(offset, size);
-    if (offset + size > memory.length() * 4) {
-        r.failed = true;
+    r.failed = offset + size > memory.length() * 4;
+    r.alloc = new_alloc(offset, size, !r.failed);
+    if (r.failed) {
         atomicMax(mem_error, ERR_MALLOC_FAILED);
         return r;
     }
@@ -119,8 +122,10 @@ Alloc slice_mem(Alloc a, uint offset, uint size) {
         // but never written.
         return Alloc(0, 0);
     }
+    return Alloc(a.offset + offset, size);
+#else
+    return Alloc(a.offset + offset);
 #endif
-    return new_alloc(a.offset + offset, size);
 }
 
 // alloc_write writes alloc to memory at offset bytes.
diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp
index 51264bf..147c8a5 100644
--- a/piet-gpu/shader/path_coarse.comp
+++ b/piet-gpu/shader/path_coarse.comp
@@ -87,10 +87,6 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
 }
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint element_ix = gl_GlobalInvocationID.x;
     PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
 
@@ -98,6 +94,7 @@ void main() {
     if (element_ix < conf.n_pathseg) {
         tag = PathSeg_tag(conf.pathseg_alloc, ref);
     }
+    bool mem_ok = mem_error == NO_ERROR;
     switch (tag.tag) {
     case PathSeg_Cubic:
         PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
@@ -135,7 +132,7 @@ void main() {
         bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
         uint path_ix = cubic.path_ix;
         Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
-        Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
+        Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
         ivec4 bbox = ivec4(path.bbox);
         vec2 p0 = cubic.p0;
         qp0 = cubic.p0;
@@ -195,7 +192,7 @@ void main() {
             uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
             // Consider using subgroups to aggregate atomic add.
             MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
-            if (tile_alloc.failed) {
+            if (tile_alloc.failed || !mem_ok) {
                 return;
             }
             uint tile_offset = tile_alloc.alloc.offset;
diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv
index b4cd985..08d61a4 100644
Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ
diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp
index 3a6e4ee..973ec14 100644
--- a/piet-gpu/shader/tile_alloc.comp
+++ b/piet-gpu/shader/tile_alloc.comp
@@ -28,10 +28,6 @@ shared uint sh_tile_count[TILE_ALLOC_WG];
 shared MallocResult sh_tile_alloc;
 
 void main() {
-    if (mem_error != NO_ERROR) {
-        return;
-    }
-
     uint th_ix = gl_LocalInvocationID.x;
     uint element_ix = gl_GlobalInvocationID.x;
     PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
@@ -86,7 +82,7 @@ void main() {
     }
     barrier();
     MallocResult alloc_start = sh_tile_alloc;
-    if (alloc_start.failed) {
+    if (alloc_start.failed || mem_error != NO_ERROR) {
         return;
     }
 
diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv
index 6d375bd..4a39f96 100644
Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ
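Note for reviewers: the shader changes above share one control-flow pattern. Instead of returning from main() when mem_error is set (which lets some invocations skip later barrier() calls and breaks workgroup-uniform execution), each shader now reads the flag once into mem_ok, threads it into new_alloc (so a MEM_DEBUG build sees a zero-sized, unreadable allocation on failure), and uses it to guard the actual work. The GLSL sketch below illustrates that pattern only; it is not code from the repository. do_row() and ROW_SIZE are hypothetical stand-ins, while mem_error, NO_ERROR, MallocResult, malloc, new_alloc and barrier() come from mem.h as changed in this diff.

    // Sketch only: assumes the declarations from mem.h; do_row()/ROW_SIZE are invented.
    void main() {
        // Read the error flag once; do NOT early-return, so every invocation
        // still reaches the barrier below.
        bool mem_ok = mem_error == NO_ERROR;

        MallocResult r = malloc(ROW_SIZE);        // on overflow: r.failed set, mem_error raised
        // Synthesized allocs carry the flag, so MEM_DEBUG bounds checks see size 0 on failure.
        Alloc row = new_alloc(r.alloc.offset, ROW_SIZE, mem_ok && !r.failed);

        // ... shared-memory setup that all invocations must execute ...
        barrier();                                // reached even after an allocation failure

        if (mem_ok && !r.failed) {
            do_row(row);                          // guard the work, not the whole shader
        }
    }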