mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
avoid non-uniform barrier control flow when exhausting memory
The compute shaders have a check for the succesful completion of their preceding stage. However, consider a shader execution path like the following: void main() if (mem_error != NO_ERROR) { return; } ... malloc(...); ... barrier(); ... } and shader execution that fails to allocate memory, thereby setting mem_error to ERR_MALLOC_FAILED in malloc before reaching the barrier. If another shader execution then begins execution, its mem_eror check will make it return early and not reach the barrier. All GPU APIs require (dynamically) uniform control flow for barriers, and the above case may lead to GPU hangs in practice. Fix this issue by replacing the early exits with careful checks that don't interrupt barrier control flow. Unfortunately, it's harder to prove the soundness of the new checks, so this change also clears dynamic memory ranges in MEM_DEBUG mode when memory is exhausted. The result is that accessing memory after exhaustion triggers an error. Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
parent
3b4a72deb9
commit
d9d518b248
|
@ -35,16 +35,13 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
|
||||||
shared uint sh_row_width[BACKDROP_WG];
|
shared uint sh_row_width[BACKDROP_WG];
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
|
|
||||||
// Work assignment: 1 thread : 1 path element
|
// Work assignment: 1 thread : 1 path element
|
||||||
uint row_count = 0;
|
uint row_count = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
if (element_ix < conf.n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
|
@ -67,7 +64,7 @@ void main() {
|
||||||
// long as it doesn't cross the left edge.
|
// long as it doesn't cross the left edge.
|
||||||
row_count = 0;
|
row_count = 0;
|
||||||
}
|
}
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
sh_row_alloc[th_ix] = path_alloc;
|
sh_row_alloc[th_ix] = path_alloc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,7 +92,7 @@ void main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint width = sh_row_width[el_ix];
|
uint width = sh_row_width[el_ix];
|
||||||
if (width > 0) {
|
if (width > 0 && mem_ok) {
|
||||||
// Process one row sequentially
|
// Process one row sequentially
|
||||||
// Read backdrop value per tile and prefix sum it
|
// Read backdrop value per tile and prefix sum it
|
||||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||||
|
|
Binary file not shown.
|
@ -36,10 +36,6 @@ shared Alloc sh_chunk_alloc[N_TILE];
|
||||||
shared bool sh_alloc_failed;
|
shared bool sh_alloc_failed;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint my_n_elements = conf.n_elements;
|
uint my_n_elements = conf.n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
|
||||||
|
@ -105,7 +101,7 @@ void main() {
|
||||||
count[i][gl_LocalInvocationID.x] = element_count;
|
count[i][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
// element_count is number of elements covering bin for this invocation.
|
// element_count is number of elements covering bin for this invocation.
|
||||||
Alloc chunk_alloc = new_alloc(0, 0);
|
Alloc chunk_alloc = new_alloc(0, 0, true);
|
||||||
if (element_count != 0) {
|
if (element_count != 0) {
|
||||||
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
||||||
MallocResult chunk = malloc(element_count * BinInstance_size);
|
MallocResult chunk = malloc(element_count * BinInstance_size);
|
||||||
|
@ -122,7 +118,7 @@ void main() {
|
||||||
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
if (sh_alloc_failed) {
|
if (sh_alloc_failed || mem_error != NO_ERROR) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -56,7 +56,7 @@ void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
sh_tile_alloc[el_ix] = a;
|
sh_tile_alloc[el_ix] = a;
|
||||||
}
|
}
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix) {
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
return sh_tile_alloc[el_ix];
|
return sh_tile_alloc[el_ix];
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -64,9 +64,9 @@ void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
// No-op
|
// No-op
|
||||||
}
|
}
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix) {
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
// All memory.
|
// All memory.
|
||||||
return new_alloc(0, memory.length()*4);
|
return new_alloc(0, memory.length()*4, mem_ok);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -109,10 +109,6 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Could use either linear or 2d layouts for both dispatch and
|
// Could use either linear or 2d layouts for both dispatch and
|
||||||
// invocations within the workgroup. We'll use variables to abstract.
|
// invocations within the workgroup. We'll use variables to abstract.
|
||||||
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||||
|
@ -158,6 +154,7 @@ void main() {
|
||||||
|
|
||||||
uint num_begin_slots = 0;
|
uint num_begin_slots = 0;
|
||||||
uint begin_slot = 0;
|
uint begin_slot = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
|
@ -172,7 +169,7 @@ void main() {
|
||||||
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
||||||
count = read_mem(conf.bin_alloc, in_ix);
|
count = read_mem(conf.bin_alloc, in_ix);
|
||||||
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
||||||
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size);
|
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
|
||||||
}
|
}
|
||||||
// prefix sum of counts
|
// prefix sum of counts
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
|
@ -196,7 +193,7 @@ void main() {
|
||||||
}
|
}
|
||||||
// use binary search to find element to read
|
// use binary search to find element to read
|
||||||
uint ix = rd_ix + th_ix;
|
uint ix = rd_ix + th_ix;
|
||||||
if (ix >= wr_ix && ix < ready_ix) {
|
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
|
||||||
uint part_ix = 0;
|
uint part_ix = 0;
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
||||||
|
@ -253,7 +250,7 @@ void main() {
|
||||||
// base relative to bin
|
// base relative to bin
|
||||||
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
sh_tile_base[th_ix] = base;
|
sh_tile_base[th_ix] = base;
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
write_tile_alloc(th_ix, path_alloc);
|
write_tile_alloc(th_ix, path_alloc);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -288,11 +285,11 @@ void main() {
|
||||||
uint width = sh_tile_width[el_ix];
|
uint width = sh_tile_width[el_ix];
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
bool include_tile;
|
bool include_tile = false;
|
||||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
||||||
include_tile = true;
|
include_tile = true;
|
||||||
} else {
|
} else if (mem_ok) {
|
||||||
Tile tile = Tile_read(read_tile_alloc(el_ix), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
// Include the path in the tile if
|
// Include the path in the tile if
|
||||||
// - the tile contains at least a segment (tile offset non-zero)
|
// - the tile contains at least a segment (tile offset non-zero)
|
||||||
// - the tile is completely covered (backdrop non-zero)
|
// - the tile is completely covered (backdrop non-zero)
|
||||||
|
@ -311,7 +308,7 @@ void main() {
|
||||||
// through the non-segment elements.
|
// through the non-segment elements.
|
||||||
uint slice_ix = 0;
|
uint slice_ix = 0;
|
||||||
uint bitmap = sh_bitmaps[0][th_ix];
|
uint bitmap = sh_bitmaps[0][th_ix];
|
||||||
while (true) {
|
while (mem_ok) {
|
||||||
if (bitmap == 0) {
|
if (bitmap == 0) {
|
||||||
slice_ix++;
|
slice_ix++;
|
||||||
if (slice_ix == N_SLICE) {
|
if (slice_ix == N_SLICE) {
|
||||||
|
@ -337,7 +334,7 @@ void main() {
|
||||||
if (clip_zero_depth == 0) {
|
if (clip_zero_depth == 0) {
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
case Annotated_Color:
|
case Annotated_Color:
|
||||||
Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
@ -348,7 +345,7 @@ void main() {
|
||||||
cmd_ref.offset += 4 + CmdColor_size;
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
break;
|
break;
|
||||||
case Annotated_Image:
|
case Annotated_Image:
|
||||||
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
@ -359,7 +356,7 @@ void main() {
|
||||||
cmd_ref.offset += 4 + CmdImage_size;
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
break;
|
break;
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||||
clip_zero_depth = clip_depth + 1;
|
clip_zero_depth = clip_depth + 1;
|
||||||
|
|
Binary file not shown.
|
@ -176,10 +176,6 @@ shared uint sh_part_ix;
|
||||||
shared State sh_prefix;
|
shared State sh_prefix;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
State th_state[N_ROWS];
|
State th_state[N_ROWS];
|
||||||
// Determine partition to process by atomic counter (described in Section
|
// Determine partition to process by atomic counter (described in Section
|
||||||
// 4.4 of prefix sum paper).
|
// 4.4 of prefix sum paper).
|
||||||
|
|
Binary file not shown.
|
@ -85,10 +85,6 @@ vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
||||||
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||||
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||||
|
@ -118,7 +114,8 @@ void main() {
|
||||||
|
|
||||||
float area[CHUNK];
|
float area[CHUNK];
|
||||||
uint clip_depth = 0;
|
uint clip_depth = 0;
|
||||||
while (true) {
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
|
while (mem_ok) {
|
||||||
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
||||||
if (tag == Cmd_End) {
|
if (tag == Cmd_End) {
|
||||||
break;
|
break;
|
||||||
|
@ -131,7 +128,7 @@ void main() {
|
||||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
vec2 line_vec = seg.vector;
|
vec2 line_vec = seg.vector;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||||
|
@ -152,7 +149,7 @@ void main() {
|
||||||
tile_seg_ref = TileSegRef(fill.tile_ref);
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||||
// Calculate coverage based on backdrop + coverage of each line segment
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 my_xy = xy + vec2(chunk_offset(k));
|
vec2 my_xy = xy + vec2(chunk_offset(k));
|
||||||
vec2 start = seg.origin - my_xy;
|
vec2 start = seg.origin - my_xy;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -44,11 +44,15 @@ struct MallocResult {
|
||||||
};
|
};
|
||||||
|
|
||||||
// new_alloc synthesizes an Alloc from an offset and size.
|
// new_alloc synthesizes an Alloc from an offset and size.
|
||||||
Alloc new_alloc(uint offset, uint size) {
|
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
||||||
Alloc a;
|
Alloc a;
|
||||||
a.offset = offset;
|
a.offset = offset;
|
||||||
#ifdef MEM_DEBUG
|
#ifdef MEM_DEBUG
|
||||||
a.size = size;
|
if (mem_ok) {
|
||||||
|
a.size = size;
|
||||||
|
} else {
|
||||||
|
a.size = 0;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
@ -56,11 +60,10 @@ Alloc new_alloc(uint offset, uint size) {
|
||||||
// malloc allocates size bytes of memory.
|
// malloc allocates size bytes of memory.
|
||||||
MallocResult malloc(uint size) {
|
MallocResult malloc(uint size) {
|
||||||
MallocResult r;
|
MallocResult r;
|
||||||
r.failed = false;
|
|
||||||
uint offset = atomicAdd(mem_offset, size);
|
uint offset = atomicAdd(mem_offset, size);
|
||||||
r.alloc = new_alloc(offset, size);
|
r.failed = offset + size > memory.length() * 4;
|
||||||
if (offset + size > memory.length() * 4) {
|
r.alloc = new_alloc(offset, size, !r.failed);
|
||||||
r.failed = true;
|
if (r.failed) {
|
||||||
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -119,8 +122,10 @@ Alloc slice_mem(Alloc a, uint offset, uint size) {
|
||||||
// but never written.
|
// but never written.
|
||||||
return Alloc(0, 0);
|
return Alloc(0, 0);
|
||||||
}
|
}
|
||||||
|
return Alloc(a.offset + offset, size);
|
||||||
|
#else
|
||||||
|
return Alloc(a.offset + offset);
|
||||||
#endif
|
#endif
|
||||||
return new_alloc(a.offset + offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// alloc_write writes alloc to memory at offset bytes.
|
// alloc_write writes alloc to memory at offset bytes.
|
||||||
|
|
|
@ -87,10 +87,6 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
||||||
|
|
||||||
|
@ -98,6 +94,7 @@ void main() {
|
||||||
if (element_ix < conf.n_pathseg) {
|
if (element_ix < conf.n_pathseg) {
|
||||||
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
||||||
}
|
}
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
case PathSeg_Cubic:
|
case PathSeg_Cubic:
|
||||||
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
||||||
|
@ -135,7 +132,7 @@ void main() {
|
||||||
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
|
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
|
||||||
uint path_ix = cubic.path_ix;
|
uint path_ix = cubic.path_ix;
|
||||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
ivec4 bbox = ivec4(path.bbox);
|
ivec4 bbox = ivec4(path.bbox);
|
||||||
vec2 p0 = cubic.p0;
|
vec2 p0 = cubic.p0;
|
||||||
qp0 = cubic.p0;
|
qp0 = cubic.p0;
|
||||||
|
@ -195,7 +192,7 @@ void main() {
|
||||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||||
// Consider using subgroups to aggregate atomic add.
|
// Consider using subgroups to aggregate atomic add.
|
||||||
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
||||||
if (tile_alloc.failed) {
|
if (tile_alloc.failed || !mem_ok) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
uint tile_offset = tile_alloc.alloc.offset;
|
uint tile_offset = tile_alloc.alloc.offset;
|
||||||
|
|
Binary file not shown.
|
@ -28,10 +28,6 @@ shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||||
shared MallocResult sh_tile_alloc;
|
shared MallocResult sh_tile_alloc;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
||||||
|
@ -86,7 +82,7 @@ void main() {
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
MallocResult alloc_start = sh_tile_alloc;
|
MallocResult alloc_start = sh_tile_alloc;
|
||||||
if (alloc_start.failed) {
|
if (alloc_start.failed || mem_error != NO_ERROR) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Loading…
Reference in a new issue