mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-23 18:06:34 +11:00
Partially revert uniform load of bump.failed
Just load the atomic bump counter directly instead of piping it through a shared variable, when workgroupUniformLoad is not available. The value is in fact dynamically uniform, but that depends on the stage not setting its own failure flag, a fairly subtle invariant. I think there was a write-after-read hazard for the reuse of sh_part_count[0]. However, doing the experiment of just changing that doesn't fix the problem on mac. It's possible there's a shader compilation problem (possibly the same one as provoking the storageBarrier workaround in tile_alloc), or also possibly a logic error I'm not understanding. In any case, this change does appear to fix the hangs on mac. Fixes #267
This commit is contained in:
parent
6a184244e6
commit
27e6fdd267
2 changed files with 6 additions and 6 deletions
|
@ -148,15 +148,14 @@ fn main(
|
||||||
// Exit early if prior stages failed, as we can't run this stage.
|
// Exit early if prior stages failed, as we can't run this stage.
|
||||||
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
||||||
// we still want to know this workgroup's memory requirement.
|
// we still want to know this workgroup's memory requirement.
|
||||||
|
#ifdef have_uniform
|
||||||
if local_id.x == 0u {
|
if local_id.x == 0u {
|
||||||
// Reuse sh_part_count to hold failed flag, shmem is tight
|
// Reuse sh_part_count to hold failed flag, shmem is tight
|
||||||
sh_part_count[0] = atomicLoad(&bump.failed);
|
sh_part_count[0] = atomicLoad(&bump.failed);
|
||||||
}
|
}
|
||||||
#ifdef have_uniform
|
|
||||||
let failed = workgroupUniformLoad(&sh_part_count[0]);
|
let failed = workgroupUniformLoad(&sh_part_count[0]);
|
||||||
#else
|
#else
|
||||||
workgroupBarrier();
|
let failed = atomicLoad(&bump.failed);
|
||||||
let failed = sh_part_count[0];
|
|
||||||
#endif
|
#endif
|
||||||
if (failed & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u {
|
if (failed & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -29,7 +29,9 @@ let WG_SIZE = 256u;
|
||||||
|
|
||||||
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
|
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
|
||||||
var<workgroup> sh_tile_offset: u32;
|
var<workgroup> sh_tile_offset: u32;
|
||||||
|
#ifdef have_uniform
|
||||||
var<workgroup> sh_atomic_failed: u32;
|
var<workgroup> sh_atomic_failed: u32;
|
||||||
|
#endif
|
||||||
|
|
||||||
@compute @workgroup_size(256)
|
@compute @workgroup_size(256)
|
||||||
fn main(
|
fn main(
|
||||||
|
@ -39,14 +41,13 @@ fn main(
|
||||||
// Exit early if prior stages failed, as we can't run this stage.
|
// Exit early if prior stages failed, as we can't run this stage.
|
||||||
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
||||||
// we still want to know this workgroup's memory requirement.
|
// we still want to know this workgroup's memory requirement.
|
||||||
|
#ifdef have_uniform
|
||||||
if local_id.x == 0u {
|
if local_id.x == 0u {
|
||||||
sh_atomic_failed = atomicLoad(&bump.failed);
|
sh_atomic_failed = atomicLoad(&bump.failed);
|
||||||
}
|
}
|
||||||
#ifdef have_uniform
|
|
||||||
let failed = workgroupUniformLoad(&sh_atomic_failed);
|
let failed = workgroupUniformLoad(&sh_atomic_failed);
|
||||||
#else
|
#else
|
||||||
workgroupBarrier();
|
let failed = atomicLoad(&bump.failed);
|
||||||
let failed = sh_atomic_failed;
|
|
||||||
#endif
|
#endif
|
||||||
if (failed & STAGE_BINNING) != 0u {
|
if (failed & STAGE_BINNING) != 0u {
|
||||||
return;
|
return;
|
||||||
|
|
Loading…
Add table
Reference in a new issue