From 27e6fdd267577a3297a1653cffbf452a8e6ec32d Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Sun, 29 Jan 2023 08:51:51 -0800 Subject: [PATCH] Partially revert uniform load of bump.failed Just load the atomic bump counter directly instead of piping it through a shared variable, when workgroupUniformLoad is not available. The value is in fact dynamically uniform, but that depends on the stage not setting its own failure flag, a fairly subtle invariant. I think there was a write-after-read hazard for the reuse of sh_part_count[0]. However, doing the experiment of just changing that doesn't fix the problem on mac. It's possible there's a shader compilation problem (possibly the same one as provoking the storageBarrier workaround in tile_alloc), or also possibly a logic error I'm not understanding. In any case, this change does appear to fix the hangs on mac. Fixes #267 --- shader/coarse.wgsl | 5 ++--- shader/tile_alloc.wgsl | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/shader/coarse.wgsl b/shader/coarse.wgsl index df09de9..0963c29 100644 --- a/shader/coarse.wgsl +++ b/shader/coarse.wgsl @@ -148,15 +148,14 @@ fn main( // Exit early if prior stages failed, as we can't run this stage. // We need to check only prior stages, as if this stage has failed in another workgroup, // we still want to know this workgroup's memory requirement. 
+#ifdef have_uniform if local_id.x == 0u { // Reuse sh_part_count to hold failed flag, shmem is tight sh_part_count[0] = atomicLoad(&bump.failed); } -#ifdef have_uniform let failed = workgroupUniformLoad(&sh_part_count[0]); #else - workgroupBarrier(); - let failed = sh_part_count[0]; + let failed = atomicLoad(&bump.failed); #endif if (failed & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u { return; diff --git a/shader/tile_alloc.wgsl b/shader/tile_alloc.wgsl index 8d39e7c..8ec9217 100644 --- a/shader/tile_alloc.wgsl +++ b/shader/tile_alloc.wgsl @@ -29,7 +29,9 @@ let WG_SIZE = 256u; var<workgroup> sh_tile_count: array<u32, WG_SIZE>; var<workgroup> sh_tile_offset: u32; +#ifdef have_uniform var<workgroup> sh_atomic_failed: u32; +#endif @compute @workgroup_size(256) fn main( @@ -39,14 +41,13 @@ fn main( // Exit early if prior stages failed, as we can't run this stage. // We need to check only prior stages, as if this stage has failed in another workgroup, // we still want to know this workgroup's memory requirement. +#ifdef have_uniform if local_id.x == 0u { sh_atomic_failed = atomicLoad(&bump.failed); } -#ifdef have_uniform let failed = workgroupUniformLoad(&sh_atomic_failed); #else - workgroupBarrier(); - let failed = sh_atomic_failed; + let failed = atomicLoad(&bump.failed); #endif if (failed & STAGE_BINNING) != 0u { return;