From 0c0c61dc82bf209282a77eabd27c42f3b4f988ec Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Wed, 18 Jan 2023 21:36:32 -0500 Subject: [PATCH] Address review feedback * Add counts to offsets when comparing against buffer size limits * Remove multiplication by 4 in blend buffer allocation (we use units of u32) * Move buffer sizes from BumpAllocators to Config * Add comments about early exit --- shader/binning.wgsl | 2 +- shader/coarse.wgsl | 11 +++++++---- shader/path_coarse_full.wgsl | 5 ++++- shader/shared/bump.wgsl | 4 ---- shader/shared/config.wgsl | 6 ++++++ shader/tile_alloc.wgsl | 8 ++++++-- src/encoding/packed.rs | 8 ++++++++ src/render.rs | 29 +++++++++++------------------ 8 files changed, 43 insertions(+), 30 deletions(-) diff --git a/shader/binning.wgsl b/shader/binning.wgsl index d2e7e87..77ab462 100644 --- a/shader/binning.wgsl +++ b/shader/binning.wgsl @@ -128,7 +128,7 @@ fn main( } // element_count is the number of draw objects covering this thread's bin var chunk_offset = atomicAdd(&bump.binning, element_count); - if chunk_offset > bump.binning_size { + if chunk_offset + element_count > config.binning_size { chunk_offset = 0u; atomicOr(&bump.failed, STAGE_BINNING); } diff --git a/shader/coarse.wgsl b/shader/coarse.wgsl index ec47db0..cea3637 100644 --- a/shader/coarse.wgsl +++ b/shader/coarse.wgsl @@ -71,7 +71,7 @@ fn alloc_cmd(size: u32) { // by setting the initial value of the bump allocator. let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC; var new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT); - if new_cmd > bump.ptcl_size { + if new_cmd + PTCL_INCREMENT > config.ptcl_size { new_cmd = 0u; atomicOr(&bump.failed, STAGE_COARSE); } @@ -137,7 +137,7 @@ fn write_end_clip(end_clip: CmdEndClip) { ptcl[cmd_offset] = CMD_END_CLIP; ptcl[cmd_offset + 1u] = end_clip.blend; ptcl[cmd_offset + 2u] = bitcast(end_clip.alpha); - cmd_offset += 3u; + cmd_offset += 3u; } @compute @workgroup_size(256) @@ -145,9 +145,12 @@ fn main( @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { + // Exit early if prior stages failed, as we can't run this stage. + // We need to check only prior stages, as if this stage has failed in another workgroup, + // we still want to know this workgroup's memory requirement. if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u { return; - } + } let width_in_bins = (config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X; let bin_ix = width_in_bins * wg_id.y + wg_id.x; let n_partitions = (config.n_drawobj + N_TILE - 1u) / N_TILE; @@ -411,7 +414,7 @@ fn main( if bin_tile_x + tile_x < config.width_in_tiles && bin_tile_y + tile_y < config.height_in_tiles { ptcl[cmd_offset] = CMD_END; if max_blend_depth > BLEND_STACK_SPLIT { - let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT * 4u; + let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT; ptcl[blend_offset] = atomicAdd(&bump.blend, scratch_size); } } diff --git a/shader/path_coarse_full.wgsl b/shader/path_coarse_full.wgsl index c1847cf..ef1bed5 100644 --- a/shader/path_coarse_full.wgsl +++ b/shader/path_coarse_full.wgsl @@ -94,7 +94,7 @@ fn eval_cubic(p0: vec2, p1: vec2, p2: vec2, p3: vec2, t: f32 fn alloc_segment() -> u32 { var offset = atomicAdd(&bump.segments, 1u) + 1u; - if offset > bump.segments_size { + if offset + 1u > config.segments_size { offset = 0u; atomicOr(&bump.failed, STAGE_PATH_COARSE); } @@ -107,6 +107,9 @@ let MAX_QUADS = 16u; fn main( @builtin(global_invocation_id) global_id: vec3, ) { + // Exit early if prior stages failed, as we can't run this stage. + // We need to check only prior stages, as if this stage has failed in another workgroup, + // we still want to know this workgroup's memory requirement. if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC)) != 0u { return; } diff --git a/shader/shared/bump.wgsl b/shader/shared/bump.wgsl index e3ec23c..4864f31 100644 --- a/shader/shared/bump.wgsl +++ b/shader/shared/bump.wgsl @@ -10,10 +10,6 @@ let STAGE_COARSE: u32 = 0x8u; struct BumpAllocators { // Bitmask of stages that have failed allocation. failed: atomic, - binning_size: u32, - ptcl_size: u32, - tiles_size: u32, - segments_size: u32, binning: atomic, ptcl: atomic, tile: atomic, diff --git a/shader/shared/config.wgsl b/shader/shared/config.wgsl index 1579341..f586f47 100644 --- a/shader/shared/config.wgsl +++ b/shader/shared/config.wgsl @@ -24,6 +24,12 @@ struct Config { transform_base: u32, linewidth_base: u32, + + // Sizes of bump allocated buffers (in element size units) + binning_size: u32, + tiles_size: u32, + segments_size: u32, + ptcl_size: u32, } // Geometry of tiles and bins diff --git a/shader/tile_alloc.wgsl b/shader/tile_alloc.wgsl index b57fb96..b28166e 100644 --- a/shader/tile_alloc.wgsl +++ b/shader/tile_alloc.wgsl @@ -35,6 +35,9 @@ fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, ) { + // Exit early if prior stages failed, as we can't run this stage. + // We need to check only prior stages, as if this stage has failed in another workgroup, + // we still want to know this workgroup's memory requirement. if (atomicLoad(&bump.failed) & STAGE_BINNING) != 0u { return; } @@ -75,8 +78,9 @@ fn main( sh_tile_count[local_id.x] = total_tile_count; } if local_id.x == WG_SIZE - 1u { - var offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); - if offset > bump.tiles_size { + let count = sh_tile_count[WG_SIZE - 1u]; + var offset = atomicAdd(&bump.tile, count); + if offset + count > config.tiles_size { offset = 0u; atomicOr(&bump.failed, STAGE_TILE_ALLOC); } diff --git a/src/encoding/packed.rs b/src/encoding/packed.rs index e96ec3a..55ffc17 100644 --- a/src/encoding/packed.rs +++ b/src/encoding/packed.rs @@ -62,6 +62,14 @@ pub struct Config { pub target_height: u32, /// Layout of packed scene data. pub layout: Layout, + /// Size of binning buffer allocation (in u32s). + pub binning_size: u32, + /// Size of tile buffer allocation (in Tiles). + pub tiles_size: u32, + /// Size of segment buffer allocation (in PathSegments). + pub segments_size: u32, + /// Size of per-tile command list buffer allocation (in u32s). + pub ptcl_size: u32, } /// Packed encoding of scene data. diff --git a/src/render.rs b/src/render.rs index 6947962..9136931 100644 --- a/src/render.rs +++ b/src/render.rs @@ -61,11 +61,6 @@ pub const fn next_multiple_of(val: u32, rhs: u32) -> u32 { #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct BumpAllocators { failed: u32, - // Sizes of the provided buffers - binning_size: u32, - ptcl_size: u32, - tiles_size: u32, - segments_size: u32, // Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element // reallocation needs to occur binning: u32, @@ -195,33 +190,30 @@ pub fn render_encoding_full( let new_width = next_multiple_of(width, 16); let new_height = next_multiple_of(height, 16); + let info_size = packed.layout.bin_data_start; let config = crate::encoding::Config { width_in_tiles: new_width / 16, height_in_tiles: new_height / 16, target_width: width, target_height: height, + binning_size: ((1 << 20) / 4) - info_size, + tiles_size: (1 << 24) / TILE_SIZE as u32, + segments_size: (1 << 26) / SEGMENT_SIZE as u32, + ptcl_size: (1 << 25) / 4, layout: packed.layout, }; // println!("{:?}", config); let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed.data)); let config_buf = ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config))); - let info_size = config.layout.bin_data_start; - let bump = BumpAllocators { - binning_size: ((1 << 20) / 4) - info_size, - ptcl_size: (1 << 25) / 4, - tiles_size: (1 << 24) / TILE_SIZE as u32, - segments_size: (1 << 26) / SEGMENT_SIZE as u32, - ..Default::default() - }; let info_bin_data_buf = ResourceProxy::new_buf( - (info_size + bump.binning_size) as u64 * 4, + (info_size + config.binning_size) as u64 * 4, "info_bin_data_buf", ); - let tile_buf = ResourceProxy::new_buf(bump.tiles_size as u64 * TILE_SIZE, "tile_buf"); + let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf"); let segments_buf = - ResourceProxy::new_buf(bump.segments_size as u64 * SEGMENT_SIZE, "segments_buf"); - let ptcl_buf = ResourceProxy::new_buf(bump.ptcl_size as u64 * 4, "ptcl_buf"); + ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf"); + let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf"); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize; @@ -356,13 +348,14 @@ pub fn render_encoding_full( ); } let draw_bbox_buf = ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf"); - let bump_buf = recording.upload("bump_buf", bytemuck::bytes_of(&bump)); + let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf"); let width_in_bins = (config.width_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16; let bin_header_buf = ResourceProxy::new_buf( (256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE, "bin_header_buf", ); + recording.clear_all(bump_buf); let bump_buf = ResourceProxy::Buf(bump_buf); recording.dispatch( shaders.binning,