From e8f8ebbd14a2e6c39d6d4ef8e6f7c12bd440d308 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 29 Nov 2022 17:23:12 -0800 Subject: [PATCH] Squeeze pipeline to fit This commit reduces the workgroup shared memory of binning to fit in 16k (by packing two u16's in a u32), and moves the config binding to uniform, from readonly storage. Progress toward #202 --- piet-wgsl/shader/backdrop.wgsl | 2 +- piet-wgsl/shader/backdrop_dyn.wgsl | 2 +- piet-wgsl/shader/bbox_clear.wgsl | 2 +- piet-wgsl/shader/binning.wgsl | 20 +++++++--- piet-wgsl/shader/clip_leaf.wgsl | 2 +- piet-wgsl/shader/clip_reduce.wgsl | 2 +- piet-wgsl/shader/coarse.wgsl | 2 +- piet-wgsl/shader/draw_leaf.wgsl | 2 +- piet-wgsl/shader/draw_reduce.wgsl | 2 +- piet-wgsl/shader/fine.wgsl | 2 +- piet-wgsl/shader/path_coarse.wgsl | 2 +- piet-wgsl/shader/path_coarse_full.wgsl | 2 +- piet-wgsl/shader/pathseg.wgsl | 2 +- piet-wgsl/shader/pathtag_reduce.wgsl | 2 +- piet-wgsl/shader/pathtag_scan.wgsl | 2 +- piet-wgsl/shader/tile_alloc.wgsl | 2 +- piet-wgsl/src/engine.rs | 28 +++++++++++++ piet-wgsl/src/render.rs | 4 +- piet-wgsl/src/shaders.rs | 54 +++++++++----------------- 19 files changed, 78 insertions(+), 58 deletions(-) diff --git a/piet-wgsl/shader/backdrop.wgsl b/piet-wgsl/shader/backdrop.wgsl index 66bcdd0..0a5c599 100644 --- a/piet-wgsl/shader/backdrop.wgsl +++ b/piet-wgsl/shader/backdrop.wgsl @@ -9,7 +9,7 @@ struct Tile { #import config @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var tiles: array; diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl index ef30b46..085f44c 100644 --- a/piet-wgsl/shader/backdrop_dyn.wgsl +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -6,7 +6,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var paths: array; diff --git a/piet-wgsl/shader/bbox_clear.wgsl b/piet-wgsl/shader/bbox_clear.wgsl index 8a4f6f4..fe8cceb 100644 --- a/piet-wgsl/shader/bbox_clear.wgsl +++ b/piet-wgsl/shader/bbox_clear.wgsl @@ -3,7 +3,7 @@ #import config @group(0) @binding(0) -var config: Config; +var config: Config; struct PathBbox { x0: i32, diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index f994795..69aa821 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -8,7 +8,7 @@ #import bump @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var draw_monoids: array; @@ -46,9 +46,11 @@ let SY = 0.00390625; let WG_SIZE = 256u; let N_SLICE = 8u; //let N_SLICE = WG_SIZE / 32u; +let N_SUBSLICE = 4u; var sh_bitmaps: array, N_TILE>, N_SLICE>; -var sh_count: array, N_SLICE>; +// store count values packed two u16's to a u32 +var sh_count: array, N_SUBSLICE>; var sh_chunk_offset: array; @compute @workgroup_size(256) @@ -115,9 +117,13 @@ fn main( workgroupBarrier(); // Allocate output segments var element_count = 0u; - for (var i = 0u; i < N_SLICE; i += 1u) { - element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); - sh_count[i][local_id.x] = element_count; + for (var i = 0u; i < N_SUBSLICE; i += 1u) { + element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u][local_id.x])); + let element_count_lo = element_count; + element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u + 1u][local_id.x])); + let element_count_hi = element_count; + let element_count_packed = element_count_lo | (element_count_hi << 16u); + sh_count[i][local_id.x] = element_count_packed; } // element_count is the number of draw objects covering this thread's bin let chunk_offset = atomicAdd(&bump.binning, element_count); @@ -136,7 +142,9 @@ fn main( if (out_mask & my_mask) != 0u { var idx = countOneBits(out_mask & (my_mask - 1u)); if my_slice > 0u { - idx += sh_count[my_slice - 1u][bin_ix]; + let count_ix = my_slice - 1u; + let count_packed = sh_count[count_ix / 2u][bin_ix]; + idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu; } let offset = sh_chunk_offset[bin_ix]; bin_data[offset + idx] = element_ix; diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl index 601e3ac..f71ec9b 100644 --- a/piet-wgsl/shader/clip_leaf.wgsl +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -6,7 +6,7 @@ #import drawtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var clip_inp: array; diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl index 2b5b60c..935aea3 100644 --- a/piet-wgsl/shader/clip_reduce.wgsl +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -5,7 +5,7 @@ #import clip @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var clip_inp: array; diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 9a836d0..f139175 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -9,7 +9,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index b7d0f95..0c9a72d 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -8,7 +8,7 @@ #import bbox @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl index 994fce3..af17d78 100644 --- a/piet-wgsl/shader/draw_reduce.wgsl +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -4,7 +4,7 @@ #import drawtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 6c23035..7b298ca 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -13,7 +13,7 @@ struct Tile { #import config @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var tiles: array; diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 5df6fbe..5f168a6 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index e37038f..d6e5d91 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -10,7 +10,7 @@ #import bump @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index 79674d6..ec059ab 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -16,7 +16,7 @@ #import cubic @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathtag_reduce.wgsl b/piet-wgsl/shader/pathtag_reduce.wgsl index 58b2a50..3d00f4f 100644 --- a/piet-wgsl/shader/pathtag_reduce.wgsl +++ b/piet-wgsl/shader/pathtag_reduce.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathtag_scan.wgsl b/piet-wgsl/shader/pathtag_scan.wgsl index 735402d..f8a8005 100644 --- a/piet-wgsl/shader/pathtag_scan.wgsl +++ b/piet-wgsl/shader/pathtag_scan.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index a1aa246..b7c6fd9 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -8,7 +8,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index ac4a049..5c66303 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -86,6 +86,7 @@ pub enum ExternalResource<'a> { pub enum Command { Upload(BufProxy, Vec), + UploadUniform(BufProxy, Vec), UploadImage(ImageProxy, Vec), // Discussion question: third argument is vec of resources? // Maybe use tricks to make more ergonomic? @@ -107,6 +108,8 @@ pub enum BindType { Buffer, /// A storage buffer with read only access. BufReadOnly, + /// A small storage buffer to be used as uniforms. + Uniform, /// A storage image. Image(ImageFormat), /// A storage image with read only access. @@ -158,6 +161,16 @@ impl Engine { }, count: None, }, + BindType::Uniform => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, BindType::Image(format) | BindType::ImageRead(format) => { wgpu::BindGroupLayoutEntry { binding: i as u32, @@ -229,6 +242,14 @@ impl Engine { }); bind_map.insert_buf(buf_proxy.id, buf); } + Command::UploadUniform(buf_proxy, bytes) => { + let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: &bytes, + usage: wgpu::BufferUsages::UNIFORM, + }); + bind_map.insert_buf(buf_proxy.id, buf); + } Command::UploadImage(image_proxy, bytes) => { let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: None, @@ -329,6 +350,13 @@ impl Recording { buf_proxy } + pub fn upload_uniform(&mut self, data: impl Into>) -> BufProxy { + let data = data.into(); + let buf_proxy = BufProxy::new(data.len() as u64); + self.push(Command::UploadUniform(buf_proxy, data)); + buf_proxy + } + pub fn upload_image( &mut self, width: u32, diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 7ca9022..b7ef6aa 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -85,7 +85,7 @@ fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { ..Default::default() }; let scene_buf = recording.upload(scene); - let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let config_buf = recording.upload_uniform(bytemuck::bytes_of(&config)); let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE); // TODO: really only need pathtag_wgs - 1 @@ -231,7 +231,7 @@ pub fn render_full( }; // println!("{:?}", config); let scene_buf = ResourceProxy::Buf(recording.upload(scene)); - let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned())); + let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config))); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index a8a4176..b893ab5 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -71,17 +71,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result Result Result Result Result Result Result Result Result Result Result Result