diff --git a/piet-wgsl/shader/backdrop.wgsl b/piet-wgsl/shader/backdrop.wgsl index 66bcdd0..0a5c599 100644 --- a/piet-wgsl/shader/backdrop.wgsl +++ b/piet-wgsl/shader/backdrop.wgsl @@ -9,7 +9,7 @@ struct Tile { #import config @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var tiles: array; diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl index ef30b46..085f44c 100644 --- a/piet-wgsl/shader/backdrop_dyn.wgsl +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -6,7 +6,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var paths: array; diff --git a/piet-wgsl/shader/bbox_clear.wgsl b/piet-wgsl/shader/bbox_clear.wgsl index 8a4f6f4..fe8cceb 100644 --- a/piet-wgsl/shader/bbox_clear.wgsl +++ b/piet-wgsl/shader/bbox_clear.wgsl @@ -3,7 +3,7 @@ #import config @group(0) @binding(0) -var config: Config; +var config: Config; struct PathBbox { x0: i32, diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index f994795..0aab723 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -8,7 +8,7 @@ #import bump @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var draw_monoids: array; @@ -46,9 +46,11 @@ let SY = 0.00390625; let WG_SIZE = 256u; let N_SLICE = 8u; //let N_SLICE = WG_SIZE / 32u; +let N_SUBSLICE = 4u; var sh_bitmaps: array, N_TILE>, N_SLICE>; -var sh_count: array, N_SLICE>; +// store count values packed two u16's to a u32 +var sh_count: array, N_SUBSLICE>; var sh_chunk_offset: array; @compute @workgroup_size(256) @@ -115,9 +117,13 @@ fn main( workgroupBarrier(); // Allocate output segments var element_count = 0u; - for (var i = 0u; i < N_SLICE; i += 1u) { - element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); - sh_count[i][local_id.x] = element_count; + for (var i = 0u; i < N_SUBSLICE; i += 1u) { + element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u][local_id.x])); + let element_count_lo = element_count; + element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u + 1u][local_id.x])); + let element_count_hi = element_count; + let element_count_packed = element_count_lo | (element_count_hi << 16u); + sh_count[i][local_id.x] = element_count_packed; } // element_count is the number of draw objects covering this thread's bin let chunk_offset = atomicAdd(&bump.binning, element_count); @@ -136,9 +142,11 @@ fn main( if (out_mask & my_mask) != 0u { var idx = countOneBits(out_mask & (my_mask - 1u)); if my_slice > 0u { - idx += sh_count[my_slice - 1u][bin_ix]; + let count_ix = my_slice - 1u; + let count_packed = sh_count[count_ix / 2u][bin_ix]; + idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu; } - let offset = sh_chunk_offset[bin_ix]; + let offset = config.bin_data_start + sh_chunk_offset[bin_ix]; bin_data[offset + idx] = element_ix; } x += 1; diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl index 601e3ac..f71ec9b 100644 --- a/piet-wgsl/shader/clip_leaf.wgsl +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -6,7 +6,7 @@ #import drawtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var clip_inp: array; diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl index 2b5b60c..935aea3 100644 --- a/piet-wgsl/shader/clip_reduce.wgsl +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -5,7 +5,7 @@ #import clip @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var clip_inp: array; diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 9a836d0..f60753f 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -9,7 +9,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; @@ -27,7 +27,7 @@ struct BinHeader { var bin_headers: array; @group(0) @binding(4) -var bin_data: array; +var info_bin_data: array; @group(0) @binding(5) var paths: array; @@ -36,12 +36,9 @@ var paths: array; var tiles: array; @group(0) @binding(7) -var info: array; - -@group(0) @binding(8) var bump: BumpAllocators; -@group(0) @binding(9) +@group(0) @binding(8) var ptcl: array; @@ -208,8 +205,8 @@ fn main( } } ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u); - let offset = sh_part_offsets[part_ix]; - sh_drawobj_ix[local_id.x] = bin_data[offset + ix]; + let offset = config.bin_data_start + sh_part_offsets[part_ix]; + sh_drawobj_ix[local_id.x] = info_bin_data[offset + ix]; } wr_ix = min(rd_ix + N_TILE, ready_ix); if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) { @@ -326,14 +323,14 @@ fn main( switch drawtag { // DRAWTAG_FILL_COLOR case 0x44u: { - let linewidth = bitcast(info[di]); + let linewidth = bitcast(info_bin_data[di]); write_path(tile, linewidth); let rgba_color = scene[dd]; write_color(CmdColor(rgba_color)); } // DRAWTAG_FILL_LIN_GRADIENT case 0x114u: { - let linewidth = bitcast(info[di]); + let linewidth = bitcast(info_bin_data[di]); write_path(tile, linewidth); let index = scene[dd]; let info_offset = di + 1u; @@ -341,7 +338,7 @@ fn main( } // DRAWTAG_FILL_RAD_GRADIENT case 0x2dcu: { - let linewidth = bitcast(info[di]); + let linewidth = bitcast(info_bin_data[di]); write_path(tile, linewidth); let index = scene[dd]; let info_offset = di + 1u; diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index b7d0f95..0c9a72d 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -8,7 +8,7 @@ #import bbox @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl index 994fce3..af17d78 100644 --- a/piet-wgsl/shader/draw_reduce.wgsl +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -4,7 +4,7 @@ #import drawtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 6c23035..7b298ca 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -13,7 +13,7 @@ struct Tile { #import config @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var tiles: array; diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 5df6fbe..5f168a6 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index e37038f..d6e5d91 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -10,7 +10,7 @@ #import bump @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index 79674d6..ec059ab 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -16,7 +16,7 @@ #import cubic @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathtag_reduce.wgsl b/piet-wgsl/shader/pathtag_reduce.wgsl index 58b2a50..3d00f4f 100644 --- a/piet-wgsl/shader/pathtag_reduce.wgsl +++ b/piet-wgsl/shader/pathtag_reduce.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/pathtag_scan.wgsl b/piet-wgsl/shader/pathtag_scan.wgsl index 735402d..f8a8005 100644 --- a/piet-wgsl/shader/pathtag_scan.wgsl +++ b/piet-wgsl/shader/pathtag_scan.wgsl @@ -4,7 +4,7 @@ #import pathtag @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 21d9b0e..54f94f6 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -11,6 +11,10 @@ struct Config { n_path: u32, n_clip: u32, + // To reduce the number of bindings, info and bin data are combined + // into one buffer. + bin_data_start: u32, + // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes pathtag_base: u32, diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index a1aa246..b7c6fd9 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -8,7 +8,7 @@ #import tile @group(0) @binding(0) -var config: Config; +var config: Config; @group(0) @binding(1) var scene: array; diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index ac4a049..b9d16b1 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -86,6 +86,7 @@ pub enum ExternalResource<'a> { pub enum Command { Upload(BufProxy, Vec), + UploadUniform(BufProxy, Vec), UploadImage(ImageProxy, Vec), // Discussion question: third argument is vec of resources? // Maybe use tricks to make more ergonomic? @@ -107,6 +108,8 @@ pub enum BindType { Buffer, /// A storage buffer with read only access. BufReadOnly, + /// A small storage buffer to be used as uniforms. + Uniform, /// A storage image. Image(ImageFormat), /// A storage image with read only access. @@ -158,6 +161,16 @@ impl Engine { }, count: None, }, + BindType::Uniform => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, BindType::Image(format) | BindType::ImageRead(format) => { wgpu::BindGroupLayoutEntry { binding: i as u32, @@ -229,6 +242,14 @@ impl Engine { }); bind_map.insert_buf(buf_proxy.id, buf); } + Command::UploadUniform(buf_proxy, bytes) => { + let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: &bytes, + usage: wgpu::BufferUsages::UNIFORM, + }); + bind_map.insert_buf(buf_proxy.id, buf); + } Command::UploadImage(image_proxy, bytes) => { let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: None, @@ -308,7 +329,7 @@ impl Engine { } Command::Clear(proxy, offset, size) => { let buffer = bind_map.get_or_create(*proxy, device)?; - encoder.clear_buffer(buffer, *offset, *size) + encoder.clear_buffer(buffer, *offset, *size); } } } @@ -329,6 +350,13 @@ impl Recording { buf_proxy } + pub fn upload_uniform(&mut self, data: impl Into>) -> BufProxy { + let data = data.into(); + let buf_proxy = BufProxy::new(data.len() as u64); + self.push(Command::UploadUniform(buf_proxy, data)); + buf_proxy + } + pub fn upload_image( &mut self, width: u32, diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 7ca9022..54a0bb6 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -33,6 +33,7 @@ struct Config { n_drawobj: u32, n_path: u32, n_clip: u32, + bin_data_start: u32, pathtag_base: u32, pathdata_base: u32, drawtag_base: u32, @@ -85,7 +86,7 @@ fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { ..Default::default() }; let scene_buf = recording.upload(scene); - let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let config_buf = recording.upload_uniform(bytemuck::bytes_of(&config)); let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE); // TODO: really only need pathtag_wgs - 1 @@ -209,6 +210,7 @@ pub fn render_full( // TODO: calculate for real when we do rectangles let n_drawobj = n_path; let n_clip = data.n_clip; + let bin_data_start = n_drawobj * MAX_DRAWINFO_SIZE as u32; let new_width = next_multiple_of(width, 16); let new_height = next_multiple_of(height, 16); @@ -222,6 +224,7 @@ pub fn render_full( n_drawobj, n_path, n_clip, + bin_data_start, pathtag_base, pathdata_base, drawtag_base, @@ -231,7 +234,7 @@ pub fn render_full( }; // println!("{:?}", config); let scene_buf = ResourceProxy::Buf(recording.upload(scene)); - let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned())); + let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config))); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); @@ -278,7 +281,7 @@ pub fn render_full( [config_buf, scene_buf, draw_reduced_buf], ); let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE); - let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE); + let info_bin_data_buf = ResourceProxy::new_buf(1 << 20); let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE); recording.dispatch( shaders.draw_leaf, @@ -289,7 +292,7 @@ pub fn render_full( draw_reduced_buf, path_bbox_buf, draw_monoid_buf, - info_buf, + info_bin_data_buf, clip_inp_buf, ], ); @@ -329,7 +332,6 @@ pub fn render_full( } let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE); let bump_buf = BufProxy::new(BUMP_SIZE); - let bin_data_buf = ResourceProxy::new_buf(1 << 20); let width_in_bins = (config.width_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16; let n_bins = width_in_bins * height_in_bins; @@ -346,7 +348,7 @@ pub fn render_full( clip_bbox_buf, draw_bbox_buf, bump_buf, - bin_data_buf, + info_bin_data_buf, bin_header_buf, ], ); @@ -395,10 +397,9 @@ pub fn render_full( scene_buf, draw_monoid_buf, bin_header_buf, - bin_data_buf, + info_bin_data_buf, path_buf, tile_buf, - info_buf, bump_buf, ptcl_buf, ], @@ -414,7 +415,7 @@ pub fn render_full( ResourceProxy::Image(out_image), ptcl_buf, gradient_image, - info_buf, + info_bin_data_buf, ], ); (recording, ResourceProxy::Image(out_image)) diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index a8a4176..1df6b14 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -71,17 +71,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result Result Result Result Result Result Result Result Result Result Result Result