Combine info and bin_data buffers

We need to reduce the number of buffer bindings to 8 so it can run on all WebGPU devices. The best candidates to combine were info and bin_data: they are written by two different stages (draw_leaf and binning, respectively), are both read by coarse, and are unstructured, so the only real shader change needed is to add the offset for the binning data. I thought I was going to have to do a blit to make it fit, but fortunately that wasn't needed. Progress toward #202.
2025-01-09 20:31:29 +11:00 · 2022-11-29 17:35:19 -08:00 · 2022-11-29 17:35:19 -08:00 · 876a3ad581
parent e8f8ebbd14
commit 876a3ad581
7 changed files with 21 additions and 21 deletions
--- a/piet-wgsl/shader/binning.wgsl
+++ b/piet-wgsl/shader/binning.wgsl
@ -146,7 +146,7 @@ fn main(
                let count_packed = sh_count[count_ix / 2u][bin_ix];
                idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
            }
-            let offset = sh_chunk_offset[bin_ix];
+            let offset = config.bin_data_start + sh_chunk_offset[bin_ix];
            bin_data[offset + idx] = element_ix;
        }
        x += 1;
--- a/piet-wgsl/shader/coarse.wgsl
+++ b/piet-wgsl/shader/coarse.wgsl
@ -27,7 +27,7 @@ struct BinHeader {
 var<storage> bin_headers: array<BinHeader>;

@group(0) @binding(4)
-var<storage> bin_data: array<u32>;
+var<storage> info_bin_data: array<u32>;

@group(0) @binding(5)
 var<storage> paths: array<Path>;
@ -36,12 +36,9 @@ var<storage> paths: array<Path>;
 var<storage> tiles: array<Tile>;

@group(0) @binding(7)
-var<storage> info: array<u32>;
-
-@group(0) @binding(8)
 var<storage, read_write> bump: BumpAllocators;

-@group(0) @binding(9)
+@group(0) @binding(8)
 var<storage, read_write> ptcl: array<u32>;


@ -208,8 +205,8 @@ fn main(
                    }
                }
                ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
-                let offset = sh_part_offsets[part_ix];
-                sh_drawobj_ix[local_id.x] = bin_data[offset + ix];
+                let offset = config.bin_data_start + sh_part_offsets[part_ix];
+                sh_drawobj_ix[local_id.x] = info_bin_data[offset + ix];
            }
            wr_ix = min(rd_ix + N_TILE, ready_ix);
            if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
@ -326,14 +323,14 @@ fn main(
                switch drawtag {
                    // DRAWTAG_FILL_COLOR
                    case 0x44u: {
-                        let linewidth = bitcast<f32>(info[di]);
+                        let linewidth = bitcast<f32>(info_bin_data[di]);
                        write_path(tile, linewidth);
                        let rgba_color = scene[dd];
                        write_color(CmdColor(rgba_color));
                    }
                    // DRAWTAG_FILL_LIN_GRADIENT
                    case 0x114u: {
-                        let linewidth = bitcast<f32>(info[di]);
+                        let linewidth = bitcast<f32>(info_bin_data[di]);
                        write_path(tile, linewidth);
                        let index = scene[dd];
                        let info_offset = di + 1u;
@ -341,7 +338,7 @@ fn main(
                    }
                    // DRAWTAG_FILL_RAD_GRADIENT
                    case 0x2dcu: {
-                        let linewidth = bitcast<f32>(info[di]);
+                        let linewidth = bitcast<f32>(info_bin_data[di]);
                        write_path(tile, linewidth);
                        let index = scene[dd];
                        let info_offset = di + 1u;
--- a/piet-wgsl/shader/shared/config.wgsl
+++ b/piet-wgsl/shader/shared/config.wgsl
@ -11,6 +11,10 @@ struct Config {
    n_path: u32,
    n_clip: u32,

+    // To reduce the number of bindings, info and bin data are combined
+    // into one buffer.
+    bin_data_start: u32,
+
    // offsets within scene buffer (in u32 units)
    // Note: this is a difference from piet-gpu, which is in bytes
    pathtag_base: u32,
--- a/piet-wgsl/src/engine.rs
+++ b/piet-wgsl/src/engine.rs
@ -329,7 +329,7 @@ impl Engine {
                }
                Command::Clear(proxy, offset, size) => {
                    let buffer = bind_map.get_or_create(*proxy, device)?;
-                    encoder.clear_buffer(buffer, *offset, *size)
+                    encoder.clear_buffer(buffer, *offset, *size);
                }
            }
        }
--- a/piet-wgsl/src/render.rs
+++ b/piet-wgsl/src/render.rs
@ -33,6 +33,7 @@ struct Config {
    n_drawobj: u32,
    n_path: u32,
    n_clip: u32,
+    bin_data_start: u32,
    pathtag_base: u32,
    pathdata_base: u32,
    drawtag_base: u32,
@ -209,6 +210,7 @@ pub fn render_full(
    // TODO: calculate for real when we do rectangles
    let n_drawobj = n_path;
    let n_clip = data.n_clip;
+    let bin_data_start = n_drawobj * MAX_DRAWINFO_SIZE as u32;

    let new_width = next_multiple_of(width, 16);
    let new_height = next_multiple_of(height, 16);
@ -222,6 +224,7 @@ pub fn render_full(
        n_drawobj,
        n_path,
        n_clip,
+        bin_data_start,
        pathtag_base,
        pathdata_base,
        drawtag_base,
@ -278,7 +281,7 @@ pub fn render_full(
        [config_buf, scene_buf, draw_reduced_buf],
    );
    let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
-    let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
+    let info_bin_data_buf = ResourceProxy::new_buf(1 << 20);
    let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
    recording.dispatch(
        shaders.draw_leaf,
@ -289,7 +292,7 @@ pub fn render_full(
            draw_reduced_buf,
            path_bbox_buf,
            draw_monoid_buf,
-            info_buf,
+            info_bin_data_buf,
            clip_inp_buf,
        ],
    );
@ -329,7 +332,6 @@ pub fn render_full(
    }
    let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
    let bump_buf = BufProxy::new(BUMP_SIZE);
-    let bin_data_buf = ResourceProxy::new_buf(1 << 20);
    let width_in_bins = (config.width_in_tiles + 15) / 16;
    let height_in_bins = (config.height_in_tiles + 15) / 16;
    let n_bins = width_in_bins * height_in_bins;
@ -346,7 +348,7 @@ pub fn render_full(
            clip_bbox_buf,
            draw_bbox_buf,
            bump_buf,
-            bin_data_buf,
+            info_bin_data_buf,
            bin_header_buf,
        ],
    );
@ -395,10 +397,9 @@ pub fn render_full(
            scene_buf,
            draw_monoid_buf,
            bin_header_buf,
-            bin_data_buf,
+            info_bin_data_buf,
            path_buf,
            tile_buf,
-            info_buf,
            bump_buf,
            ptcl_buf,
        ],
@ -414,7 +415,7 @@ pub fn render_full(
            ResourceProxy::Image(out_image),
            ptcl_buf,
            gradient_image,
-            info_buf,
+            info_bin_data_buf,
        ],
    );
    (recording, ResourceProxy::Image(out_image))
--- a/piet-wgsl/src/shaders.rs
+++ b/piet-wgsl/src/shaders.rs
@ -259,7 +259,6 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
-            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
        ],
--- a/piet-wgsl/src/util.rs
+++ b/piet-wgsl/src/util.rs
@ -34,7 +34,6 @@ impl RenderContext {
        let adapter = instance.request_adapter(&Default::default()).await.unwrap();
        let features = adapter.features();
        let mut limits = Limits::default();
-        limits.max_storage_buffers_per_shader_stage = 16;
        let (device, queue) = adapter
            .request_device(
                &wgpu::DeviceDescriptor {