Combine info and bin_data buffers

We need to reduce the number of buffer bindings to 8 so it can run on
all WebGPU devices. The best candidates to combine were info and bin_data:
they are written by two different stages (draw_leaf and binning,
respectively), are both read by coarse, and are unstructured, so the only
real shader change needed is to add the offset for the binning data.

I thought I was going to have to do a blit to make it fit, but
fortunately that wasn't needed.

Progress to #202
This commit is contained in:
Raph Levien 2022-11-29 17:35:19 -08:00
parent e8f8ebbd14
commit 876a3ad581
7 changed files with 21 additions and 21 deletions

View file

@ -146,7 +146,7 @@ fn main(
let count_packed = sh_count[count_ix / 2u][bin_ix];
idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
}
let offset = sh_chunk_offset[bin_ix];
let offset = config.bin_data_start + sh_chunk_offset[bin_ix];
bin_data[offset + idx] = element_ix;
}
x += 1;

View file

@ -27,7 +27,7 @@ struct BinHeader {
var<storage> bin_headers: array<BinHeader>;
@group(0) @binding(4)
var<storage> bin_data: array<u32>;
var<storage> info_bin_data: array<u32>;
@group(0) @binding(5)
var<storage> paths: array<Path>;
@ -36,12 +36,9 @@ var<storage> paths: array<Path>;
var<storage> tiles: array<Tile>;
@group(0) @binding(7)
var<storage> info: array<u32>;
@group(0) @binding(8)
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(9)
@group(0) @binding(8)
var<storage, read_write> ptcl: array<u32>;
@ -208,8 +205,8 @@ fn main(
}
}
ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
let offset = sh_part_offsets[part_ix];
sh_drawobj_ix[local_id.x] = bin_data[offset + ix];
let offset = config.bin_data_start + sh_part_offsets[part_ix];
sh_drawobj_ix[local_id.x] = info_bin_data[offset + ix];
}
wr_ix = min(rd_ix + N_TILE, ready_ix);
if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
@ -326,14 +323,14 @@ fn main(
switch drawtag {
// DRAWTAG_FILL_COLOR
case 0x44u: {
let linewidth = bitcast<f32>(info[di]);
let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth);
let rgba_color = scene[dd];
write_color(CmdColor(rgba_color));
}
// DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: {
let linewidth = bitcast<f32>(info[di]);
let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth);
let index = scene[dd];
let info_offset = di + 1u;
@ -341,7 +338,7 @@ fn main(
}
// DRAWTAG_FILL_RAD_GRADIENT
case 0x2dcu: {
let linewidth = bitcast<f32>(info[di]);
let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth);
let index = scene[dd];
let info_offset = di + 1u;

View file

@ -11,6 +11,10 @@ struct Config {
n_path: u32,
n_clip: u32,
// To reduce the number of bindings, info and bin data are combined
// into one buffer.
bin_data_start: u32,
// offsets within scene buffer (in u32 units)
// Note: this is a difference from piet-gpu, which is in bytes
pathtag_base: u32,

View file

@ -329,7 +329,7 @@ impl Engine {
}
Command::Clear(proxy, offset, size) => {
let buffer = bind_map.get_or_create(*proxy, device)?;
encoder.clear_buffer(buffer, *offset, *size)
encoder.clear_buffer(buffer, *offset, *size);
}
}
}

View file

@ -33,6 +33,7 @@ struct Config {
n_drawobj: u32,
n_path: u32,
n_clip: u32,
bin_data_start: u32,
pathtag_base: u32,
pathdata_base: u32,
drawtag_base: u32,
@ -209,6 +210,7 @@ pub fn render_full(
// TODO: calculate for real when we do rectangles
let n_drawobj = n_path;
let n_clip = data.n_clip;
let bin_data_start = n_drawobj * MAX_DRAWINFO_SIZE as u32;
let new_width = next_multiple_of(width, 16);
let new_height = next_multiple_of(height, 16);
@ -222,6 +224,7 @@ pub fn render_full(
n_drawobj,
n_path,
n_clip,
bin_data_start,
pathtag_base,
pathdata_base,
drawtag_base,
@ -278,7 +281,7 @@ pub fn render_full(
[config_buf, scene_buf, draw_reduced_buf],
);
let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
let info_bin_data_buf = ResourceProxy::new_buf(1 << 20);
let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
recording.dispatch(
shaders.draw_leaf,
@ -289,7 +292,7 @@ pub fn render_full(
draw_reduced_buf,
path_bbox_buf,
draw_monoid_buf,
info_buf,
info_bin_data_buf,
clip_inp_buf,
],
);
@ -329,7 +332,6 @@ pub fn render_full(
}
let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
let bump_buf = BufProxy::new(BUMP_SIZE);
let bin_data_buf = ResourceProxy::new_buf(1 << 20);
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
let n_bins = width_in_bins * height_in_bins;
@ -346,7 +348,7 @@ pub fn render_full(
clip_bbox_buf,
draw_bbox_buf,
bump_buf,
bin_data_buf,
info_bin_data_buf,
bin_header_buf,
],
);
@ -395,10 +397,9 @@ pub fn render_full(
scene_buf,
draw_monoid_buf,
bin_header_buf,
bin_data_buf,
info_bin_data_buf,
path_buf,
tile_buf,
info_buf,
bump_buf,
ptcl_buf,
],
@ -414,7 +415,7 @@ pub fn render_full(
ResourceProxy::Image(out_image),
ptcl_buf,
gradient_image,
info_buf,
info_bin_data_buf,
],
);
(recording, ResourceProxy::Image(out_image))

View file

@ -259,7 +259,6 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
],

View file

@ -34,7 +34,6 @@ impl RenderContext {
let adapter = instance.request_adapter(&Default::default()).await.unwrap();
let features = adapter.features();
let mut limits = Limits::default();
limits.max_storage_buffers_per_shader_stage = 16;
let (device, queue) = adapter
.request_device(
&wgpu::DeviceDescriptor {