Combine info and bin_data buffers

We need to reduce the number of buffer bindings to 8 so it can run on
all WebGPU devices. The best candidate was to combine info and bin_data:
they are written by two different stages (draw_leaf and binning,
respectively), are both read by coarse, and are unstructured, so the only
real shader change needed is to add the offset for the binning data.

I thought I was going to have to do a blit to make it fit, but
fortunately that wasn't needed.

Progress to #202
This commit is contained in:
Raph Levien 2022-11-29 17:35:19 -08:00
parent e8f8ebbd14
commit 876a3ad581
7 changed files with 21 additions and 21 deletions

View file

@ -146,7 +146,7 @@ fn main(
let count_packed = sh_count[count_ix / 2u][bin_ix]; let count_packed = sh_count[count_ix / 2u][bin_ix];
idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu; idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
} }
let offset = sh_chunk_offset[bin_ix]; let offset = config.bin_data_start + sh_chunk_offset[bin_ix];
bin_data[offset + idx] = element_ix; bin_data[offset + idx] = element_ix;
} }
x += 1; x += 1;

View file

@ -27,7 +27,7 @@ struct BinHeader {
var<storage> bin_headers: array<BinHeader>; var<storage> bin_headers: array<BinHeader>;
@group(0) @binding(4) @group(0) @binding(4)
var<storage> bin_data: array<u32>; var<storage> info_bin_data: array<u32>;
@group(0) @binding(5) @group(0) @binding(5)
var<storage> paths: array<Path>; var<storage> paths: array<Path>;
@ -36,12 +36,9 @@ var<storage> paths: array<Path>;
var<storage> tiles: array<Tile>; var<storage> tiles: array<Tile>;
@group(0) @binding(7) @group(0) @binding(7)
var<storage> info: array<u32>;
@group(0) @binding(8)
var<storage, read_write> bump: BumpAllocators; var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(9) @group(0) @binding(8)
var<storage, read_write> ptcl: array<u32>; var<storage, read_write> ptcl: array<u32>;
@ -208,8 +205,8 @@ fn main(
} }
} }
ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u); ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
let offset = sh_part_offsets[part_ix]; let offset = config.bin_data_start + sh_part_offsets[part_ix];
sh_drawobj_ix[local_id.x] = bin_data[offset + ix]; sh_drawobj_ix[local_id.x] = info_bin_data[offset + ix];
} }
wr_ix = min(rd_ix + N_TILE, ready_ix); wr_ix = min(rd_ix + N_TILE, ready_ix);
if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) { if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
@ -326,14 +323,14 @@ fn main(
switch drawtag { switch drawtag {
// DRAWTAG_FILL_COLOR // DRAWTAG_FILL_COLOR
case 0x44u: { case 0x44u: {
let linewidth = bitcast<f32>(info[di]); let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth); write_path(tile, linewidth);
let rgba_color = scene[dd]; let rgba_color = scene[dd];
write_color(CmdColor(rgba_color)); write_color(CmdColor(rgba_color));
} }
// DRAWTAG_FILL_LIN_GRADIENT // DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: { case 0x114u: {
let linewidth = bitcast<f32>(info[di]); let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth); write_path(tile, linewidth);
let index = scene[dd]; let index = scene[dd];
let info_offset = di + 1u; let info_offset = di + 1u;
@ -341,7 +338,7 @@ fn main(
} }
// DRAWTAG_FILL_RAD_GRADIENT // DRAWTAG_FILL_RAD_GRADIENT
case 0x2dcu: { case 0x2dcu: {
let linewidth = bitcast<f32>(info[di]); let linewidth = bitcast<f32>(info_bin_data[di]);
write_path(tile, linewidth); write_path(tile, linewidth);
let index = scene[dd]; let index = scene[dd];
let info_offset = di + 1u; let info_offset = di + 1u;

View file

@ -11,6 +11,10 @@ struct Config {
n_path: u32, n_path: u32,
n_clip: u32, n_clip: u32,
// To reduce the number of bindings, info and bin data are combined
// into one buffer.
bin_data_start: u32,
// offsets within scene buffer (in u32 units) // offsets within scene buffer (in u32 units)
// Note: this is a difference from piet-gpu, which is in bytes // Note: this is a difference from piet-gpu, which is in bytes
pathtag_base: u32, pathtag_base: u32,

View file

@ -329,7 +329,7 @@ impl Engine {
} }
Command::Clear(proxy, offset, size) => { Command::Clear(proxy, offset, size) => {
let buffer = bind_map.get_or_create(*proxy, device)?; let buffer = bind_map.get_or_create(*proxy, device)?;
encoder.clear_buffer(buffer, *offset, *size) encoder.clear_buffer(buffer, *offset, *size);
} }
} }
} }

View file

@ -33,6 +33,7 @@ struct Config {
n_drawobj: u32, n_drawobj: u32,
n_path: u32, n_path: u32,
n_clip: u32, n_clip: u32,
bin_data_start: u32,
pathtag_base: u32, pathtag_base: u32,
pathdata_base: u32, pathdata_base: u32,
drawtag_base: u32, drawtag_base: u32,
@ -209,6 +210,7 @@ pub fn render_full(
// TODO: calculate for real when we do rectangles // TODO: calculate for real when we do rectangles
let n_drawobj = n_path; let n_drawobj = n_path;
let n_clip = data.n_clip; let n_clip = data.n_clip;
let bin_data_start = n_drawobj * MAX_DRAWINFO_SIZE as u32;
let new_width = next_multiple_of(width, 16); let new_width = next_multiple_of(width, 16);
let new_height = next_multiple_of(height, 16); let new_height = next_multiple_of(height, 16);
@ -222,6 +224,7 @@ pub fn render_full(
n_drawobj, n_drawobj,
n_path, n_path,
n_clip, n_clip,
bin_data_start,
pathtag_base, pathtag_base,
pathdata_base, pathdata_base,
drawtag_base, drawtag_base,
@ -278,7 +281,7 @@ pub fn render_full(
[config_buf, scene_buf, draw_reduced_buf], [config_buf, scene_buf, draw_reduced_buf],
); );
let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE); let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE); let info_bin_data_buf = ResourceProxy::new_buf(1 << 20);
let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE); let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
recording.dispatch( recording.dispatch(
shaders.draw_leaf, shaders.draw_leaf,
@ -289,7 +292,7 @@ pub fn render_full(
draw_reduced_buf, draw_reduced_buf,
path_bbox_buf, path_bbox_buf,
draw_monoid_buf, draw_monoid_buf,
info_buf, info_bin_data_buf,
clip_inp_buf, clip_inp_buf,
], ],
); );
@ -329,7 +332,6 @@ pub fn render_full(
} }
let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE); let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
let bump_buf = BufProxy::new(BUMP_SIZE); let bump_buf = BufProxy::new(BUMP_SIZE);
let bin_data_buf = ResourceProxy::new_buf(1 << 20);
let width_in_bins = (config.width_in_tiles + 15) / 16; let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16;
let n_bins = width_in_bins * height_in_bins; let n_bins = width_in_bins * height_in_bins;
@ -346,7 +348,7 @@ pub fn render_full(
clip_bbox_buf, clip_bbox_buf,
draw_bbox_buf, draw_bbox_buf,
bump_buf, bump_buf,
bin_data_buf, info_bin_data_buf,
bin_header_buf, bin_header_buf,
], ],
); );
@ -395,10 +397,9 @@ pub fn render_full(
scene_buf, scene_buf,
draw_monoid_buf, draw_monoid_buf,
bin_header_buf, bin_header_buf,
bin_data_buf, info_bin_data_buf,
path_buf, path_buf,
tile_buf, tile_buf,
info_buf,
bump_buf, bump_buf,
ptcl_buf, ptcl_buf,
], ],
@ -414,7 +415,7 @@ pub fn render_full(
ResourceProxy::Image(out_image), ResourceProxy::Image(out_image),
ptcl_buf, ptcl_buf,
gradient_image, gradient_image,
info_buf, info_bin_data_buf,
], ],
); );
(recording, ResourceProxy::Image(out_image)) (recording, ResourceProxy::Image(out_image))

View file

@ -259,7 +259,6 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
BindType::BufReadOnly, BindType::BufReadOnly,
BindType::BufReadOnly, BindType::BufReadOnly,
BindType::BufReadOnly, BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer, BindType::Buffer,
BindType::Buffer, BindType::Buffer,
], ],

View file

@ -34,7 +34,6 @@ impl RenderContext {
let adapter = instance.request_adapter(&Default::default()).await.unwrap(); let adapter = instance.request_adapter(&Default::default()).await.unwrap();
let features = adapter.features(); let features = adapter.features();
let mut limits = Limits::default(); let mut limits = Limits::default();
limits.max_storage_buffers_per_shader_stage = 16;
let (device, queue) = adapter let (device, queue) = adapter
.request_device( .request_device(
&wgpu::DeviceDescriptor { &wgpu::DeviceDescriptor {