mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Combine info and bin_data buffers
We need to reduce the number of buffer bindings to 8 so it can run on all WebGPU devices. The best candidate was to combine info and bin_data: they are written by two different stages (draw_leaf and binning, respectively), are both read by coarse, and are unstructured, so the only real shader change needed is to add the offset for the binning data. I thought I was going to have to do a blit to make it fit, but fortunately that wasn't needed. Progress toward #202.
This commit is contained in:
parent
e8f8ebbd14
commit
876a3ad581
|
@ -146,7 +146,7 @@ fn main(
|
|||
let count_packed = sh_count[count_ix / 2u][bin_ix];
|
||||
idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
|
||||
}
|
||||
let offset = sh_chunk_offset[bin_ix];
|
||||
let offset = config.bin_data_start + sh_chunk_offset[bin_ix];
|
||||
bin_data[offset + idx] = element_ix;
|
||||
}
|
||||
x += 1;
|
||||
|
|
|
@ -27,7 +27,7 @@ struct BinHeader {
|
|||
var<storage> bin_headers: array<BinHeader>;
|
||||
|
||||
@group(0) @binding(4)
|
||||
var<storage> bin_data: array<u32>;
|
||||
var<storage> info_bin_data: array<u32>;
|
||||
|
||||
@group(0) @binding(5)
|
||||
var<storage> paths: array<Path>;
|
||||
|
@ -36,12 +36,9 @@ var<storage> paths: array<Path>;
|
|||
var<storage> tiles: array<Tile>;
|
||||
|
||||
@group(0) @binding(7)
|
||||
var<storage> info: array<u32>;
|
||||
|
||||
@group(0) @binding(8)
|
||||
var<storage, read_write> bump: BumpAllocators;
|
||||
|
||||
@group(0) @binding(9)
|
||||
@group(0) @binding(8)
|
||||
var<storage, read_write> ptcl: array<u32>;
|
||||
|
||||
|
||||
|
@ -208,8 +205,8 @@ fn main(
|
|||
}
|
||||
}
|
||||
ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
|
||||
let offset = sh_part_offsets[part_ix];
|
||||
sh_drawobj_ix[local_id.x] = bin_data[offset + ix];
|
||||
let offset = config.bin_data_start + sh_part_offsets[part_ix];
|
||||
sh_drawobj_ix[local_id.x] = info_bin_data[offset + ix];
|
||||
}
|
||||
wr_ix = min(rd_ix + N_TILE, ready_ix);
|
||||
if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
|
||||
|
@ -326,14 +323,14 @@ fn main(
|
|||
switch drawtag {
|
||||
// DRAWTAG_FILL_COLOR
|
||||
case 0x44u: {
|
||||
let linewidth = bitcast<f32>(info[di]);
|
||||
let linewidth = bitcast<f32>(info_bin_data[di]);
|
||||
write_path(tile, linewidth);
|
||||
let rgba_color = scene[dd];
|
||||
write_color(CmdColor(rgba_color));
|
||||
}
|
||||
// DRAWTAG_FILL_LIN_GRADIENT
|
||||
case 0x114u: {
|
||||
let linewidth = bitcast<f32>(info[di]);
|
||||
let linewidth = bitcast<f32>(info_bin_data[di]);
|
||||
write_path(tile, linewidth);
|
||||
let index = scene[dd];
|
||||
let info_offset = di + 1u;
|
||||
|
@ -341,7 +338,7 @@ fn main(
|
|||
}
|
||||
// DRAWTAG_FILL_RAD_GRADIENT
|
||||
case 0x2dcu: {
|
||||
let linewidth = bitcast<f32>(info[di]);
|
||||
let linewidth = bitcast<f32>(info_bin_data[di]);
|
||||
write_path(tile, linewidth);
|
||||
let index = scene[dd];
|
||||
let info_offset = di + 1u;
|
||||
|
|
|
@ -11,6 +11,10 @@ struct Config {
|
|||
n_path: u32,
|
||||
n_clip: u32,
|
||||
|
||||
// To reduce the number of bindings, info and bin data are combined
|
||||
// into one buffer.
|
||||
bin_data_start: u32,
|
||||
|
||||
// offsets within scene buffer (in u32 units)
|
||||
// Note: this is a difference from piet-gpu, which is in bytes
|
||||
pathtag_base: u32,
|
||||
|
|
|
@ -329,7 +329,7 @@ impl Engine {
|
|||
}
|
||||
Command::Clear(proxy, offset, size) => {
|
||||
let buffer = bind_map.get_or_create(*proxy, device)?;
|
||||
encoder.clear_buffer(buffer, *offset, *size)
|
||||
encoder.clear_buffer(buffer, *offset, *size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ struct Config {
|
|||
n_drawobj: u32,
|
||||
n_path: u32,
|
||||
n_clip: u32,
|
||||
bin_data_start: u32,
|
||||
pathtag_base: u32,
|
||||
pathdata_base: u32,
|
||||
drawtag_base: u32,
|
||||
|
@ -209,6 +210,7 @@ pub fn render_full(
|
|||
// TODO: calculate for real when we do rectangles
|
||||
let n_drawobj = n_path;
|
||||
let n_clip = data.n_clip;
|
||||
let bin_data_start = n_drawobj * MAX_DRAWINFO_SIZE as u32;
|
||||
|
||||
let new_width = next_multiple_of(width, 16);
|
||||
let new_height = next_multiple_of(height, 16);
|
||||
|
@ -222,6 +224,7 @@ pub fn render_full(
|
|||
n_drawobj,
|
||||
n_path,
|
||||
n_clip,
|
||||
bin_data_start,
|
||||
pathtag_base,
|
||||
pathdata_base,
|
||||
drawtag_base,
|
||||
|
@ -278,7 +281,7 @@ pub fn render_full(
|
|||
[config_buf, scene_buf, draw_reduced_buf],
|
||||
);
|
||||
let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
|
||||
let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
|
||||
let info_bin_data_buf = ResourceProxy::new_buf(1 << 20);
|
||||
let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
|
||||
recording.dispatch(
|
||||
shaders.draw_leaf,
|
||||
|
@ -289,7 +292,7 @@ pub fn render_full(
|
|||
draw_reduced_buf,
|
||||
path_bbox_buf,
|
||||
draw_monoid_buf,
|
||||
info_buf,
|
||||
info_bin_data_buf,
|
||||
clip_inp_buf,
|
||||
],
|
||||
);
|
||||
|
@ -329,7 +332,6 @@ pub fn render_full(
|
|||
}
|
||||
let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
|
||||
let bump_buf = BufProxy::new(BUMP_SIZE);
|
||||
let bin_data_buf = ResourceProxy::new_buf(1 << 20);
|
||||
let width_in_bins = (config.width_in_tiles + 15) / 16;
|
||||
let height_in_bins = (config.height_in_tiles + 15) / 16;
|
||||
let n_bins = width_in_bins * height_in_bins;
|
||||
|
@ -346,7 +348,7 @@ pub fn render_full(
|
|||
clip_bbox_buf,
|
||||
draw_bbox_buf,
|
||||
bump_buf,
|
||||
bin_data_buf,
|
||||
info_bin_data_buf,
|
||||
bin_header_buf,
|
||||
],
|
||||
);
|
||||
|
@ -395,10 +397,9 @@ pub fn render_full(
|
|||
scene_buf,
|
||||
draw_monoid_buf,
|
||||
bin_header_buf,
|
||||
bin_data_buf,
|
||||
info_bin_data_buf,
|
||||
path_buf,
|
||||
tile_buf,
|
||||
info_buf,
|
||||
bump_buf,
|
||||
ptcl_buf,
|
||||
],
|
||||
|
@ -414,7 +415,7 @@ pub fn render_full(
|
|||
ResourceProxy::Image(out_image),
|
||||
ptcl_buf,
|
||||
gradient_image,
|
||||
info_buf,
|
||||
info_bin_data_buf,
|
||||
],
|
||||
);
|
||||
(recording, ResourceProxy::Image(out_image))
|
||||
|
|
|
@ -259,7 +259,6 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
BindType::Buffer,
|
||||
],
|
||||
|
|
|
@ -34,7 +34,6 @@ impl RenderContext {
|
|||
let adapter = instance.request_adapter(&Default::default()).await.unwrap();
|
||||
let features = adapter.features();
|
||||
let mut limits = Limits::default();
|
||||
limits.max_storage_buffers_per_shader_stage = 16;
|
||||
let (device, queue) = adapter
|
||||
.request_device(
|
||||
&wgpu::DeviceDescriptor {
|
||||
|
|
Loading…
Reference in a new issue