mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Address review feedback
* Add counts to offsets when comparing against buffer size limits
* Remove multiplication by 4 in blend buffer allocation (we use units of u32)
* Move buffer sizes from BumpAllocators to Config
* Add comments about early exit
This commit is contained in:
parent
db7d93b85c
commit
0c0c61dc82
|
@ -128,7 +128,7 @@ fn main(
|
||||||
}
|
}
|
||||||
// element_count is the number of draw objects covering this thread's bin
|
// element_count is the number of draw objects covering this thread's bin
|
||||||
var chunk_offset = atomicAdd(&bump.binning, element_count);
|
var chunk_offset = atomicAdd(&bump.binning, element_count);
|
||||||
if chunk_offset > bump.binning_size {
|
if chunk_offset + element_count > config.binning_size {
|
||||||
chunk_offset = 0u;
|
chunk_offset = 0u;
|
||||||
atomicOr(&bump.failed, STAGE_BINNING);
|
atomicOr(&bump.failed, STAGE_BINNING);
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,7 @@ fn alloc_cmd(size: u32) {
|
||||||
// by setting the initial value of the bump allocator.
|
// by setting the initial value of the bump allocator.
|
||||||
let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC;
|
let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC;
|
||||||
var new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT);
|
var new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT);
|
||||||
if new_cmd > bump.ptcl_size {
|
if new_cmd + PTCL_INCREMENT > config.ptcl_size {
|
||||||
new_cmd = 0u;
|
new_cmd = 0u;
|
||||||
atomicOr(&bump.failed, STAGE_COARSE);
|
atomicOr(&bump.failed, STAGE_COARSE);
|
||||||
}
|
}
|
||||||
|
@ -145,6 +145,9 @@ fn main(
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
|
// Exit early if prior stages failed, as we can't run this stage.
|
||||||
|
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
||||||
|
// we still want to know this workgroup's memory requirement.
|
||||||
if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u {
|
if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) != 0u {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -411,7 +414,7 @@ fn main(
|
||||||
if bin_tile_x + tile_x < config.width_in_tiles && bin_tile_y + tile_y < config.height_in_tiles {
|
if bin_tile_x + tile_x < config.width_in_tiles && bin_tile_y + tile_y < config.height_in_tiles {
|
||||||
ptcl[cmd_offset] = CMD_END;
|
ptcl[cmd_offset] = CMD_END;
|
||||||
if max_blend_depth > BLEND_STACK_SPLIT {
|
if max_blend_depth > BLEND_STACK_SPLIT {
|
||||||
let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT * 4u;
|
let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT;
|
||||||
ptcl[blend_offset] = atomicAdd(&bump.blend, scratch_size);
|
ptcl[blend_offset] = atomicAdd(&bump.blend, scratch_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,7 +94,7 @@ fn eval_cubic(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, p3: vec2<f32>, t: f32
|
||||||
|
|
||||||
fn alloc_segment() -> u32 {
|
fn alloc_segment() -> u32 {
|
||||||
var offset = atomicAdd(&bump.segments, 1u) + 1u;
|
var offset = atomicAdd(&bump.segments, 1u) + 1u;
|
||||||
if offset > bump.segments_size {
|
if offset + 1u > config.segments_size {
|
||||||
offset = 0u;
|
offset = 0u;
|
||||||
atomicOr(&bump.failed, STAGE_PATH_COARSE);
|
atomicOr(&bump.failed, STAGE_PATH_COARSE);
|
||||||
}
|
}
|
||||||
|
@ -107,6 +107,9 @@ let MAX_QUADS = 16u;
|
||||||
fn main(
|
fn main(
|
||||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
|
// Exit early if prior stages failed, as we can't run this stage.
|
||||||
|
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
||||||
|
// we still want to know this workgroup's memory requirement.
|
||||||
if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC)) != 0u {
|
if (atomicLoad(&bump.failed) & (STAGE_BINNING | STAGE_TILE_ALLOC)) != 0u {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,10 +10,6 @@ let STAGE_COARSE: u32 = 0x8u;
|
||||||
struct BumpAllocators {
|
struct BumpAllocators {
|
||||||
// Bitmask of stages that have failed allocation.
|
// Bitmask of stages that have failed allocation.
|
||||||
failed: atomic<u32>,
|
failed: atomic<u32>,
|
||||||
binning_size: u32,
|
|
||||||
ptcl_size: u32,
|
|
||||||
tiles_size: u32,
|
|
||||||
segments_size: u32,
|
|
||||||
binning: atomic<u32>,
|
binning: atomic<u32>,
|
||||||
ptcl: atomic<u32>,
|
ptcl: atomic<u32>,
|
||||||
tile: atomic<u32>,
|
tile: atomic<u32>,
|
||||||
|
|
|
@ -24,6 +24,12 @@ struct Config {
|
||||||
|
|
||||||
transform_base: u32,
|
transform_base: u32,
|
||||||
linewidth_base: u32,
|
linewidth_base: u32,
|
||||||
|
|
||||||
|
// Sizes of bump allocated buffers (in element size units)
|
||||||
|
binning_size: u32,
|
||||||
|
tiles_size: u32,
|
||||||
|
segments_size: u32,
|
||||||
|
ptcl_size: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Geometry of tiles and bins
|
// Geometry of tiles and bins
|
||||||
|
|
|
@ -35,6 +35,9 @@ fn main(
|
||||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
|
// Exit early if prior stages failed, as we can't run this stage.
|
||||||
|
// We need to check only prior stages, as if this stage has failed in another workgroup,
|
||||||
|
// we still want to know this workgroup's memory requirement.
|
||||||
if (atomicLoad(&bump.failed) & STAGE_BINNING) != 0u {
|
if (atomicLoad(&bump.failed) & STAGE_BINNING) != 0u {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -75,8 +78,9 @@ fn main(
|
||||||
sh_tile_count[local_id.x] = total_tile_count;
|
sh_tile_count[local_id.x] = total_tile_count;
|
||||||
}
|
}
|
||||||
if local_id.x == WG_SIZE - 1u {
|
if local_id.x == WG_SIZE - 1u {
|
||||||
var offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]);
|
let count = sh_tile_count[WG_SIZE - 1u];
|
||||||
if offset > bump.tiles_size {
|
var offset = atomicAdd(&bump.tile, count);
|
||||||
|
if offset + count > config.tiles_size {
|
||||||
offset = 0u;
|
offset = 0u;
|
||||||
atomicOr(&bump.failed, STAGE_TILE_ALLOC);
|
atomicOr(&bump.failed, STAGE_TILE_ALLOC);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,6 +62,14 @@ pub struct Config {
|
||||||
pub target_height: u32,
|
pub target_height: u32,
|
||||||
/// Layout of packed scene data.
|
/// Layout of packed scene data.
|
||||||
pub layout: Layout,
|
pub layout: Layout,
|
||||||
|
/// Size of binning buffer allocation (in u32s).
|
||||||
|
pub binning_size: u32,
|
||||||
|
/// Size of tile buffer allocation (in Tiles).
|
||||||
|
pub tiles_size: u32,
|
||||||
|
/// Size of segment buffer allocation (in PathSegments).
|
||||||
|
pub segments_size: u32,
|
||||||
|
/// Size of per-tile command list buffer allocation (in u32s).
|
||||||
|
pub ptcl_size: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Packed encoding of scene data.
|
/// Packed encoding of scene data.
|
||||||
|
|
|
@ -61,11 +61,6 @@ pub const fn next_multiple_of(val: u32, rhs: u32) -> u32 {
|
||||||
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
||||||
struct BumpAllocators {
|
struct BumpAllocators {
|
||||||
failed: u32,
|
failed: u32,
|
||||||
// Sizes of the provided buffers
|
|
||||||
binning_size: u32,
|
|
||||||
ptcl_size: u32,
|
|
||||||
tiles_size: u32,
|
|
||||||
segments_size: u32,
|
|
||||||
// Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element
|
// Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element
|
||||||
// reallocation needs to occur
|
// reallocation needs to occur
|
||||||
binning: u32,
|
binning: u32,
|
||||||
|
@ -195,33 +190,30 @@ pub fn render_encoding_full(
|
||||||
let new_width = next_multiple_of(width, 16);
|
let new_width = next_multiple_of(width, 16);
|
||||||
let new_height = next_multiple_of(height, 16);
|
let new_height = next_multiple_of(height, 16);
|
||||||
|
|
||||||
|
let info_size = packed.layout.bin_data_start;
|
||||||
let config = crate::encoding::Config {
|
let config = crate::encoding::Config {
|
||||||
width_in_tiles: new_width / 16,
|
width_in_tiles: new_width / 16,
|
||||||
height_in_tiles: new_height / 16,
|
height_in_tiles: new_height / 16,
|
||||||
target_width: width,
|
target_width: width,
|
||||||
target_height: height,
|
target_height: height,
|
||||||
|
binning_size: ((1 << 20) / 4) - info_size,
|
||||||
|
tiles_size: (1 << 24) / TILE_SIZE as u32,
|
||||||
|
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
|
||||||
|
ptcl_size: (1 << 25) / 4,
|
||||||
layout: packed.layout,
|
layout: packed.layout,
|
||||||
};
|
};
|
||||||
// println!("{:?}", config);
|
// println!("{:?}", config);
|
||||||
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed.data));
|
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed.data));
|
||||||
let config_buf =
|
let config_buf =
|
||||||
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config)));
|
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config)));
|
||||||
let info_size = config.layout.bin_data_start;
|
|
||||||
let bump = BumpAllocators {
|
|
||||||
binning_size: ((1 << 20) / 4) - info_size,
|
|
||||||
ptcl_size: (1 << 25) / 4,
|
|
||||||
tiles_size: (1 << 24) / TILE_SIZE as u32,
|
|
||||||
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
let info_bin_data_buf = ResourceProxy::new_buf(
|
let info_bin_data_buf = ResourceProxy::new_buf(
|
||||||
(info_size + bump.binning_size) as u64 * 4,
|
(info_size + config.binning_size) as u64 * 4,
|
||||||
"info_bin_data_buf",
|
"info_bin_data_buf",
|
||||||
);
|
);
|
||||||
let tile_buf = ResourceProxy::new_buf(bump.tiles_size as u64 * TILE_SIZE, "tile_buf");
|
let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf");
|
||||||
let segments_buf =
|
let segments_buf =
|
||||||
ResourceProxy::new_buf(bump.segments_size as u64 * SEGMENT_SIZE, "segments_buf");
|
ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf");
|
||||||
let ptcl_buf = ResourceProxy::new_buf(bump.ptcl_size as u64 * 4, "ptcl_buf");
|
let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf");
|
||||||
|
|
||||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||||
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
|
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
|
||||||
|
@ -356,13 +348,14 @@ pub fn render_encoding_full(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let draw_bbox_buf = ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf");
|
let draw_bbox_buf = ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf");
|
||||||
let bump_buf = recording.upload("bump_buf", bytemuck::bytes_of(&bump));
|
let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf");
|
||||||
let width_in_bins = (config.width_in_tiles + 15) / 16;
|
let width_in_bins = (config.width_in_tiles + 15) / 16;
|
||||||
let height_in_bins = (config.height_in_tiles + 15) / 16;
|
let height_in_bins = (config.height_in_tiles + 15) / 16;
|
||||||
let bin_header_buf = ResourceProxy::new_buf(
|
let bin_header_buf = ResourceProxy::new_buf(
|
||||||
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE,
|
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE,
|
||||||
"bin_header_buf",
|
"bin_header_buf",
|
||||||
);
|
);
|
||||||
|
recording.clear_all(bump_buf);
|
||||||
let bump_buf = ResourceProxy::Buf(bump_buf);
|
let bump_buf = ResourceProxy::Buf(bump_buf);
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.binning,
|
shaders.binning,
|
||||||
|
|
Loading…
Reference in a new issue