[vello_encoding] Use the vello_encoding crate in the vello renderer

Removed the workgroup count and buffer size calculations from
src/render.rs. This code now uses the types returned by the
vello_encoding crate for this purpose.
This commit is contained in:
Arman Uguray 2023-04-13 20:33:05 -07:00
parent 899ecaa430
commit ad82519444

View file

@ -5,24 +5,12 @@ use crate::{
shaders::{self, FullShaders},
RenderParams, Scene,
};
use {
bytemuck::{Pod, Zeroable},
vello_encoding::{Encoding, GpuConfig, Layout},
};
use vello_encoding::{Encoding, WorkgroupSize};
/// State for a render in progress.
pub struct Render {
/// Size of binning and info combined buffer in u32 units
binning_info_size: u32,
/// Size of tiles buf in tiles
tiles_size: u32,
/// Size of segments buf in segments
segments_size: u32,
/// Size of per-tile command list in u32 units
ptcl_size: u32,
width_in_tiles: u32,
height_in_tiles: u32,
fine: Option<FineResources>,
fine_wg_count: Option<WorkgroupSize>,
fine_resources: Option<FineResources>,
}
/// Resources produced by pipeline, needed for fine rasterization.
@ -39,47 +27,6 @@ struct FineResources {
out_image: ImageProxy,
}
const TAG_MONOID_SIZE: u64 = 12;
const TAG_MONOID_FULL_SIZE: u64 = 20;
const PATH_BBOX_SIZE: u64 = 24;
const CUBIC_SIZE: u64 = 48;
const DRAWMONOID_SIZE: u64 = 16;
const CLIP_BIC_SIZE: u64 = 8;
const CLIP_EL_SIZE: u64 = 32;
const CLIP_INP_SIZE: u64 = 8;
const CLIP_BBOX_SIZE: u64 = 16;
const PATH_SIZE: u64 = 32;
const DRAW_BBOX_SIZE: u64 = 16;
const BUMP_SIZE: u64 = std::mem::size_of::<BumpAllocators>() as u64;
const BIN_HEADER_SIZE: u64 = 8;
const TILE_SIZE: u64 = 8;
const SEGMENT_SIZE: u64 = 24;
fn size_to_words(byte_size: usize) -> u32 {
(byte_size / std::mem::size_of::<u32>()) as u32
}
pub const fn next_multiple_of(val: u32, rhs: u32) -> u32 {
match val % rhs {
0 => val,
r => val + (rhs - r),
}
}
// This must be kept in sync with the struct in shader/shared/bump.wgsl
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
struct BumpAllocators {
failed: u32,
// Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element
// reallocation needs to occur
binning: u32,
ptcl: u32,
tile: u32,
segments: u32,
blend: u32,
}
pub fn render_full(
scene: &Scene,
shaders: &FullShaders,
@ -104,21 +51,11 @@ pub fn render_encoding_full(
(recording, out_image.into())
}
pub fn align_up(len: usize, alignment: u32) -> usize {
len + (len.wrapping_neg() & (alignment as usize - 1))
}
impl Render {
pub fn new() -> Self {
// These sizes are adequate for paris-30k but should probably be dialed down.
Render {
binning_info_size: (1 << 20) / 4,
tiles_size: (1 << 24) / TILE_SIZE as u32,
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
ptcl_size: (1 << 25) / 4 as u32,
width_in_tiles: 0,
height_in_tiles: 0,
fine: None,
fine_wg_count: None,
fine_resources: None,
}
}
@ -133,7 +70,8 @@ impl Render {
params: &RenderParams,
robust: bool,
) -> Recording {
use vello_encoding::Resolver;
use vello_encoding::{RenderConfig, Resolver};
let mut recording = Recording::default();
let mut resolver = Resolver::new();
let mut packed = vec![];
@ -155,29 +93,6 @@ impl Render {
} else {
ImageProxy::new(images.width, images.height, ImageFormat::Rgba8)
};
// TODO: calculate for real when we do rectangles
let n_pathtag = layout.path_tags(&packed).len();
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
let n_paths = layout.n_paths;
let n_drawobj = layout.n_paths;
let n_clip = layout.n_clips;
let new_width = next_multiple_of(params.width, 16);
let new_height = next_multiple_of(params.height, 16);
let info_size = layout.bin_data_start;
let config = GpuConfig {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: params.width,
target_height: params.height,
base_color: params.base_color.to_premul_u32(),
binning_size: self.binning_info_size - info_size,
tiles_size: self.tiles_size,
segments_size: self.segments_size,
ptcl_size: self.ptcl_size,
layout: layout,
};
for image in images.images {
recording.write_image(
image_atlas,
@ -188,52 +103,54 @@ impl Render {
image.0.data.data(),
);
}
// println!("{:?}", config);
let cpu_config =
RenderConfig::new(&layout, params.width, params.height, &params.base_color);
let buffer_sizes = &cpu_config.buffer_sizes;
let wg_counts = &cpu_config.workgroup_counts;
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed));
let config_buf =
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config)));
let config_buf = ResourceProxy::Buf(
recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)),
);
let info_bin_data_buf = ResourceProxy::new_buf(
(info_size + config.binning_size) as u64 * 4,
buffer_sizes.bin_data.size_in_bytes() as u64,
"info_bin_data_buf",
);
let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf");
let tile_buf =
ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf");
let segments_buf =
ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf");
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
let reduced_size = if pathtag_large {
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
} else {
pathtag_wgs
};
let reduced_buf =
ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE, "reduced_buf");
ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf");
let reduced_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced.size_in_bytes().into(),
"reduced_buf",
);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
wg_counts.path_reduce,
[config_buf, scene_buf, reduced_buf],
);
let mut pathtag_parent = reduced_buf;
let mut large_pathtag_bufs = None;
if pathtag_large {
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize;
let reduced2_buf =
ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE, "reduced2_buf");
if wg_counts.use_large_path_scan {
let reduced2_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced2.size_in_bytes().into(),
"reduced2_buf",
);
recording.dispatch(
shaders.pathtag_reduce2,
(reduced2_size as u32, 1, 1),
wg_counts.path_reduce2,
[reduced_buf, reduced2_buf],
);
let reduced_scan_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE,
buffer_sizes.path_reduced_scan.size_in_bytes().into(),
"reduced_scan_buf",
);
recording.dispatch(
shaders.pathtag_scan1,
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1),
wg_counts.path_scan1,
[reduced_buf, reduced2_buf, reduced_scan_buf],
);
pathtag_parent = reduced_scan_buf;
@ -241,17 +158,17 @@ impl Render {
}
let tagmonoid_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
buffer_sizes.path_monoids.size_in_bytes().into(),
"tagmonoid_buf",
);
let pathtag_scan = if pathtag_large {
let pathtag_scan = if wg_counts.use_large_path_scan {
shaders.pathtag_scan_large
} else {
shaders.pathtag_scan
};
recording.dispatch(
pathtag_scan,
(pathtag_wgs as u32, 1, 1),
wg_counts.path_scan,
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
);
recording.free_resource(reduced_buf);
@ -259,20 +176,20 @@ impl Render {
recording.free_resource(reduced2);
recording.free_resource(reduced_scan);
}
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
let path_bbox_buf =
ResourceProxy::new_buf(n_paths as u64 * PATH_BBOX_SIZE, "path_bbox_buf");
let path_bbox_buf = ResourceProxy::new_buf(
buffer_sizes.path_bboxes.size_in_bytes().into(),
"path_bbox_buf",
);
recording.dispatch(
shaders.bbox_clear,
(drawobj_wgs, 1, 1),
wg_counts.bbox_clear,
[config_buf, path_bbox_buf],
);
let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE, "cubic_buf");
let path_coarse_wgs =
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
let cubic_buf =
ResourceProxy::new_buf(buffer_sizes.cubics.size_in_bytes().into(), "cubic_buf");
recording.dispatch(
shaders.pathseg,
(path_coarse_wgs, 1, 1),
wg_counts.path_seg,
[
config_buf,
scene_buf,
@ -281,19 +198,26 @@ impl Render {
cubic_buf,
],
);
let draw_reduced_buf =
ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE, "draw_reduced_buf");
let draw_reduced_buf = ResourceProxy::new_buf(
buffer_sizes.draw_reduced.size_in_bytes().into(),
"draw_reduced_buf",
);
recording.dispatch(
shaders.draw_reduce,
(drawobj_wgs, 1, 1),
wg_counts.draw_reduce,
[config_buf, scene_buf, draw_reduced_buf],
);
let draw_monoid_buf =
ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE, "draw_monoid_buf");
let clip_inp_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_INP_SIZE, "clip_inp_buf");
let draw_monoid_buf = ResourceProxy::new_buf(
buffer_sizes.draw_monoids.size_in_bytes().into(),
"draw_monoid_buf",
);
let clip_inp_buf = ResourceProxy::new_buf(
buffer_sizes.clip_inps.size_in_bytes().into(),
"clip_inp_buf",
);
recording.dispatch(
shaders.draw_leaf,
(drawobj_wgs, 1, 1),
wg_counts.draw_leaf,
[
config_buf,
scene_buf,
@ -305,16 +229,16 @@ impl Render {
],
);
recording.free_resource(draw_reduced_buf);
let clip_el_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_EL_SIZE, "clip_el_buf");
let clip_el_buf =
ResourceProxy::new_buf(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf");
let clip_bic_buf = ResourceProxy::new_buf(
(n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE,
buffer_sizes.clip_bics.size_in_bytes().into(),
"clip_bic_buf",
);
let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
if clip_wg_reduce > 0 {
if wg_counts.clip_reduce.0 > 0 {
recording.dispatch(
shaders.clip_reduce,
(clip_wg_reduce, 1, 1),
wg_counts.clip_reduce,
[
config_buf,
clip_inp_buf,
@ -324,12 +248,14 @@ impl Render {
],
);
}
let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE, "clip_bbox_buf");
if clip_wg > 0 {
let clip_bbox_buf = ResourceProxy::new_buf(
buffer_sizes.clip_bboxes.size_in_bytes().into(),
"clip_bbox_buf",
);
if wg_counts.clip_leaf.0 > 0 {
recording.dispatch(
shaders.clip_leaf,
(clip_wg, 1, 1),
wg_counts.clip_leaf,
[
config_buf,
clip_inp_buf,
@ -344,20 +270,20 @@ impl Render {
recording.free_resource(clip_inp_buf);
recording.free_resource(clip_bic_buf);
recording.free_resource(clip_el_buf);
let draw_bbox_buf =
ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf");
let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf");
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
let draw_bbox_buf = ResourceProxy::new_buf(
buffer_sizes.draw_bboxes.size_in_bytes().into(),
"draw_bbox_buf",
);
let bump_buf = BufProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
let bin_header_buf = ResourceProxy::new_buf(
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE,
buffer_sizes.bin_headers.size_in_bytes().into(),
"bin_header_buf",
);
recording.clear_all(bump_buf);
let bump_buf = ResourceProxy::Buf(bump_buf);
recording.dispatch(
shaders.binning,
(drawobj_wgs, 1, 1),
wg_counts.binning,
[
config_buf,
draw_monoid_buf,
@ -374,12 +300,11 @@ impl Render {
recording.free_resource(clip_bbox_buf);
// Note: this only needs to be rounded up because of the workaround to store the tile_offset
// in storage rather than workgroup memory.
let n_path_aligned = align_up(n_paths as usize, 256);
let path_buf = ResourceProxy::new_buf(n_path_aligned as u64 * PATH_SIZE, "path_buf");
let path_wgs = (n_paths + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
let path_buf =
ResourceProxy::new_buf(buffer_sizes.paths.size_in_bytes().into(), "path_buf");
recording.dispatch(
shaders.tile_alloc,
(path_wgs, 1, 1),
wg_counts.tile_alloc,
[
config_buf,
scene_buf,
@ -392,7 +317,7 @@ impl Render {
recording.free_resource(draw_bbox_buf);
recording.dispatch(
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
wg_counts.path_coarse,
[
config_buf,
scene_buf,
@ -408,12 +333,12 @@ impl Render {
recording.free_resource(cubic_buf);
recording.dispatch(
shaders.backdrop,
(path_wgs, 1, 1),
wg_counts.backdrop,
[config_buf, path_buf, tile_buf],
);
recording.dispatch(
shaders.coarse,
(width_in_bins, height_in_bins, 1),
wg_counts.coarse,
[
config_buf,
scene_buf,
@ -431,9 +356,8 @@ impl Render {
recording.free_resource(bin_header_buf);
recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
self.width_in_tiles = config.width_in_tiles;
self.height_in_tiles = config.height_in_tiles;
self.fine = Some(FineResources {
self.fine_wg_count = Some(wg_counts.fine);
self.fine_resources = Some(FineResources {
config_buf,
bump_buf,
tile_buf,
@ -453,10 +377,11 @@ impl Render {
/// Run fine rasterization assuming the coarse phase succeeded.
pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) {
let fine = self.fine.take().unwrap();
let fine_wg_count = self.fine_wg_count.take().unwrap();
let fine = self.fine_resources.take().unwrap();
recording.dispatch(
shaders.fine,
(self.width_in_tiles, self.height_in_tiles, 1),
fine_wg_count,
[
fine.config_buf,
fine.tile_buf,
@ -482,10 +407,16 @@ impl Render {
/// This is going away, as the caller will add the output image to the bind
/// map.
pub fn out_image(&self) -> ImageProxy {
self.fine.as_ref().unwrap().out_image
self.fine_resources.as_ref().unwrap().out_image
}
pub fn bump_buf(&self) -> BufProxy {
*self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap()
*self
.fine_resources
.as_ref()
.unwrap()
.bump_buf
.as_buf()
.unwrap()
}
}