[vello_encoding] Use the vello_encoding crate in the vello renderer

Removed the workgroup count and buffer size calculations from
src/render.rs. This code now uses the types returned by the
vello_encoding crate for this purpose.
This commit is contained in:
Arman Uguray 2023-04-13 20:33:05 -07:00
parent 899ecaa430
commit ad82519444

View file

@ -5,24 +5,12 @@ use crate::{
shaders::{self, FullShaders}, shaders::{self, FullShaders},
RenderParams, Scene, RenderParams, Scene,
}; };
use { use vello_encoding::{Encoding, WorkgroupSize};
bytemuck::{Pod, Zeroable},
vello_encoding::{Encoding, GpuConfig, Layout},
};
/// State for a render in progress. /// State for a render in progress.
pub struct Render { pub struct Render {
/// Size of binning and info combined buffer in u32 units fine_wg_count: Option<WorkgroupSize>,
binning_info_size: u32, fine_resources: Option<FineResources>,
/// Size of tiles buf in tiles
tiles_size: u32,
/// Size of segments buf in segments
segments_size: u32,
/// Size of per-tile command list in u32 units
ptcl_size: u32,
width_in_tiles: u32,
height_in_tiles: u32,
fine: Option<FineResources>,
} }
/// Resources produced by pipeline, needed for fine rasterization. /// Resources produced by pipeline, needed for fine rasterization.
@ -39,47 +27,6 @@ struct FineResources {
out_image: ImageProxy, out_image: ImageProxy,
} }
const TAG_MONOID_SIZE: u64 = 12;
const TAG_MONOID_FULL_SIZE: u64 = 20;
const PATH_BBOX_SIZE: u64 = 24;
const CUBIC_SIZE: u64 = 48;
const DRAWMONOID_SIZE: u64 = 16;
const CLIP_BIC_SIZE: u64 = 8;
const CLIP_EL_SIZE: u64 = 32;
const CLIP_INP_SIZE: u64 = 8;
const CLIP_BBOX_SIZE: u64 = 16;
const PATH_SIZE: u64 = 32;
const DRAW_BBOX_SIZE: u64 = 16;
const BUMP_SIZE: u64 = std::mem::size_of::<BumpAllocators>() as u64;
const BIN_HEADER_SIZE: u64 = 8;
const TILE_SIZE: u64 = 8;
const SEGMENT_SIZE: u64 = 24;
fn size_to_words(byte_size: usize) -> u32 {
(byte_size / std::mem::size_of::<u32>()) as u32
}
pub const fn next_multiple_of(val: u32, rhs: u32) -> u32 {
match val % rhs {
0 => val,
r => val + (rhs - r),
}
}
// This must be kept in sync with the struct in shader/shared/bump.wgsl
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
struct BumpAllocators {
failed: u32,
// Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element
// reallocation needs to occur
binning: u32,
ptcl: u32,
tile: u32,
segments: u32,
blend: u32,
}
pub fn render_full( pub fn render_full(
scene: &Scene, scene: &Scene,
shaders: &FullShaders, shaders: &FullShaders,
@ -104,21 +51,11 @@ pub fn render_encoding_full(
(recording, out_image.into()) (recording, out_image.into())
} }
pub fn align_up(len: usize, alignment: u32) -> usize {
len + (len.wrapping_neg() & (alignment as usize - 1))
}
impl Render { impl Render {
pub fn new() -> Self { pub fn new() -> Self {
// These sizes are adequate for paris-30k but should probably be dialed down.
Render { Render {
binning_info_size: (1 << 20) / 4, fine_wg_count: None,
tiles_size: (1 << 24) / TILE_SIZE as u32, fine_resources: None,
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
ptcl_size: (1 << 25) / 4 as u32,
width_in_tiles: 0,
height_in_tiles: 0,
fine: None,
} }
} }
@ -133,7 +70,8 @@ impl Render {
params: &RenderParams, params: &RenderParams,
robust: bool, robust: bool,
) -> Recording { ) -> Recording {
use vello_encoding::Resolver; use vello_encoding::{RenderConfig, Resolver};
let mut recording = Recording::default(); let mut recording = Recording::default();
let mut resolver = Resolver::new(); let mut resolver = Resolver::new();
let mut packed = vec![]; let mut packed = vec![];
@ -155,29 +93,6 @@ impl Render {
} else { } else {
ImageProxy::new(images.width, images.height, ImageFormat::Rgba8) ImageProxy::new(images.width, images.height, ImageFormat::Rgba8)
}; };
// TODO: calculate for real when we do rectangles
let n_pathtag = layout.path_tags(&packed).len();
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
let n_paths = layout.n_paths;
let n_drawobj = layout.n_paths;
let n_clip = layout.n_clips;
let new_width = next_multiple_of(params.width, 16);
let new_height = next_multiple_of(params.height, 16);
let info_size = layout.bin_data_start;
let config = GpuConfig {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: params.width,
target_height: params.height,
base_color: params.base_color.to_premul_u32(),
binning_size: self.binning_info_size - info_size,
tiles_size: self.tiles_size,
segments_size: self.segments_size,
ptcl_size: self.ptcl_size,
layout: layout,
};
for image in images.images { for image in images.images {
recording.write_image( recording.write_image(
image_atlas, image_atlas,
@ -188,52 +103,54 @@ impl Render {
image.0.data.data(), image.0.data.data(),
); );
} }
// println!("{:?}", config);
let cpu_config =
RenderConfig::new(&layout, params.width, params.height, &params.base_color);
let buffer_sizes = &cpu_config.buffer_sizes;
let wg_counts = &cpu_config.workgroup_counts;
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed)); let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed));
let config_buf = let config_buf = ResourceProxy::Buf(
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config))); recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)),
);
let info_bin_data_buf = ResourceProxy::new_buf( let info_bin_data_buf = ResourceProxy::new_buf(
(info_size + config.binning_size) as u64 * 4, buffer_sizes.bin_data.size_in_bytes() as u64,
"info_bin_data_buf", "info_bin_data_buf",
); );
let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf"); let tile_buf =
ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf");
let segments_buf = let segments_buf =
ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf"); ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf"); let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf");
let reduced_buf = ResourceProxy::new_buf(
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); buffer_sizes.path_reduced.size_in_bytes().into(),
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize; "reduced_buf",
let reduced_size = if pathtag_large { );
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
} else {
pathtag_wgs
};
let reduced_buf =
ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE, "reduced_buf");
// TODO: really only need pathtag_wgs - 1 // TODO: really only need pathtag_wgs - 1
recording.dispatch( recording.dispatch(
shaders.pathtag_reduce, shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1), wg_counts.path_reduce,
[config_buf, scene_buf, reduced_buf], [config_buf, scene_buf, reduced_buf],
); );
let mut pathtag_parent = reduced_buf; let mut pathtag_parent = reduced_buf;
let mut large_pathtag_bufs = None; let mut large_pathtag_bufs = None;
if pathtag_large { if wg_counts.use_large_path_scan {
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize; let reduced2_buf = ResourceProxy::new_buf(
let reduced2_buf = buffer_sizes.path_reduced2.size_in_bytes().into(),
ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE, "reduced2_buf"); "reduced2_buf",
);
recording.dispatch( recording.dispatch(
shaders.pathtag_reduce2, shaders.pathtag_reduce2,
(reduced2_size as u32, 1, 1), wg_counts.path_reduce2,
[reduced_buf, reduced2_buf], [reduced_buf, reduced2_buf],
); );
let reduced_scan_buf = ResourceProxy::new_buf( let reduced_scan_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE, buffer_sizes.path_reduced_scan.size_in_bytes().into(),
"reduced_scan_buf", "reduced_scan_buf",
); );
recording.dispatch( recording.dispatch(
shaders.pathtag_scan1, shaders.pathtag_scan1,
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1), wg_counts.path_scan1,
[reduced_buf, reduced2_buf, reduced_scan_buf], [reduced_buf, reduced2_buf, reduced_scan_buf],
); );
pathtag_parent = reduced_scan_buf; pathtag_parent = reduced_scan_buf;
@ -241,17 +158,17 @@ impl Render {
} }
let tagmonoid_buf = ResourceProxy::new_buf( let tagmonoid_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE, buffer_sizes.path_monoids.size_in_bytes().into(),
"tagmonoid_buf", "tagmonoid_buf",
); );
let pathtag_scan = if pathtag_large { let pathtag_scan = if wg_counts.use_large_path_scan {
shaders.pathtag_scan_large shaders.pathtag_scan_large
} else { } else {
shaders.pathtag_scan shaders.pathtag_scan
}; };
recording.dispatch( recording.dispatch(
pathtag_scan, pathtag_scan,
(pathtag_wgs as u32, 1, 1), wg_counts.path_scan,
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf], [config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
); );
recording.free_resource(reduced_buf); recording.free_resource(reduced_buf);
@ -259,20 +176,20 @@ impl Render {
recording.free_resource(reduced2); recording.free_resource(reduced2);
recording.free_resource(reduced_scan); recording.free_resource(reduced_scan);
} }
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; let path_bbox_buf = ResourceProxy::new_buf(
let path_bbox_buf = buffer_sizes.path_bboxes.size_in_bytes().into(),
ResourceProxy::new_buf(n_paths as u64 * PATH_BBOX_SIZE, "path_bbox_buf"); "path_bbox_buf",
);
recording.dispatch( recording.dispatch(
shaders.bbox_clear, shaders.bbox_clear,
(drawobj_wgs, 1, 1), wg_counts.bbox_clear,
[config_buf, path_bbox_buf], [config_buf, path_bbox_buf],
); );
let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE, "cubic_buf"); let cubic_buf =
let path_coarse_wgs = ResourceProxy::new_buf(buffer_sizes.cubics.size_in_bytes().into(), "cubic_buf");
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
recording.dispatch( recording.dispatch(
shaders.pathseg, shaders.pathseg,
(path_coarse_wgs, 1, 1), wg_counts.path_seg,
[ [
config_buf, config_buf,
scene_buf, scene_buf,
@ -281,19 +198,26 @@ impl Render {
cubic_buf, cubic_buf,
], ],
); );
let draw_reduced_buf = let draw_reduced_buf = ResourceProxy::new_buf(
ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE, "draw_reduced_buf"); buffer_sizes.draw_reduced.size_in_bytes().into(),
"draw_reduced_buf",
);
recording.dispatch( recording.dispatch(
shaders.draw_reduce, shaders.draw_reduce,
(drawobj_wgs, 1, 1), wg_counts.draw_reduce,
[config_buf, scene_buf, draw_reduced_buf], [config_buf, scene_buf, draw_reduced_buf],
); );
let draw_monoid_buf = let draw_monoid_buf = ResourceProxy::new_buf(
ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE, "draw_monoid_buf"); buffer_sizes.draw_monoids.size_in_bytes().into(),
let clip_inp_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_INP_SIZE, "clip_inp_buf"); "draw_monoid_buf",
);
let clip_inp_buf = ResourceProxy::new_buf(
buffer_sizes.clip_inps.size_in_bytes().into(),
"clip_inp_buf",
);
recording.dispatch( recording.dispatch(
shaders.draw_leaf, shaders.draw_leaf,
(drawobj_wgs, 1, 1), wg_counts.draw_leaf,
[ [
config_buf, config_buf,
scene_buf, scene_buf,
@ -305,16 +229,16 @@ impl Render {
], ],
); );
recording.free_resource(draw_reduced_buf); recording.free_resource(draw_reduced_buf);
let clip_el_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_EL_SIZE, "clip_el_buf"); let clip_el_buf =
ResourceProxy::new_buf(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf");
let clip_bic_buf = ResourceProxy::new_buf( let clip_bic_buf = ResourceProxy::new_buf(
(n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE, buffer_sizes.clip_bics.size_in_bytes().into(),
"clip_bic_buf", "clip_bic_buf",
); );
let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG; if wg_counts.clip_reduce.0 > 0 {
if clip_wg_reduce > 0 {
recording.dispatch( recording.dispatch(
shaders.clip_reduce, shaders.clip_reduce,
(clip_wg_reduce, 1, 1), wg_counts.clip_reduce,
[ [
config_buf, config_buf,
clip_inp_buf, clip_inp_buf,
@ -324,12 +248,14 @@ impl Render {
], ],
); );
} }
let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG; let clip_bbox_buf = ResourceProxy::new_buf(
let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE, "clip_bbox_buf"); buffer_sizes.clip_bboxes.size_in_bytes().into(),
if clip_wg > 0 { "clip_bbox_buf",
);
if wg_counts.clip_leaf.0 > 0 {
recording.dispatch( recording.dispatch(
shaders.clip_leaf, shaders.clip_leaf,
(clip_wg, 1, 1), wg_counts.clip_leaf,
[ [
config_buf, config_buf,
clip_inp_buf, clip_inp_buf,
@ -344,20 +270,20 @@ impl Render {
recording.free_resource(clip_inp_buf); recording.free_resource(clip_inp_buf);
recording.free_resource(clip_bic_buf); recording.free_resource(clip_bic_buf);
recording.free_resource(clip_el_buf); recording.free_resource(clip_el_buf);
let draw_bbox_buf = let draw_bbox_buf = ResourceProxy::new_buf(
ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf"); buffer_sizes.draw_bboxes.size_in_bytes().into(),
let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf"); "draw_bbox_buf",
let width_in_bins = (config.width_in_tiles + 15) / 16; );
let height_in_bins = (config.height_in_tiles + 15) / 16; let bump_buf = BufProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
let bin_header_buf = ResourceProxy::new_buf( let bin_header_buf = ResourceProxy::new_buf(
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE, buffer_sizes.bin_headers.size_in_bytes().into(),
"bin_header_buf", "bin_header_buf",
); );
recording.clear_all(bump_buf); recording.clear_all(bump_buf);
let bump_buf = ResourceProxy::Buf(bump_buf); let bump_buf = ResourceProxy::Buf(bump_buf);
recording.dispatch( recording.dispatch(
shaders.binning, shaders.binning,
(drawobj_wgs, 1, 1), wg_counts.binning,
[ [
config_buf, config_buf,
draw_monoid_buf, draw_monoid_buf,
@ -374,12 +300,11 @@ impl Render {
recording.free_resource(clip_bbox_buf); recording.free_resource(clip_bbox_buf);
// Note: this only needs to be rounded up because of the workaround to store the tile_offset // Note: this only needs to be rounded up because of the workaround to store the tile_offset
// in storage rather than workgroup memory. // in storage rather than workgroup memory.
let n_path_aligned = align_up(n_paths as usize, 256); let path_buf =
let path_buf = ResourceProxy::new_buf(n_path_aligned as u64 * PATH_SIZE, "path_buf"); ResourceProxy::new_buf(buffer_sizes.paths.size_in_bytes().into(), "path_buf");
let path_wgs = (n_paths + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
recording.dispatch( recording.dispatch(
shaders.tile_alloc, shaders.tile_alloc,
(path_wgs, 1, 1), wg_counts.tile_alloc,
[ [
config_buf, config_buf,
scene_buf, scene_buf,
@ -392,7 +317,7 @@ impl Render {
recording.free_resource(draw_bbox_buf); recording.free_resource(draw_bbox_buf);
recording.dispatch( recording.dispatch(
shaders.path_coarse, shaders.path_coarse,
(path_coarse_wgs, 1, 1), wg_counts.path_coarse,
[ [
config_buf, config_buf,
scene_buf, scene_buf,
@ -408,12 +333,12 @@ impl Render {
recording.free_resource(cubic_buf); recording.free_resource(cubic_buf);
recording.dispatch( recording.dispatch(
shaders.backdrop, shaders.backdrop,
(path_wgs, 1, 1), wg_counts.backdrop,
[config_buf, path_buf, tile_buf], [config_buf, path_buf, tile_buf],
); );
recording.dispatch( recording.dispatch(
shaders.coarse, shaders.coarse,
(width_in_bins, height_in_bins, 1), wg_counts.coarse,
[ [
config_buf, config_buf,
scene_buf, scene_buf,
@ -431,9 +356,8 @@ impl Render {
recording.free_resource(bin_header_buf); recording.free_resource(bin_header_buf);
recording.free_resource(path_buf); recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8); let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
self.width_in_tiles = config.width_in_tiles; self.fine_wg_count = Some(wg_counts.fine);
self.height_in_tiles = config.height_in_tiles; self.fine_resources = Some(FineResources {
self.fine = Some(FineResources {
config_buf, config_buf,
bump_buf, bump_buf,
tile_buf, tile_buf,
@ -453,10 +377,11 @@ impl Render {
/// Run fine rasterization assuming the coarse phase succeeded. /// Run fine rasterization assuming the coarse phase succeeded.
pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) { pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) {
let fine = self.fine.take().unwrap(); let fine_wg_count = self.fine_wg_count.take().unwrap();
let fine = self.fine_resources.take().unwrap();
recording.dispatch( recording.dispatch(
shaders.fine, shaders.fine,
(self.width_in_tiles, self.height_in_tiles, 1), fine_wg_count,
[ [
fine.config_buf, fine.config_buf,
fine.tile_buf, fine.tile_buf,
@ -482,10 +407,16 @@ impl Render {
/// This is going away, as the caller will add the output image to the bind /// This is going away, as the caller will add the output image to the bind
/// map. /// map.
pub fn out_image(&self) -> ImageProxy { pub fn out_image(&self) -> ImageProxy {
self.fine.as_ref().unwrap().out_image self.fine_resources.as_ref().unwrap().out_image
} }
pub fn bump_buf(&self) -> BufProxy { pub fn bump_buf(&self) -> BufProxy {
*self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap() *self
.fine_resources
.as_ref()
.unwrap()
.bump_buf
.as_buf()
.unwrap()
} }
} }