diff --git a/src/render.rs b/src/render.rs index 3cc1658..f7d80e0 100644 --- a/src/render.rs +++ b/src/render.rs @@ -5,24 +5,12 @@ use crate::{ shaders::{self, FullShaders}, RenderParams, Scene, }; -use { - bytemuck::{Pod, Zeroable}, - vello_encoding::{Encoding, GpuConfig, Layout}, -}; +use vello_encoding::{Encoding, WorkgroupSize}; /// State for a render in progress. pub struct Render { - /// Size of binning and info combined buffer in u32 units - binning_info_size: u32, - /// Size of tiles buf in tiles - tiles_size: u32, - /// Size of segments buf in segments - segments_size: u32, - /// Size of per-tile command list in u32 units - ptcl_size: u32, - width_in_tiles: u32, - height_in_tiles: u32, - fine: Option, + fine_wg_count: Option, + fine_resources: Option, } /// Resources produced by pipeline, needed for fine rasterization. @@ -39,47 +27,6 @@ struct FineResources { out_image: ImageProxy, } -const TAG_MONOID_SIZE: u64 = 12; -const TAG_MONOID_FULL_SIZE: u64 = 20; -const PATH_BBOX_SIZE: u64 = 24; -const CUBIC_SIZE: u64 = 48; -const DRAWMONOID_SIZE: u64 = 16; -const CLIP_BIC_SIZE: u64 = 8; -const CLIP_EL_SIZE: u64 = 32; -const CLIP_INP_SIZE: u64 = 8; -const CLIP_BBOX_SIZE: u64 = 16; -const PATH_SIZE: u64 = 32; -const DRAW_BBOX_SIZE: u64 = 16; -const BUMP_SIZE: u64 = std::mem::size_of::() as u64; -const BIN_HEADER_SIZE: u64 = 8; -const TILE_SIZE: u64 = 8; -const SEGMENT_SIZE: u64 = 24; - -fn size_to_words(byte_size: usize) -> u32 { - (byte_size / std::mem::size_of::()) as u32 -} - -pub const fn next_multiple_of(val: u32, rhs: u32) -> u32 { - match val % rhs { - 0 => val, - r => val + (rhs - r), - } -} - -// This must be kept in sync with the struct in shader/shared/bump.wgsl -#[repr(C)] -#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] -struct BumpAllocators { - failed: u32, - // Final needed dynamic size of the buffers. If any of these are larger than the corresponding `_size` element - // reallocation needs to occur - binning: u32, - ptcl: u32, - tile: u32, - segments: u32, - blend: u32, -} - pub fn render_full( scene: &Scene, shaders: &FullShaders, @@ -104,21 +51,11 @@ pub fn render_encoding_full( (recording, out_image.into()) } -pub fn align_up(len: usize, alignment: u32) -> usize { - len + (len.wrapping_neg() & (alignment as usize - 1)) -} - impl Render { pub fn new() -> Self { - // These sizes are adequate for paris-30k but should probably be dialed down. Render { - binning_info_size: (1 << 20) / 4, - tiles_size: (1 << 24) / TILE_SIZE as u32, - segments_size: (1 << 26) / SEGMENT_SIZE as u32, - ptcl_size: (1 << 25) / 4 as u32, - width_in_tiles: 0, - height_in_tiles: 0, - fine: None, + fine_wg_count: None, + fine_resources: None, } } @@ -133,7 +70,8 @@ impl Render { params: &RenderParams, robust: bool, ) -> Recording { - use vello_encoding::Resolver; + use vello_encoding::{RenderConfig, Resolver}; + let mut recording = Recording::default(); let mut resolver = Resolver::new(); let mut packed = vec![]; @@ -155,29 +93,6 @@ impl Render { } else { ImageProxy::new(images.width, images.height, ImageFormat::Rgba8) }; - // TODO: calculate for real when we do rectangles - let n_pathtag = layout.path_tags(&packed).len(); - let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); - let n_paths = layout.n_paths; - let n_drawobj = layout.n_paths; - let n_clip = layout.n_clips; - - let new_width = next_multiple_of(params.width, 16); - let new_height = next_multiple_of(params.height, 16); - - let info_size = layout.bin_data_start; - let config = GpuConfig { - width_in_tiles: new_width / 16, - height_in_tiles: new_height / 16, - target_width: params.width, - target_height: params.height, - base_color: params.base_color.to_premul_u32(), - binning_size: self.binning_info_size - info_size, - tiles_size: self.tiles_size, - segments_size: self.segments_size, - ptcl_size: self.ptcl_size, - layout: layout, - }; for image in images.images { recording.write_image( image_atlas, @@ -188,52 +103,54 @@ impl Render { image.0.data.data(), ); } - // println!("{:?}", config); + + let cpu_config = + RenderConfig::new(&layout, params.width, params.height, ¶ms.base_color); + let buffer_sizes = &cpu_config.buffer_sizes; + let wg_counts = &cpu_config.workgroup_counts; + let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed)); - let config_buf = - ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config))); + let config_buf = ResourceProxy::Buf( + recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)), + ); let info_bin_data_buf = ResourceProxy::new_buf( - (info_size + config.binning_size) as u64 * 4, + buffer_sizes.bin_data.size_in_bytes() as u64, "info_bin_data_buf", ); - let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf"); + let tile_buf = + ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf"); let segments_buf = - ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf"); - let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf"); - - let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); - let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize; - let reduced_size = if pathtag_large { - align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG) - } else { - pathtag_wgs - }; - let reduced_buf = - ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE, "reduced_buf"); + ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf"); + let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf"); + let reduced_buf = ResourceProxy::new_buf( + buffer_sizes.path_reduced.size_in_bytes().into(), + "reduced_buf", + ); // TODO: really only need pathtag_wgs - 1 recording.dispatch( shaders.pathtag_reduce, - (pathtag_wgs as u32, 1, 1), + wg_counts.path_reduce, [config_buf, scene_buf, reduced_buf], ); let mut pathtag_parent = reduced_buf; let mut large_pathtag_bufs = None; - if pathtag_large { - let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize; - let reduced2_buf = - ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE, "reduced2_buf"); + if wg_counts.use_large_path_scan { + let reduced2_buf = ResourceProxy::new_buf( + buffer_sizes.path_reduced2.size_in_bytes().into(), + "reduced2_buf", + ); recording.dispatch( shaders.pathtag_reduce2, - (reduced2_size as u32, 1, 1), + wg_counts.path_reduce2, [reduced_buf, reduced2_buf], ); let reduced_scan_buf = ResourceProxy::new_buf( - pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE, + buffer_sizes.path_reduced_scan.size_in_bytes().into(), "reduced_scan_buf", ); recording.dispatch( shaders.pathtag_scan1, - (reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1), + wg_counts.path_scan1, [reduced_buf, reduced2_buf, reduced_scan_buf], ); pathtag_parent = reduced_scan_buf; @@ -241,17 +158,17 @@ impl Render { } let tagmonoid_buf = ResourceProxy::new_buf( - pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE, + buffer_sizes.path_monoids.size_in_bytes().into(), "tagmonoid_buf", ); - let pathtag_scan = if pathtag_large { + let pathtag_scan = if wg_counts.use_large_path_scan { shaders.pathtag_scan_large } else { shaders.pathtag_scan }; recording.dispatch( pathtag_scan, - (pathtag_wgs as u32, 1, 1), + wg_counts.path_scan, [config_buf, scene_buf, pathtag_parent, tagmonoid_buf], ); recording.free_resource(reduced_buf); @@ -259,20 +176,20 @@ impl Render { recording.free_resource(reduced2); recording.free_resource(reduced_scan); } - let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; - let path_bbox_buf = - ResourceProxy::new_buf(n_paths as u64 * PATH_BBOX_SIZE, "path_bbox_buf"); + let path_bbox_buf = ResourceProxy::new_buf( + buffer_sizes.path_bboxes.size_in_bytes().into(), + "path_bbox_buf", + ); recording.dispatch( shaders.bbox_clear, - (drawobj_wgs, 1, 1), + wg_counts.bbox_clear, [config_buf, path_bbox_buf], ); - let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE, "cubic_buf"); - let path_coarse_wgs = - (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + let cubic_buf = + ResourceProxy::new_buf(buffer_sizes.cubics.size_in_bytes().into(), "cubic_buf"); recording.dispatch( shaders.pathseg, - (path_coarse_wgs, 1, 1), + wg_counts.path_seg, [ config_buf, scene_buf, @@ -281,19 +198,26 @@ impl Render { cubic_buf, ], ); - let draw_reduced_buf = - ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE, "draw_reduced_buf"); + let draw_reduced_buf = ResourceProxy::new_buf( + buffer_sizes.draw_reduced.size_in_bytes().into(), + "draw_reduced_buf", + ); recording.dispatch( shaders.draw_reduce, - (drawobj_wgs, 1, 1), + wg_counts.draw_reduce, [config_buf, scene_buf, draw_reduced_buf], ); - let draw_monoid_buf = - ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE, "draw_monoid_buf"); - let clip_inp_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_INP_SIZE, "clip_inp_buf"); + let draw_monoid_buf = ResourceProxy::new_buf( + buffer_sizes.draw_monoids.size_in_bytes().into(), + "draw_monoid_buf", + ); + let clip_inp_buf = ResourceProxy::new_buf( + buffer_sizes.clip_inps.size_in_bytes().into(), + "clip_inp_buf", + ); recording.dispatch( shaders.draw_leaf, - (drawobj_wgs, 1, 1), + wg_counts.draw_leaf, [ config_buf, scene_buf, @@ -305,16 +229,16 @@ impl Render { ], ); recording.free_resource(draw_reduced_buf); - let clip_el_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_EL_SIZE, "clip_el_buf"); + let clip_el_buf = + ResourceProxy::new_buf(buffer_sizes.clip_els.size_in_bytes().into(), "clip_el_buf"); let clip_bic_buf = ResourceProxy::new_buf( - (n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE, + buffer_sizes.clip_bics.size_in_bytes().into(), "clip_bic_buf", ); - let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG; - if clip_wg_reduce > 0 { + if wg_counts.clip_reduce.0 > 0 { recording.dispatch( shaders.clip_reduce, - (clip_wg_reduce, 1, 1), + wg_counts.clip_reduce, [ config_buf, clip_inp_buf, @@ -324,12 +248,14 @@ impl Render { ], ); } - let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG; - let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE, "clip_bbox_buf"); - if clip_wg > 0 { + let clip_bbox_buf = ResourceProxy::new_buf( + buffer_sizes.clip_bboxes.size_in_bytes().into(), + "clip_bbox_buf", + ); + if wg_counts.clip_leaf.0 > 0 { recording.dispatch( shaders.clip_leaf, - (clip_wg, 1, 1), + wg_counts.clip_leaf, [ config_buf, clip_inp_buf, @@ -344,20 +270,20 @@ impl Render { recording.free_resource(clip_inp_buf); recording.free_resource(clip_bic_buf); recording.free_resource(clip_el_buf); - let draw_bbox_buf = - ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf"); - let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf"); - let width_in_bins = (config.width_in_tiles + 15) / 16; - let height_in_bins = (config.height_in_tiles + 15) / 16; + let draw_bbox_buf = ResourceProxy::new_buf( + buffer_sizes.draw_bboxes.size_in_bytes().into(), + "draw_bbox_buf", + ); + let bump_buf = BufProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf"); let bin_header_buf = ResourceProxy::new_buf( - (256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE, + buffer_sizes.bin_headers.size_in_bytes().into(), "bin_header_buf", ); recording.clear_all(bump_buf); let bump_buf = ResourceProxy::Buf(bump_buf); recording.dispatch( shaders.binning, - (drawobj_wgs, 1, 1), + wg_counts.binning, [ config_buf, draw_monoid_buf, @@ -374,12 +300,11 @@ impl Render { recording.free_resource(clip_bbox_buf); // Note: this only needs to be rounded up because of the workaround to store the tile_offset // in storage rather than workgroup memory. - let n_path_aligned = align_up(n_paths as usize, 256); - let path_buf = ResourceProxy::new_buf(n_path_aligned as u64 * PATH_SIZE, "path_buf"); - let path_wgs = (n_paths + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; + let path_buf = + ResourceProxy::new_buf(buffer_sizes.paths.size_in_bytes().into(), "path_buf"); recording.dispatch( shaders.tile_alloc, - (path_wgs, 1, 1), + wg_counts.tile_alloc, [ config_buf, scene_buf, @@ -392,7 +317,7 @@ impl Render { recording.free_resource(draw_bbox_buf); recording.dispatch( shaders.path_coarse, - (path_coarse_wgs, 1, 1), + wg_counts.path_coarse, [ config_buf, scene_buf, @@ -408,12 +333,12 @@ impl Render { recording.free_resource(cubic_buf); recording.dispatch( shaders.backdrop, - (path_wgs, 1, 1), + wg_counts.backdrop, [config_buf, path_buf, tile_buf], ); recording.dispatch( shaders.coarse, - (width_in_bins, height_in_bins, 1), + wg_counts.coarse, [ config_buf, scene_buf, @@ -431,9 +356,8 @@ impl Render { recording.free_resource(bin_header_buf); recording.free_resource(path_buf); let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8); - self.width_in_tiles = config.width_in_tiles; - self.height_in_tiles = config.height_in_tiles; - self.fine = Some(FineResources { + self.fine_wg_count = Some(wg_counts.fine); + self.fine_resources = Some(FineResources { config_buf, bump_buf, tile_buf, @@ -453,10 +377,11 @@ impl Render { /// Run fine rasterization assuming the coarse phase succeeded. pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) { - let fine = self.fine.take().unwrap(); + let fine_wg_count = self.fine_wg_count.take().unwrap(); + let fine = self.fine_resources.take().unwrap(); recording.dispatch( shaders.fine, - (self.width_in_tiles, self.height_in_tiles, 1), + fine_wg_count, [ fine.config_buf, fine.tile_buf, @@ -482,10 +407,16 @@ impl Render { /// This is going away, as the caller will add the output image to the bind /// map. pub fn out_image(&self) -> ImageProxy { - self.fine.as_ref().unwrap().out_image + self.fine_resources.as_ref().unwrap().out_image } pub fn bump_buf(&self) -> BufProxy { - *self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap() + *self + .fine_resources + .as_ref() + .unwrap() + .bump_buf + .as_buf() + .unwrap() } }