[vello_encoding] Minor clean ups and correctness fixes

- Use the buffer sizes from src/render.rs, as the current 128K is not
  sufficient even for the test scenes
- Add the BumpAllocators type and a buffer size entry for the bump buffer
- Support the `base_color` render option
- Use immutable type construction where possible
- Fix the path tag stream length calculation to use the offsets stored
  in Layout. This both matches the current behavior in src/render.rs and
  means that CpuConfig's construction no longer needs the Encoding
  type as an input
- Rename the CpuConfig & GpuConfig types to 'RenderConfig' and
  'ConfigUniform' (see the usage sketch after this list)
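
A minimal sketch of how a caller might use the renamed API after this change. The upload_config helper, the wgpu queue/buffer objects, the vello_encoding import path, and the use of peniko::Color::BLACK as a base color are illustrative assumptions, not part of this commit:

use vello_encoding::{ConfigUniform, Layout, RenderConfig};

// Hypothetical caller-side helper: derive the render configuration from a
// resolved Layout and upload the GPU-visible part. `queue` and `config_buf`
// are assumed to be wgpu objects owned by the caller.
fn upload_config(
    layout: &Layout,
    width: u32,
    height: u32,
    queue: &wgpu::Queue,
    config_buf: &wgpu::Buffer,
) -> RenderConfig {
    // The constructor no longer takes an Encoding; the path tag count now
    // comes from the offsets stored in Layout (Layout::path_tags_size).
    let config = RenderConfig::new(layout, width, height, &peniko::Color::BLACK);
    // config.gpu is the ConfigUniform shared with the shaders; workgroup_counts
    // and buffer_sizes drive dispatch sizes and buffer allocation on the CPU side.
    let gpu: &ConfigUniform = &config.gpu;
    queue.write_buffer(config_buf, 0, bytemuck::bytes_of(gpu));
    config
}
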
Arman Uguray 2023-04-13 20:24:20 -07:00
parent 6d2b98cade
commit 0256d8a92f
3 changed files with 85 additions and 62 deletions


@@ -5,11 +5,19 @@ use super::{
 use bytemuck::{Pod, Zeroable};
 use std::mem;
 
+const TILE_WIDTH: u32 = 16;
+const TILE_HEIGHT: u32 = 16;
+const PATH_REDUCE_WG: u32 = 256;
+const PATH_BBOX_WG: u32 = 256;
+const PATH_COARSE_WG: u32 = 256;
+const CLIP_REDUCE_WG: u32 = 256;
+
 /// Counters for tracking dynamic allocation on the GPU.
 ///
 /// This must be kept in sync with the struct in shader/shared/bump.wgsl
-#[repr(C)]
 #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+#[repr(C)]
 pub struct BumpAllocators {
     pub failed: u32,
     // Final needed dynamic size of the buffers. If any of these are larger
@@ -21,13 +29,13 @@ pub struct BumpAllocators
     pub blend: u32,
 }
 
-/// GPU side configuration.
+/// Uniform render configuration data used by all GPU stages.
 ///
 /// This data structure must be kept in sync with the definition in
 /// shaders/shared/config.wgsl.
 #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
 #[repr(C)]
-pub struct GpuConfig {
+pub struct ConfigUniform {
     /// Width of the scene in tiles.
     pub width_in_tiles: u32,
     /// Height of the scene in tiles.
@@ -52,57 +60,50 @@ pub struct GpuConfig
 /// CPU side setup and configuration.
 #[derive(Default)]
-pub struct CpuConfig {
+pub struct RenderConfig {
     /// GPU side configuration.
-    pub gpu: GpuConfig,
-    /// Workgroup sizes for all compute pipelines.
-    pub workgroup_sizes: WorkgroupSizes,
+    pub gpu: ConfigUniform,
+    /// Workgroup counts for all compute pipelines.
+    pub workgroup_counts: WorkgroupCounts,
     /// Sizes of all buffer resources.
     pub buffer_sizes: BufferSizes,
 }
 
-impl CpuConfig {
-    pub fn new(encoding: &Encoding, layout: &Layout, width: u32, height: u32) -> Self {
-        let new_width = next_multiple_of(width, 16);
-        let new_height = next_multiple_of(height, 16);
-        let mut config = GpuConfig {
-            width_in_tiles: new_width / 16,
-            height_in_tiles: new_height / 16,
-            target_width: width,
-            target_height: height,
-            base_color: 0,
-            binning_size: 0,
-            tiles_size: 0,
-            segments_size: 0,
-            ptcl_size: 0,
-            layout: *layout,
-        };
-        let n_path_tags = encoding.path_tags.len() as u32;
-        let workgroup_sizes = WorkgroupSizes::new(&config, n_path_tags);
-        let buffer_sizes = BufferSizes::new(&config, &workgroup_sizes, n_path_tags);
-        config.binning_size = buffer_sizes.bin_data.len();
-        config.tiles_size = buffer_sizes.tiles.len();
-        config.segments_size = buffer_sizes.tiles.len();
-        config.ptcl_size = buffer_sizes.tiles.len();
-        Self {
-            gpu: config,
-            workgroup_sizes,
+impl RenderConfig {
+    pub fn new(layout: &Layout, width: u32, height: u32, base_color: &peniko::Color) -> Self {
+        let new_width = next_multiple_of(width, TILE_WIDTH);
+        let new_height = next_multiple_of(height, TILE_HEIGHT);
+        let width_in_tiles = new_width / TILE_WIDTH;
+        let height_in_tiles = new_height / TILE_HEIGHT;
+        let n_path_tags = layout.path_tags_size();
+        let workgroup_counts =
+            WorkgroupCounts::new(&layout, width_in_tiles, height_in_tiles, n_path_tags);
+        let buffer_sizes = BufferSizes::new(&layout, &workgroup_counts, n_path_tags);
+        Self {
+            gpu: ConfigUniform {
+                width_in_tiles,
+                height_in_tiles,
+                target_width: width,
+                target_height: height,
+                base_color: base_color.to_premul_u32(),
+                binning_size: buffer_sizes.bin_data.len() - layout.bin_data_start,
+                tiles_size: buffer_sizes.tiles.len(),
+                segments_size: buffer_sizes.segments.len(),
+                ptcl_size: buffer_sizes.ptcl.len(),
+                layout: *layout,
+            },
+            workgroup_counts,
             buffer_sizes,
         }
     }
 }
 
-const PATH_REDUCE_WG: u32 = 256;
-const PATH_BBOX_WG: u32 = 256;
-const PATH_COARSE_WG: u32 = 256;
-const CLIP_REDUCE_WG: u32 = 256;
-
 /// Type alias for a workgroup size.
 pub type WorkgroupSize = (u32, u32, u32);
 
 /// Computed sizes for all dispatches.
 #[derive(Copy, Clone, Debug, Default)]
-pub struct WorkgroupSizes {
+pub struct WorkgroupCounts {
     pub use_large_path_scan: bool,
     pub path_reduce: WorkgroupSize,
     pub path_reduce2: WorkgroupSize,
@@ -122,15 +123,20 @@ pub struct WorkgroupSizes
     pub fine: WorkgroupSize,
 }
 
-impl WorkgroupSizes {
-    pub fn new(config: &GpuConfig, n_path_tags: u32) -> Self {
-        let n_paths = config.layout.n_paths;
-        let n_draw_objects = config.layout.n_draw_objects;
-        let n_clips = config.layout.n_clips;
+impl WorkgroupCounts {
+    pub fn new(
+        layout: &Layout,
+        width_in_tiles: u32,
+        height_in_tiles: u32,
+        n_path_tags: u32,
+    ) -> Self {
+        let n_paths = layout.n_paths;
+        let n_draw_objects = layout.n_draw_objects;
+        let n_clips = layout.n_clips;
         let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG);
         let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG);
         let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG;
-        let path_reduce_wgs = if use_large_path_scan {
+        let reduced_size = if use_large_path_scan {
             align_up(path_tag_wgs, PATH_REDUCE_WG)
         } else {
             path_tag_wgs
@@ -140,13 +146,13 @@ impl WorkgroupSizes
         let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG;
         let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG;
         let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
-        let width_in_bins = (config.width_in_tiles + 15) / 16;
-        let height_in_bins = (config.height_in_tiles + 15) / 16;
+        let width_in_bins = (width_in_tiles + 15) / 16;
+        let height_in_bins = (height_in_tiles + 15) / 16;
         Self {
             use_large_path_scan,
-            path_reduce: (path_reduce_wgs, 1, 1),
+            path_reduce: (path_tag_wgs, 1, 1),
             path_reduce2: (PATH_REDUCE_WG, 1, 1),
-            path_scan1: (path_reduce_wgs / PATH_REDUCE_WG, 1, 1),
+            path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
             path_scan: (path_tag_wgs, 1, 1),
             bbox_clear: (draw_object_wgs, 1, 1),
             path_seg: (path_coarse_wgs, 1, 1),
@@ -159,7 +165,7 @@ impl WorkgroupSizes
             path_coarse: (path_coarse_wgs, 1, 1),
             backdrop: (path_wgs, 1, 1),
             coarse: (width_in_bins, height_in_bins, 1),
-            fine: (config.width_in_tiles, config.height_in_tiles, 1),
+            fine: (width_in_tiles, height_in_tiles, 1),
         }
     }
 }
@@ -231,6 +237,7 @@ pub struct BufferSizes
     pub clip_bics: BufferSize<ClipBic>,
     pub clip_bboxes: BufferSize<ClipBbox>,
     pub draw_bboxes: BufferSize<DrawBbox>,
+    pub bump_alloc: BufferSize<BumpAllocators>,
     pub bin_headers: BufferSize<BinHeader>,
     pub paths: BufferSize<Path>,
     // Bump allocated buffers
@@ -241,12 +248,17 @@
 }
 
 impl BufferSizes {
-    pub fn new(config: &GpuConfig, workgroups: &WorkgroupSizes, n_path_tags: u32) -> Self {
-        let n_paths = config.layout.n_paths;
-        let n_draw_objects = config.layout.n_draw_objects;
-        let n_clips = config.layout.n_clips;
+    pub fn new(layout: &Layout, workgroups: &WorkgroupCounts, n_path_tags: u32) -> Self {
+        let n_paths = layout.n_paths;
+        let n_draw_objects = layout.n_draw_objects;
+        let n_clips = layout.n_clips;
         let path_tag_wgs = workgroups.path_reduce.0;
-        let path_reduced = BufferSize::new(path_tag_wgs);
+        let reduced_size = if workgroups.use_large_path_scan {
+            align_up(path_tag_wgs, PATH_REDUCE_WG)
+        } else {
+            path_tag_wgs
+        };
+        let path_reduced = BufferSize::new(reduced_size);
         let path_reduced2 = BufferSize::new(PATH_REDUCE_WG);
         let path_reduced_scan = BufferSize::new(path_tag_wgs);
         let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG);
@@ -255,21 +267,24 @@ impl BufferSizes
         let draw_object_wgs = workgroups.draw_reduce.0;
         let draw_reduced = BufferSize::new(draw_object_wgs);
         let draw_monoids = BufferSize::new(n_draw_objects);
-        let info = BufferSize::new(config.layout.bin_data_start);
+        let info = BufferSize::new(layout.bin_data_start);
         let clip_inps = BufferSize::new(n_clips);
         let clip_els = BufferSize::new(n_clips);
         let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG);
         let clip_bboxes = BufferSize::new(n_clips);
         let draw_bboxes = BufferSize::new(n_paths);
+        let bump_alloc = BufferSize::new(1);
         let bin_headers = BufferSize::new(draw_object_wgs * 256);
         let n_paths_aligned = align_up(n_paths, 256);
         let paths = BufferSize::new(n_paths_aligned);
-        // TODO: better heuristics. Just use 128k for now
-        let initial_bump_size = 128 * 1024;
-        let bin_data = BufferSize::from_size_in_bytes(initial_bump_size);
-        let tiles = BufferSize::from_size_in_bytes(initial_bump_size);
-        let segments = BufferSize::from_size_in_bytes(initial_bump_size);
-        let ptcl = BufferSize::from_size_in_bytes(initial_bump_size);
+        // The following buffer sizes have been hand picked to accommodate the vello test scenes as
+        // well as paris-30k. These should instead get derived from the scene layout using
+        // reasonable heuristics.
+        let bin_data = BufferSize::from_size_in_bytes(1 << 20);
+        let tiles = BufferSize::from_size_in_bytes(1 << 24);
+        let segments = BufferSize::from_size_in_bytes(1 << 26);
+        let ptcl = BufferSize::from_size_in_bytes(1 << 25);
         Self {
             path_reduced,
             path_reduced2,
@@ -285,6 +300,7 @@ impl BufferSizes
             clip_bics,
             clip_bboxes,
             draw_bboxes,
+            bump_alloc,
             bin_headers,
             paths,
             bin_data,


@@ -33,7 +33,8 @@ mod resolve
 pub use binning::BinHeader;
 pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
 pub use config::{
-    BufferSize, BufferSizes, BumpAllocators, CpuConfig, GpuConfig, WorkgroupSize, WorkgroupSizes,
+    BufferSize, BufferSizes, BumpAllocators, ConfigUniform, RenderConfig, WorkgroupCounts,
+    WorkgroupSize,
 };
 pub use draw::{
     DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,


@@ -65,6 +65,12 @@ impl Layout
         bytemuck::cast_slice(&data[start..end])
     }
 
+    pub fn path_tags_size(&self) -> u32 {
+        let start = self.path_tag_base * 4;
+        let end = self.path_data_base * 4;
+        end - start
+    }
+
     /// Returns the path tag stream in chunks of 4.
     pub fn path_tags_chunked<'a>(&self, data: &'a [u8]) -> &'a [u32] {
         let start = self.path_tag_base as usize * 4;