mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-08 20:01:30 +11:00
[vello_encoding] Minor clean ups and correctness fixes
- Use the buffer sizes in src/render.rs as the current 128K is not sufficient for even the test scenes - Add BumpAllocators type and bump buffer size - Support the `base_color` render option - Use immutable type construction where possible - Fix the path tag stream length calculation to use the offsets stored in Layout. This both matches the current behavior in src/render.rs and makes it so that CpuConfig's construction no longer needs the Encoding type as an input - Renamed CpuConfig & GpuConfig types to 'RenderConfig' and 'ConfigUniform'
This commit is contained in:
parent
6d2b98cade
commit
0256d8a92f
|
@ -5,11 +5,19 @@ use super::{
|
|||
use bytemuck::{Pod, Zeroable};
|
||||
use std::mem;
|
||||
|
||||
const TILE_WIDTH: u32 = 16;
|
||||
const TILE_HEIGHT: u32 = 16;
|
||||
|
||||
const PATH_REDUCE_WG: u32 = 256;
|
||||
const PATH_BBOX_WG: u32 = 256;
|
||||
const PATH_COARSE_WG: u32 = 256;
|
||||
const CLIP_REDUCE_WG: u32 = 256;
|
||||
|
||||
/// Counters for tracking dynamic allocation on the GPU.
|
||||
///
|
||||
/// This must be kept in sync with the struct in shader/shared/bump.wgsl
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
||||
#[repr(C)]
|
||||
pub struct BumpAllocators {
|
||||
pub failed: u32,
|
||||
// Final needed dynamic size of the buffers. If any of these are larger
|
||||
|
@ -21,13 +29,13 @@ pub struct BumpAllocators {
|
|||
pub blend: u32,
|
||||
}
|
||||
|
||||
/// GPU side configuration.
|
||||
/// Uniform render configuration data used by all GPU stages.
|
||||
///
|
||||
/// This data structure must be kept in sync with the definition in
|
||||
/// shaders/shared/config.wgsl.
|
||||
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
||||
#[repr(C)]
|
||||
pub struct GpuConfig {
|
||||
pub struct ConfigUniform {
|
||||
/// Width of the scene in tiles.
|
||||
pub width_in_tiles: u32,
|
||||
/// Height of the scene in tiles.
|
||||
|
@ -52,57 +60,50 @@ pub struct GpuConfig {
|
|||
|
||||
/// CPU side setup and configuration.
|
||||
#[derive(Default)]
|
||||
pub struct CpuConfig {
|
||||
pub struct RenderConfig {
|
||||
/// GPU side configuration.
|
||||
pub gpu: GpuConfig,
|
||||
/// Workgroup sizes for all compute pipelines.
|
||||
pub workgroup_sizes: WorkgroupSizes,
|
||||
pub gpu: ConfigUniform,
|
||||
/// Workgroup counts for all compute pipelines.
|
||||
pub workgroup_counts: WorkgroupCounts,
|
||||
/// Sizes of all buffer resources.
|
||||
pub buffer_sizes: BufferSizes,
|
||||
}
|
||||
|
||||
impl CpuConfig {
|
||||
pub fn new(encoding: &Encoding, layout: &Layout, width: u32, height: u32) -> Self {
|
||||
let new_width = next_multiple_of(width, 16);
|
||||
let new_height = next_multiple_of(height, 16);
|
||||
let mut config = GpuConfig {
|
||||
width_in_tiles: new_width / 16,
|
||||
height_in_tiles: new_height / 16,
|
||||
target_width: width,
|
||||
target_height: height,
|
||||
base_color: 0,
|
||||
binning_size: 0,
|
||||
tiles_size: 0,
|
||||
segments_size: 0,
|
||||
ptcl_size: 0,
|
||||
layout: *layout,
|
||||
};
|
||||
let n_path_tags = encoding.path_tags.len() as u32;
|
||||
let workgroup_sizes = WorkgroupSizes::new(&config, n_path_tags);
|
||||
let buffer_sizes = BufferSizes::new(&config, &workgroup_sizes, n_path_tags);
|
||||
config.binning_size = buffer_sizes.bin_data.len();
|
||||
config.tiles_size = buffer_sizes.tiles.len();
|
||||
config.segments_size = buffer_sizes.tiles.len();
|
||||
config.ptcl_size = buffer_sizes.tiles.len();
|
||||
impl RenderConfig {
|
||||
pub fn new(layout: &Layout, width: u32, height: u32, base_color: &peniko::Color) -> Self {
|
||||
let new_width = next_multiple_of(width, TILE_WIDTH);
|
||||
let new_height = next_multiple_of(height, TILE_HEIGHT);
|
||||
let width_in_tiles = new_width / TILE_WIDTH;
|
||||
let height_in_tiles = new_height / TILE_HEIGHT;
|
||||
let n_path_tags = layout.path_tags_size();
|
||||
let workgroup_counts =
|
||||
WorkgroupCounts::new(&layout, width_in_tiles, height_in_tiles, n_path_tags);
|
||||
let buffer_sizes = BufferSizes::new(&layout, &workgroup_counts, n_path_tags);
|
||||
Self {
|
||||
gpu: config,
|
||||
workgroup_sizes,
|
||||
gpu: ConfigUniform {
|
||||
width_in_tiles,
|
||||
height_in_tiles,
|
||||
target_width: width,
|
||||
target_height: height,
|
||||
base_color: base_color.to_premul_u32(),
|
||||
binning_size: buffer_sizes.bin_data.len() - layout.bin_data_start,
|
||||
tiles_size: buffer_sizes.tiles.len(),
|
||||
segments_size: buffer_sizes.segments.len(),
|
||||
ptcl_size: buffer_sizes.ptcl.len(),
|
||||
layout: *layout,
|
||||
},
|
||||
workgroup_counts,
|
||||
buffer_sizes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const PATH_REDUCE_WG: u32 = 256;
|
||||
const PATH_BBOX_WG: u32 = 256;
|
||||
const PATH_COARSE_WG: u32 = 256;
|
||||
const CLIP_REDUCE_WG: u32 = 256;
|
||||
|
||||
/// Type alias for a workgroup size.
|
||||
pub type WorkgroupSize = (u32, u32, u32);
|
||||
|
||||
/// Computed sizes for all dispatches.
|
||||
#[derive(Copy, Clone, Debug, Default)]
|
||||
pub struct WorkgroupSizes {
|
||||
pub struct WorkgroupCounts {
|
||||
pub use_large_path_scan: bool,
|
||||
pub path_reduce: WorkgroupSize,
|
||||
pub path_reduce2: WorkgroupSize,
|
||||
|
@ -122,15 +123,20 @@ pub struct WorkgroupSizes {
|
|||
pub fine: WorkgroupSize,
|
||||
}
|
||||
|
||||
impl WorkgroupSizes {
|
||||
pub fn new(config: &GpuConfig, n_path_tags: u32) -> Self {
|
||||
let n_paths = config.layout.n_paths;
|
||||
let n_draw_objects = config.layout.n_draw_objects;
|
||||
let n_clips = config.layout.n_clips;
|
||||
impl WorkgroupCounts {
|
||||
pub fn new(
|
||||
layout: &Layout,
|
||||
width_in_tiles: u32,
|
||||
height_in_tiles: u32,
|
||||
n_path_tags: u32,
|
||||
) -> Self {
|
||||
let n_paths = layout.n_paths;
|
||||
let n_draw_objects = layout.n_draw_objects;
|
||||
let n_clips = layout.n_clips;
|
||||
let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG);
|
||||
let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG);
|
||||
let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG;
|
||||
let path_reduce_wgs = if use_large_path_scan {
|
||||
let reduced_size = if use_large_path_scan {
|
||||
align_up(path_tag_wgs, PATH_REDUCE_WG)
|
||||
} else {
|
||||
path_tag_wgs
|
||||
|
@ -140,13 +146,13 @@ impl WorkgroupSizes {
|
|||
let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG;
|
||||
let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG;
|
||||
let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
|
||||
let width_in_bins = (config.width_in_tiles + 15) / 16;
|
||||
let height_in_bins = (config.height_in_tiles + 15) / 16;
|
||||
let width_in_bins = (width_in_tiles + 15) / 16;
|
||||
let height_in_bins = (height_in_tiles + 15) / 16;
|
||||
Self {
|
||||
use_large_path_scan,
|
||||
path_reduce: (path_reduce_wgs, 1, 1),
|
||||
path_reduce: (path_tag_wgs, 1, 1),
|
||||
path_reduce2: (PATH_REDUCE_WG, 1, 1),
|
||||
path_scan1: (path_reduce_wgs / PATH_REDUCE_WG, 1, 1),
|
||||
path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
|
||||
path_scan: (path_tag_wgs, 1, 1),
|
||||
bbox_clear: (draw_object_wgs, 1, 1),
|
||||
path_seg: (path_coarse_wgs, 1, 1),
|
||||
|
@ -159,7 +165,7 @@ impl WorkgroupSizes {
|
|||
path_coarse: (path_coarse_wgs, 1, 1),
|
||||
backdrop: (path_wgs, 1, 1),
|
||||
coarse: (width_in_bins, height_in_bins, 1),
|
||||
fine: (config.width_in_tiles, config.height_in_tiles, 1),
|
||||
fine: (width_in_tiles, height_in_tiles, 1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -231,6 +237,7 @@ pub struct BufferSizes {
|
|||
pub clip_bics: BufferSize<ClipBic>,
|
||||
pub clip_bboxes: BufferSize<ClipBbox>,
|
||||
pub draw_bboxes: BufferSize<DrawBbox>,
|
||||
pub bump_alloc: BufferSize<BumpAllocators>,
|
||||
pub bin_headers: BufferSize<BinHeader>,
|
||||
pub paths: BufferSize<Path>,
|
||||
// Bump allocated buffers
|
||||
|
@ -241,12 +248,17 @@ pub struct BufferSizes {
|
|||
}
|
||||
|
||||
impl BufferSizes {
|
||||
pub fn new(config: &GpuConfig, workgroups: &WorkgroupSizes, n_path_tags: u32) -> Self {
|
||||
let n_paths = config.layout.n_paths;
|
||||
let n_draw_objects = config.layout.n_draw_objects;
|
||||
let n_clips = config.layout.n_clips;
|
||||
pub fn new(layout: &Layout, workgroups: &WorkgroupCounts, n_path_tags: u32) -> Self {
|
||||
let n_paths = layout.n_paths;
|
||||
let n_draw_objects = layout.n_draw_objects;
|
||||
let n_clips = layout.n_clips;
|
||||
let path_tag_wgs = workgroups.path_reduce.0;
|
||||
let path_reduced = BufferSize::new(path_tag_wgs);
|
||||
let reduced_size = if workgroups.use_large_path_scan {
|
||||
align_up(path_tag_wgs, PATH_REDUCE_WG)
|
||||
} else {
|
||||
path_tag_wgs
|
||||
};
|
||||
let path_reduced = BufferSize::new(reduced_size);
|
||||
let path_reduced2 = BufferSize::new(PATH_REDUCE_WG);
|
||||
let path_reduced_scan = BufferSize::new(path_tag_wgs);
|
||||
let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG);
|
||||
|
@ -255,21 +267,24 @@ impl BufferSizes {
|
|||
let draw_object_wgs = workgroups.draw_reduce.0;
|
||||
let draw_reduced = BufferSize::new(draw_object_wgs);
|
||||
let draw_monoids = BufferSize::new(n_draw_objects);
|
||||
let info = BufferSize::new(config.layout.bin_data_start);
|
||||
let info = BufferSize::new(layout.bin_data_start);
|
||||
let clip_inps = BufferSize::new(n_clips);
|
||||
let clip_els = BufferSize::new(n_clips);
|
||||
let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG);
|
||||
let clip_bboxes = BufferSize::new(n_clips);
|
||||
let draw_bboxes = BufferSize::new(n_paths);
|
||||
let bump_alloc = BufferSize::new(1);
|
||||
let bin_headers = BufferSize::new(draw_object_wgs * 256);
|
||||
let n_paths_aligned = align_up(n_paths, 256);
|
||||
let paths = BufferSize::new(n_paths_aligned);
|
||||
// TODO: better heuristics. Just use 128k for now
|
||||
let initial_bump_size = 128 * 1024;
|
||||
let bin_data = BufferSize::from_size_in_bytes(initial_bump_size);
|
||||
let tiles = BufferSize::from_size_in_bytes(initial_bump_size);
|
||||
let segments = BufferSize::from_size_in_bytes(initial_bump_size);
|
||||
let ptcl = BufferSize::from_size_in_bytes(initial_bump_size);
|
||||
|
||||
// The following buffer sizes have been hand picked to accommodate the vello test scenes as
|
||||
// well as paris-30k. These should instead get derived from the scene layout using
|
||||
// reasonable heuristics.
|
||||
let bin_data = BufferSize::from_size_in_bytes(1 << 20);
|
||||
let tiles = BufferSize::from_size_in_bytes(1 << 24);
|
||||
let segments = BufferSize::from_size_in_bytes(1 << 26);
|
||||
let ptcl = BufferSize::from_size_in_bytes(1 << 25);
|
||||
Self {
|
||||
path_reduced,
|
||||
path_reduced2,
|
||||
|
@ -285,6 +300,7 @@ impl BufferSizes {
|
|||
clip_bics,
|
||||
clip_bboxes,
|
||||
draw_bboxes,
|
||||
bump_alloc,
|
||||
bin_headers,
|
||||
paths,
|
||||
bin_data,
|
||||
|
|
|
@ -33,7 +33,8 @@ mod resolve;
|
|||
pub use binning::BinHeader;
|
||||
pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
|
||||
pub use config::{
|
||||
BufferSize, BufferSizes, BumpAllocators, CpuConfig, GpuConfig, WorkgroupSize, WorkgroupSizes,
|
||||
BufferSize, BufferSizes, BumpAllocators, ConfigUniform, RenderConfig, WorkgroupCounts,
|
||||
WorkgroupSize,
|
||||
};
|
||||
pub use draw::{
|
||||
DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,
|
||||
|
|
|
@ -65,6 +65,12 @@ impl Layout {
|
|||
bytemuck::cast_slice(&data[start..end])
|
||||
}
|
||||
|
||||
pub fn path_tags_size(&self) -> u32 {
|
||||
let start = self.path_tag_base * 4;
|
||||
let end = self.path_data_base * 4;
|
||||
end - start
|
||||
}
|
||||
|
||||
/// Returns the path tag stream in chunks of 4.
|
||||
pub fn path_tags_chunked<'a>(&self, data: &'a [u8]) -> &'a [u32] {
|
||||
let start = self.path_tag_base as usize * 4;
|
||||
|
|
Loading…
Reference in a new issue