capture computation of workgroup and buffer sizes

This commit is contained in:
Chad Brokaw 2023-01-26 12:13:30 -05:00 committed by Arman Uguray
parent db2fefdc8f
commit 9f27fae64e
10 changed files with 422 additions and 44 deletions

View file

@ -0,0 +1,12 @@
// Copyright 2022 Google LLC
// SPDX-License-Identifier: Apache-2.0 OR MIT
use bytemuck::{Pod, Zeroable};
/// Binning header.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct BinHeader {
pub element_count: u32,
pub chunk_offset: u32,
}

View file

@ -0,0 +1,42 @@
// Copyright 2022 Google LLC
// SPDX-License-Identifier: Apache-2.0 OR MIT
use bytemuck::{Pod, Zeroable};
/// Clip stack element.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct ClipBic {
pub a: u32,
pub b: u32,
}
/// Clip element.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct ClipElement {
pub parent_ix: u32,
pub bbox: [f32; 4],
}
/// Clip resolution.
///
/// This is an intermediate element used to match clips to associated paths
/// and is also used to connect begin and end clip pairs.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct Clip {
// Index of the draw object.
pub ix: u32,
/// This is a packed encoding of an enum with the sign bit as the tag. If positive,
/// this entry is a BeginClip and contains the associated path index. If negative,
/// it is an EndClip and contains the bitwise-not of the EndClip draw object index.
pub path_ix: i32,
}
/// Clip bounding box.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct ClipBbox {
pub bbox: [f32; 4],
}

View file

@ -0,0 +1,307 @@
use super::{
BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Encoding, Layout,
Path, PathBbox, PathMonoid, PathSegment, Tile,
};
use bytemuck::{Pod, Zeroable};
use std::mem;
/// Counters for tracking dynamic allocation on the GPU.
///
/// This must be kept in sync with the struct in shader/shared/bump.wgsl
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct BumpAllocators {
pub failed: u32,
// Final needed dynamic size of the buffers. If any of these are larger
// than the corresponding `_size` element reallocation needs to occur.
pub binning: u32,
pub ptcl: u32,
pub tile: u32,
pub segments: u32,
pub blend: u32,
}
/// GPU side configuration.
///
/// This data structure must be kept in sync with the definition in
/// shaders/shared/config.wgsl.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
pub struct GpuConfig {
/// Width of the scene in tiles.
pub width_in_tiles: u32,
/// Height of the scene in tiles.
pub height_in_tiles: u32,
/// Width of the target in pixels.
pub target_width: u32,
/// Height of the target in pixels.
pub target_height: u32,
/// The base background color applied to the target before any blends.
pub base_color: u32,
/// Layout of packed scene data.
pub layout: Layout,
/// Size of binning buffer allocation (in u32s).
pub binning_size: u32,
/// Size of tile buffer allocation (in Tiles).
pub tiles_size: u32,
/// Size of segment buffer allocation (in PathSegments).
pub segments_size: u32,
/// Size of per-tile command list buffer allocation (in u32s).
pub ptcl_size: u32,
}
/// CPU side setup and configuration.
#[derive(Default)]
pub struct CpuConfig {
/// GPU side configuration.
pub gpu: GpuConfig,
/// Workgroup sizes for all compute pipelines.
pub workgroup_sizes: WorkgroupSizes,
/// Sizes of all buffer resources.
pub buffer_sizes: BufferSizes,
}
impl CpuConfig {
pub fn new(encoding: &Encoding, layout: &Layout, width: u32, height: u32) -> Self {
let new_width = next_multiple_of(width, 16);
let new_height = next_multiple_of(height, 16);
let mut config = GpuConfig {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: width,
target_height: height,
base_color: 0,
binning_size: 0,
tiles_size: 0,
segments_size: 0,
ptcl_size: 0,
layout: *layout,
};
let n_path_tags = encoding.path_tags.len() as u32;
let workgroup_sizes = WorkgroupSizes::new(&config, n_path_tags);
let buffer_sizes = BufferSizes::new(&config, &workgroup_sizes, n_path_tags);
config.binning_size = buffer_sizes.bin_data.len();
config.tiles_size = buffer_sizes.tiles.len();
config.segments_size = buffer_sizes.tiles.len();
config.ptcl_size = buffer_sizes.tiles.len();
Self {
gpu: config,
workgroup_sizes,
buffer_sizes,
}
}
}
const PATH_REDUCE_WG: u32 = 256;
const PATH_BBOX_WG: u32 = 256;
const PATH_COARSE_WG: u32 = 256;
const CLIP_REDUCE_WG: u32 = 256;
/// Type alias for a workgroup size.
pub type WorkgroupSize = (u32, u32, u32);
/// Computed sizes for all dispatches.
#[derive(Copy, Clone, Debug, Default)]
pub struct WorkgroupSizes {
pub use_large_path_scan: bool,
pub path_reduce: WorkgroupSize,
pub path_reduce2: WorkgroupSize,
pub path_scan1: WorkgroupSize,
pub path_scan: WorkgroupSize,
pub bbox_clear: WorkgroupSize,
pub path_seg: WorkgroupSize,
pub draw_reduce: WorkgroupSize,
pub draw_leaf: WorkgroupSize,
pub clip_reduce: WorkgroupSize,
pub clip_leaf: WorkgroupSize,
pub binning: WorkgroupSize,
pub tile_alloc: WorkgroupSize,
pub path_coarse: WorkgroupSize,
pub backdrop: WorkgroupSize,
pub coarse: WorkgroupSize,
pub fine: WorkgroupSize,
}
impl WorkgroupSizes {
pub fn new(config: &GpuConfig, n_path_tags: u32) -> Self {
let n_paths = config.layout.n_paths;
let n_draw_objects = config.layout.n_draw_objects;
let n_clips = config.layout.n_clips;
let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG);
let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG);
let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG;
let path_reduce_wgs = if use_large_path_scan {
align_up(path_tag_wgs, PATH_REDUCE_WG)
} else {
path_tag_wgs
};
let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
let path_coarse_wgs = (n_path_tags + PATH_COARSE_WG - 1) / PATH_COARSE_WG;
let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG;
let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG;
let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
Self {
use_large_path_scan,
path_reduce: (path_reduce_wgs, 1, 1),
path_reduce2: (PATH_REDUCE_WG, 1, 1),
path_scan1: (path_reduce_wgs / PATH_REDUCE_WG, 1, 1),
path_scan: (path_tag_wgs, 1, 1),
bbox_clear: (draw_object_wgs, 1, 1),
path_seg: (path_coarse_wgs, 1, 1),
draw_reduce: (draw_object_wgs, 1, 1),
draw_leaf: (draw_object_wgs, 1, 1),
clip_reduce: (clip_reduce_wgs, 1, 1),
clip_leaf: (clip_wgs, 1, 1),
binning: (draw_object_wgs, 1, 1),
tile_alloc: (path_wgs, 1, 1),
path_coarse: (path_coarse_wgs, 1, 1),
backdrop: (path_wgs, 1, 1),
coarse: (width_in_bins, height_in_bins, 1),
fine: (config.width_in_tiles, config.height_in_tiles, 1),
}
}
}
/// Typed buffer size primitive.
#[derive(Copy, Clone, Eq, Ord, Default, Debug)]
pub struct BufferSize<T: Sized> {
len: u32,
_phantom: std::marker::PhantomData<T>,
}
impl<T: Sized> BufferSize<T> {
/// Creates a new buffer size from number of elements.
pub const fn new(len: u32) -> Self {
Self {
len,
_phantom: std::marker::PhantomData,
}
}
/// Creates a new buffer size from size in bytes.
pub const fn from_size_in_bytes(size: u32) -> Self {
Self::new(size / mem::size_of::<T>() as u32)
}
/// Returns the number of elements.
pub const fn len(self) -> u32 {
self.len
}
/// Returns the size in bytes.
pub const fn size_in_bytes(self) -> u32 {
mem::size_of::<T>() as u32 * self.len
}
/// Returns the size in bytes aligned up to the given value.
pub const fn aligned_in_bytes(self, alignment: u32) -> u32 {
align_up(self.size_in_bytes(), alignment)
}
}
impl<T: Sized> PartialEq for BufferSize<T> {
fn eq(&self, other: &Self) -> bool {
self.len == other.len
}
}
impl<T: Sized> PartialOrd for BufferSize<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.len.partial_cmp(&other.len)
}
}
/// Computed sizes for all buffers.
#[derive(Copy, Clone, Debug, Default)]
pub struct BufferSizes {
// Known size buffers
pub path_reduced: BufferSize<PathMonoid>,
pub path_reduced2: BufferSize<PathMonoid>,
pub path_reduced_scan: BufferSize<PathMonoid>,
pub path_monoids: BufferSize<PathMonoid>,
pub path_bboxes: BufferSize<PathBbox>,
pub cubics: BufferSize<Cubic>,
pub draw_reduced: BufferSize<DrawMonoid>,
pub draw_monoids: BufferSize<DrawMonoid>,
pub info: BufferSize<u32>,
pub clip_inps: BufferSize<Clip>,
pub clip_els: BufferSize<ClipElement>,
pub clip_bics: BufferSize<ClipBic>,
pub clip_bboxes: BufferSize<ClipBbox>,
pub draw_bboxes: BufferSize<DrawBbox>,
pub bin_headers: BufferSize<BinHeader>,
pub paths: BufferSize<Path>,
// Bump allocated buffers
pub bin_data: BufferSize<u32>,
pub tiles: BufferSize<Tile>,
pub segments: BufferSize<PathSegment>,
pub ptcl: BufferSize<u32>,
}
impl BufferSizes {
pub fn new(config: &GpuConfig, workgroups: &WorkgroupSizes, n_path_tags: u32) -> Self {
let n_paths = config.layout.n_paths;
let n_draw_objects = config.layout.n_draw_objects;
let n_clips = config.layout.n_clips;
let path_tag_wgs = workgroups.path_reduce.0;
let path_reduced = BufferSize::new(path_tag_wgs);
let path_reduced2 = BufferSize::new(PATH_REDUCE_WG);
let path_reduced_scan = BufferSize::new(path_tag_wgs);
let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG);
let path_bboxes = BufferSize::new(n_paths);
let cubics = BufferSize::new(n_path_tags);
let draw_object_wgs = workgroups.draw_reduce.0;
let draw_reduced = BufferSize::new(draw_object_wgs);
let draw_monoids = BufferSize::new(n_draw_objects);
let info = BufferSize::new(config.layout.bin_data_start);
let clip_inps = BufferSize::new(n_clips);
let clip_els = BufferSize::new(n_clips);
let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG);
let clip_bboxes = BufferSize::new(n_clips);
let draw_bboxes = BufferSize::new(n_paths);
let bin_headers = BufferSize::new(draw_object_wgs * 256);
let n_paths_aligned = align_up(n_paths, 256);
let paths = BufferSize::new(n_paths_aligned);
// TODO: better heuristics. Just use 128k for now
let initial_bump_size = 128 * 1024;
let bin_data = BufferSize::from_size_in_bytes(initial_bump_size);
let tiles = BufferSize::from_size_in_bytes(initial_bump_size);
let segments = BufferSize::from_size_in_bytes(initial_bump_size);
let ptcl = BufferSize::from_size_in_bytes(initial_bump_size);
Self {
path_reduced,
path_reduced2,
path_reduced_scan,
path_monoids,
path_bboxes,
cubics,
draw_reduced,
draw_monoids,
info,
clip_inps,
clip_els,
clip_bics,
clip_bboxes,
draw_bboxes,
bin_headers,
paths,
bin_data,
tiles,
segments,
ptcl,
}
}
}
const fn align_up(len: u32, alignment: u32) -> u32 {
len + (len.wrapping_neg() & (alignment - 1))
}
const fn next_multiple_of(val: u32, rhs: u32) -> u32 {
match val % rhs {
0 => val,
r => val + (rhs - r),
}
}

View file

@ -54,6 +54,13 @@ impl DrawTag {
}
}
/// Draw object bounding box.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct DrawBbox {
pub bbox: [f32; 4],
}
/// Draw data for a solid color.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
@ -131,7 +138,7 @@ impl DrawBeginClip {
}
/// Monoid for the draw tag stream.
#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default)]
#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default, Debug)]
#[repr(C)]
pub struct DrawMonoid {
// The number of paths preceding this draw object.

View file

@ -168,9 +168,7 @@ impl Encoding {
linewidths: self.linewidths.len(),
}
}
}
impl Encoding {
/// Encodes a linewidth.
pub fn encode_linewidth(&mut self, linewidth: f32) {
if self.linewidths.last() != Some(&linewidth) {

View file

@ -16,6 +16,9 @@
//! Raw scene encoding.
mod binning;
mod clip;
mod config;
mod draw;
mod encoding;
mod glyph;
@ -27,14 +30,21 @@ mod path;
mod ramp_cache;
mod resolve;
pub use binning::BinHeader;
pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
pub use config::{
BufferSize, BufferSizes, BumpAllocators, CpuConfig, GpuConfig, WorkgroupSize, WorkgroupSizes,
};
pub use draw::{
DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid, DrawRadialGradient,
DrawTag,
DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,
DrawRadialGradient, DrawTag,
};
pub use encoding::{Encoding, StreamOffsets};
pub use glyph::{Glyph, GlyphRun};
pub use math::Transform;
pub use monoid::Monoid;
pub use path::{PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag};
pub use path::{
Cubic, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag, Tile,
};
pub use ramp_cache::Ramps;
pub use resolve::{Config, Layout, Patch, Resolver};
pub use resolve::{Layout, Patch, Resolver};

View file

@ -20,7 +20,7 @@ use peniko::kurbo::Shape;
use super::Monoid;
/// Path segment.
#[derive(Clone, Copy, Debug, Zeroable, Pod)]
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct PathSegment {
pub origin: [f32; 2],
@ -170,6 +170,19 @@ impl Monoid for PathMonoid {
}
}
/// Cubic path segment.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct Cubic {
pub p0: [f32; 2],
pub p1: [f32; 2],
pub p2: [f32; 2],
pub p3: [f32; 2],
pub stroke: [f32; 2],
pub path_ix: u32,
pub flags: u32,
}
/// Path bounding box.
#[derive(Copy, Clone, Pod, Zeroable, Default, Debug)]
#[repr(C)]
@ -188,6 +201,26 @@ pub struct PathBbox {
pub trans_ix: u32,
}
/// Tiled path object.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct Path {
/// Bounding box in tiles.
pub bbox: [f32; 4],
/// Offset (in u32s) to tile rectangle.
pub tiles: u32,
}
/// Tile object.
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct Tile {
/// Accumulated backdrop at the left edge of the tile.
pub backdrop: i32,
/// Index of first path segment.
pub segments: u32,
}
/// Encoder for path segments.
pub struct PathEncoder<'a> {
tags: &'a mut Vec<PathTag>,

View file

@ -107,35 +107,6 @@ impl Layout {
}
}
/// Scene configuration.
///
/// This data structure must be kept in sync with the definition in
/// shaders/shared/config.wgsl.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
pub struct Config {
/// Width of the scene in tiles.
pub width_in_tiles: u32,
/// Height of the scene in tiles.
pub height_in_tiles: u32,
/// Width of the target in pixels.
pub target_width: u32,
/// Height of the target in pixels.
pub target_height: u32,
/// The base background color applied to the target before any blends.
pub base_color: u32,
/// Layout of packed scene data.
pub layout: Layout,
/// Size of binning buffer allocation (in u32s).
pub binning_size: u32,
/// Size of tile buffer allocation (in Tiles).
pub tiles_size: u32,
/// Size of segment buffer allocation (in PathSegments).
pub segments_size: u32,
/// Size of per-tile command list buffer allocation (in u32s).
pub ptcl_size: u32,
}
/// Resolver for late bound resources.
#[derive(Default)]
pub struct Resolver {

View file

@ -27,8 +27,6 @@ pub use peniko::kurbo;
#[doc(hidden)]
pub use fello;
pub mod encoding;
pub mod glyph;
pub mod util;

View file

@ -7,10 +7,9 @@ use crate::{
};
use {
bytemuck::{Pod, Zeroable},
vello_encoding::{Config, Encoding, Layout},
vello_encoding::{Encoding, GpuConfig, Layout},
};
/// State for a render in progress.
pub struct Render {
/// Size of binning and info combined buffer in u32 units
@ -95,7 +94,7 @@ fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
let pathdata_base = size_to_words(scene.len());
scene.extend(&data.path_data);
let config = Config {
let config = GpuConfig {
width_in_tiles: 64,
height_in_tiles: 64,
target_width: 64 * 16,
@ -219,7 +218,8 @@ impl Render {
let mut recording = Recording::default();
let mut resolver = Resolver::new();
let mut packed = vec![];
let (layout, ramps, images) = resolver.resolve(encoding, &mut packed, shaders::PATHTAG_REDUCE_WG);
let (layout, ramps, images) =
resolver.resolve(encoding, &mut packed, shaders::PATHTAG_REDUCE_WG);
let gradient_image = if ramps.height == 0 {
ResourceProxy::new_image(1, 1, ImageFormat::Rgba8)
} else {
@ -247,7 +247,7 @@ impl Render {
let new_height = next_multiple_of(params.height, 16);
let info_size = layout.bin_data_start;
let config = vello_encoding::Config {
let config = GpuConfig {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: params.width,