diff --git a/crates/encoding/src/binning.rs b/crates/encoding/src/binning.rs new file mode 100644 index 0000000..f9ec7d0 --- /dev/null +++ b/crates/encoding/src/binning.rs @@ -0,0 +1,12 @@ +// Copyright 2022 Google LLC +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use bytemuck::{Pod, Zeroable}; + +/// Binning header. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct BinHeader { + pub element_count: u32, + pub chunk_offset: u32, +} diff --git a/crates/encoding/src/clip.rs b/crates/encoding/src/clip.rs new file mode 100644 index 0000000..1ecbadb --- /dev/null +++ b/crates/encoding/src/clip.rs @@ -0,0 +1,42 @@ +// Copyright 2022 Google LLC +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use bytemuck::{Pod, Zeroable}; + +/// Clip stack element. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipBic { + pub a: u32, + pub b: u32, +} + +/// Clip element. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipElement { + pub parent_ix: u32, + pub bbox: [f32; 4], +} + +/// Clip resolution. +/// +/// This is an intermediate element used to match clips to associated paths +/// and is also used to connect begin and end clip pairs. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Clip { + // Index of the draw object. + pub ix: u32, + /// This is a packed encoding of an enum with the sign bit as the tag. If positive, + /// this entry is a BeginClip and contains the associated path index. If negative, + /// it is an EndClip and contains the bitwise-not of the EndClip draw object index. + pub path_ix: i32, +} + +/// Clip bounding box. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct ClipBbox { + pub bbox: [f32; 4], +} diff --git a/crates/encoding/src/config.rs b/crates/encoding/src/config.rs new file mode 100644 index 0000000..47d718a --- /dev/null +++ b/crates/encoding/src/config.rs @@ -0,0 +1,307 @@ +use super::{ + BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Encoding, Layout, + Path, PathBbox, PathMonoid, PathSegment, Tile, +}; +use bytemuck::{Pod, Zeroable}; +use std::mem; + +/// Counters for tracking dynamic allocation on the GPU. +/// +/// This must be kept in sync with the struct in shader/shared/bump.wgsl +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct BumpAllocators { + pub failed: u32, + // Final needed dynamic size of the buffers. If any of these are larger + // than the corresponding `_size` element reallocation needs to occur. + pub binning: u32, + pub ptcl: u32, + pub tile: u32, + pub segments: u32, + pub blend: u32, +} + +/// GPU side configuration. +/// +/// This data structure must be kept in sync with the definition in +/// shaders/shared/config.wgsl. +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +#[repr(C)] +pub struct GpuConfig { + /// Width of the scene in tiles. + pub width_in_tiles: u32, + /// Height of the scene in tiles. + pub height_in_tiles: u32, + /// Width of the target in pixels. + pub target_width: u32, + /// Height of the target in pixels. + pub target_height: u32, + /// The base background color applied to the target before any blends. + pub base_color: u32, + /// Layout of packed scene data. + pub layout: Layout, + /// Size of binning buffer allocation (in u32s). + pub binning_size: u32, + /// Size of tile buffer allocation (in Tiles). + pub tiles_size: u32, + /// Size of segment buffer allocation (in PathSegments). + pub segments_size: u32, + /// Size of per-tile command list buffer allocation (in u32s). + pub ptcl_size: u32, +} + +/// CPU side setup and configuration. +#[derive(Default)] +pub struct CpuConfig { + /// GPU side configuration. + pub gpu: GpuConfig, + /// Workgroup sizes for all compute pipelines. + pub workgroup_sizes: WorkgroupSizes, + /// Sizes of all buffer resources. + pub buffer_sizes: BufferSizes, +} + +impl CpuConfig { + pub fn new(encoding: &Encoding, layout: &Layout, width: u32, height: u32) -> Self { + let new_width = next_multiple_of(width, 16); + let new_height = next_multiple_of(height, 16); + let mut config = GpuConfig { + width_in_tiles: new_width / 16, + height_in_tiles: new_height / 16, + target_width: width, + target_height: height, + base_color: 0, + binning_size: 0, + tiles_size: 0, + segments_size: 0, + ptcl_size: 0, + layout: *layout, + }; + let n_path_tags = encoding.path_tags.len() as u32; + let workgroup_sizes = WorkgroupSizes::new(&config, n_path_tags); + let buffer_sizes = BufferSizes::new(&config, &workgroup_sizes, n_path_tags); + config.binning_size = buffer_sizes.bin_data.len(); + config.tiles_size = buffer_sizes.tiles.len(); + config.segments_size = buffer_sizes.tiles.len(); + config.ptcl_size = buffer_sizes.tiles.len(); + Self { + gpu: config, + workgroup_sizes, + buffer_sizes, + } + } +} + +const PATH_REDUCE_WG: u32 = 256; +const PATH_BBOX_WG: u32 = 256; +const PATH_COARSE_WG: u32 = 256; +const CLIP_REDUCE_WG: u32 = 256; + +/// Type alias for a workgroup size. +pub type WorkgroupSize = (u32, u32, u32); + +/// Computed sizes for all dispatches. +#[derive(Copy, Clone, Debug, Default)] +pub struct WorkgroupSizes { + pub use_large_path_scan: bool, + pub path_reduce: WorkgroupSize, + pub path_reduce2: WorkgroupSize, + pub path_scan1: WorkgroupSize, + pub path_scan: WorkgroupSize, + pub bbox_clear: WorkgroupSize, + pub path_seg: WorkgroupSize, + pub draw_reduce: WorkgroupSize, + pub draw_leaf: WorkgroupSize, + pub clip_reduce: WorkgroupSize, + pub clip_leaf: WorkgroupSize, + pub binning: WorkgroupSize, + pub tile_alloc: WorkgroupSize, + pub path_coarse: WorkgroupSize, + pub backdrop: WorkgroupSize, + pub coarse: WorkgroupSize, + pub fine: WorkgroupSize, +} + +impl WorkgroupSizes { + pub fn new(config: &GpuConfig, n_path_tags: u32) -> Self { + let n_paths = config.layout.n_paths; + let n_draw_objects = config.layout.n_draw_objects; + let n_clips = config.layout.n_clips; + let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG); + let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG); + let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG; + let path_reduce_wgs = if use_large_path_scan { + align_up(path_tag_wgs, PATH_REDUCE_WG) + } else { + path_tag_wgs + }; + let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG; + let path_coarse_wgs = (n_path_tags + PATH_COARSE_WG - 1) / PATH_COARSE_WG; + let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG; + let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG; + let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG; + let width_in_bins = (config.width_in_tiles + 15) / 16; + let height_in_bins = (config.height_in_tiles + 15) / 16; + Self { + use_large_path_scan, + path_reduce: (path_reduce_wgs, 1, 1), + path_reduce2: (PATH_REDUCE_WG, 1, 1), + path_scan1: (path_reduce_wgs / PATH_REDUCE_WG, 1, 1), + path_scan: (path_tag_wgs, 1, 1), + bbox_clear: (draw_object_wgs, 1, 1), + path_seg: (path_coarse_wgs, 1, 1), + draw_reduce: (draw_object_wgs, 1, 1), + draw_leaf: (draw_object_wgs, 1, 1), + clip_reduce: (clip_reduce_wgs, 1, 1), + clip_leaf: (clip_wgs, 1, 1), + binning: (draw_object_wgs, 1, 1), + tile_alloc: (path_wgs, 1, 1), + path_coarse: (path_coarse_wgs, 1, 1), + backdrop: (path_wgs, 1, 1), + coarse: (width_in_bins, height_in_bins, 1), + fine: (config.width_in_tiles, config.height_in_tiles, 1), + } + } +} + +/// Typed buffer size primitive. +#[derive(Copy, Clone, Eq, Ord, Default, Debug)] +pub struct BufferSize { + len: u32, + _phantom: std::marker::PhantomData, +} + +impl BufferSize { + /// Creates a new buffer size from number of elements. + pub const fn new(len: u32) -> Self { + Self { + len, + _phantom: std::marker::PhantomData, + } + } + + /// Creates a new buffer size from size in bytes. + pub const fn from_size_in_bytes(size: u32) -> Self { + Self::new(size / mem::size_of::() as u32) + } + + /// Returns the number of elements. + pub const fn len(self) -> u32 { + self.len + } + + /// Returns the size in bytes. + pub const fn size_in_bytes(self) -> u32 { + mem::size_of::() as u32 * self.len + } + + /// Returns the size in bytes aligned up to the given value. + pub const fn aligned_in_bytes(self, alignment: u32) -> u32 { + align_up(self.size_in_bytes(), alignment) + } +} + +impl PartialEq for BufferSize { + fn eq(&self, other: &Self) -> bool { + self.len == other.len + } +} + +impl PartialOrd for BufferSize { + fn partial_cmp(&self, other: &Self) -> Option { + self.len.partial_cmp(&other.len) + } +} + +/// Computed sizes for all buffers. +#[derive(Copy, Clone, Debug, Default)] +pub struct BufferSizes { + // Known size buffers + pub path_reduced: BufferSize, + pub path_reduced2: BufferSize, + pub path_reduced_scan: BufferSize, + pub path_monoids: BufferSize, + pub path_bboxes: BufferSize, + pub cubics: BufferSize, + pub draw_reduced: BufferSize, + pub draw_monoids: BufferSize, + pub info: BufferSize, + pub clip_inps: BufferSize, + pub clip_els: BufferSize, + pub clip_bics: BufferSize, + pub clip_bboxes: BufferSize, + pub draw_bboxes: BufferSize, + pub bin_headers: BufferSize, + pub paths: BufferSize, + // Bump allocated buffers + pub bin_data: BufferSize, + pub tiles: BufferSize, + pub segments: BufferSize, + pub ptcl: BufferSize, +} + +impl BufferSizes { + pub fn new(config: &GpuConfig, workgroups: &WorkgroupSizes, n_path_tags: u32) -> Self { + let n_paths = config.layout.n_paths; + let n_draw_objects = config.layout.n_draw_objects; + let n_clips = config.layout.n_clips; + let path_tag_wgs = workgroups.path_reduce.0; + let path_reduced = BufferSize::new(path_tag_wgs); + let path_reduced2 = BufferSize::new(PATH_REDUCE_WG); + let path_reduced_scan = BufferSize::new(path_tag_wgs); + let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG); + let path_bboxes = BufferSize::new(n_paths); + let cubics = BufferSize::new(n_path_tags); + let draw_object_wgs = workgroups.draw_reduce.0; + let draw_reduced = BufferSize::new(draw_object_wgs); + let draw_monoids = BufferSize::new(n_draw_objects); + let info = BufferSize::new(config.layout.bin_data_start); + let clip_inps = BufferSize::new(n_clips); + let clip_els = BufferSize::new(n_clips); + let clip_bics = BufferSize::new(n_clips / CLIP_REDUCE_WG); + let clip_bboxes = BufferSize::new(n_clips); + let draw_bboxes = BufferSize::new(n_paths); + let bin_headers = BufferSize::new(draw_object_wgs * 256); + let n_paths_aligned = align_up(n_paths, 256); + let paths = BufferSize::new(n_paths_aligned); + // TODO: better heuristics. Just use 128k for now + let initial_bump_size = 128 * 1024; + let bin_data = BufferSize::from_size_in_bytes(initial_bump_size); + let tiles = BufferSize::from_size_in_bytes(initial_bump_size); + let segments = BufferSize::from_size_in_bytes(initial_bump_size); + let ptcl = BufferSize::from_size_in_bytes(initial_bump_size); + Self { + path_reduced, + path_reduced2, + path_reduced_scan, + path_monoids, + path_bboxes, + cubics, + draw_reduced, + draw_monoids, + info, + clip_inps, + clip_els, + clip_bics, + clip_bboxes, + draw_bboxes, + bin_headers, + paths, + bin_data, + tiles, + segments, + ptcl, + } + } +} + +const fn align_up(len: u32, alignment: u32) -> u32 { + len + (len.wrapping_neg() & (alignment - 1)) +} + +const fn next_multiple_of(val: u32, rhs: u32) -> u32 { + match val % rhs { + 0 => val, + r => val + (rhs - r), + } +} diff --git a/crates/encoding/src/draw.rs b/crates/encoding/src/draw.rs index 64fdd16..40022c1 100644 --- a/crates/encoding/src/draw.rs +++ b/crates/encoding/src/draw.rs @@ -54,6 +54,13 @@ impl DrawTag { } } +/// Draw object bounding box. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct DrawBbox { + pub bbox: [f32; 4], +} + /// Draw data for a solid color. #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[repr(C)] @@ -131,7 +138,7 @@ impl DrawBeginClip { } /// Monoid for the draw tag stream. -#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default)] +#[derive(Copy, Clone, PartialEq, Eq, Pod, Zeroable, Default, Debug)] #[repr(C)] pub struct DrawMonoid { // The number of paths preceding this draw object. diff --git a/crates/encoding/src/encoding.rs b/crates/encoding/src/encoding.rs index 3c140dc..ca72df3 100644 --- a/crates/encoding/src/encoding.rs +++ b/crates/encoding/src/encoding.rs @@ -168,9 +168,7 @@ impl Encoding { linewidths: self.linewidths.len(), } } -} -impl Encoding { /// Encodes a linewidth. pub fn encode_linewidth(&mut self, linewidth: f32) { if self.linewidths.last() != Some(&linewidth) { diff --git a/crates/encoding/src/lib.rs b/crates/encoding/src/lib.rs index 2925256..68a4fac 100644 --- a/crates/encoding/src/lib.rs +++ b/crates/encoding/src/lib.rs @@ -16,6 +16,9 @@ //! Raw scene encoding. +mod binning; +mod clip; +mod config; mod draw; mod encoding; mod glyph; @@ -27,14 +30,21 @@ mod path; mod ramp_cache; mod resolve; +pub use binning::BinHeader; +pub use clip::{Clip, ClipBbox, ClipBic, ClipElement}; +pub use config::{ + BufferSize, BufferSizes, BumpAllocators, CpuConfig, GpuConfig, WorkgroupSize, WorkgroupSizes, +}; pub use draw::{ - DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid, DrawRadialGradient, - DrawTag, + DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid, + DrawRadialGradient, DrawTag, }; pub use encoding::{Encoding, StreamOffsets}; pub use glyph::{Glyph, GlyphRun}; pub use math::Transform; pub use monoid::Monoid; -pub use path::{PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag}; +pub use path::{ + Cubic, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag, Tile, +}; pub use ramp_cache::Ramps; -pub use resolve::{Config, Layout, Patch, Resolver}; +pub use resolve::{Layout, Patch, Resolver}; diff --git a/crates/encoding/src/path.rs b/crates/encoding/src/path.rs index 5b01118..45f679f 100644 --- a/crates/encoding/src/path.rs +++ b/crates/encoding/src/path.rs @@ -20,7 +20,7 @@ use peniko::kurbo::Shape; use super::Monoid; /// Path segment. -#[derive(Clone, Copy, Debug, Zeroable, Pod)] +#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)] #[repr(C)] pub struct PathSegment { pub origin: [f32; 2], @@ -170,6 +170,19 @@ impl Monoid for PathMonoid { } } +/// Cubic path segment. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Cubic { + pub p0: [f32; 2], + pub p1: [f32; 2], + pub p2: [f32; 2], + pub p3: [f32; 2], + pub stroke: [f32; 2], + pub path_ix: u32, + pub flags: u32, +} + /// Path bounding box. #[derive(Copy, Clone, Pod, Zeroable, Default, Debug)] #[repr(C)] @@ -188,6 +201,26 @@ pub struct PathBbox { pub trans_ix: u32, } +/// Tiled path object. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Path { + /// Bounding box in tiles. + pub bbox: [f32; 4], + /// Offset (in u32s) to tile rectangle. + pub tiles: u32, +} + +/// Tile object. +#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)] +#[repr(C)] +pub struct Tile { + /// Accumulated backdrop at the left edge of the tile. + pub backdrop: i32, + /// Index of first path segment. + pub segments: u32, +} + /// Encoder for path segments. pub struct PathEncoder<'a> { tags: &'a mut Vec, diff --git a/crates/encoding/src/resolve.rs b/crates/encoding/src/resolve.rs index 051f3ac..1e0ed88 100644 --- a/crates/encoding/src/resolve.rs +++ b/crates/encoding/src/resolve.rs @@ -107,35 +107,6 @@ impl Layout { } } -/// Scene configuration. -/// -/// This data structure must be kept in sync with the definition in -/// shaders/shared/config.wgsl. -#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] -#[repr(C)] -pub struct Config { - /// Width of the scene in tiles. - pub width_in_tiles: u32, - /// Height of the scene in tiles. - pub height_in_tiles: u32, - /// Width of the target in pixels. - pub target_width: u32, - /// Height of the target in pixels. - pub target_height: u32, - /// The base background color applied to the target before any blends. - pub base_color: u32, - /// Layout of packed scene data. - pub layout: Layout, - /// Size of binning buffer allocation (in u32s). - pub binning_size: u32, - /// Size of tile buffer allocation (in Tiles). - pub tiles_size: u32, - /// Size of segment buffer allocation (in PathSegments). - pub segments_size: u32, - /// Size of per-tile command list buffer allocation (in u32s). - pub ptcl_size: u32, -} - /// Resolver for late bound resources. #[derive(Default)] pub struct Resolver { diff --git a/src/lib.rs b/src/lib.rs index d61e176..047d3b4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,8 +27,6 @@ pub use peniko::kurbo; #[doc(hidden)] pub use fello; -pub mod encoding; - pub mod glyph; pub mod util; diff --git a/src/render.rs b/src/render.rs index 893f82f..ee3e47a 100644 --- a/src/render.rs +++ b/src/render.rs @@ -7,10 +7,9 @@ use crate::{ }; use { bytemuck::{Pod, Zeroable}, - vello_encoding::{Config, Encoding, Layout}, + vello_encoding::{Encoding, GpuConfig, Layout}, }; - /// State for a render in progress. pub struct Render { /// Size of binning and info combined buffer in u32 units @@ -95,7 +94,7 @@ fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { let pathdata_base = size_to_words(scene.len()); scene.extend(&data.path_data); - let config = Config { + let config = GpuConfig { width_in_tiles: 64, height_in_tiles: 64, target_width: 64 * 16, @@ -219,7 +218,8 @@ impl Render { let mut recording = Recording::default(); let mut resolver = Resolver::new(); let mut packed = vec![]; - let (layout, ramps, images) = resolver.resolve(encoding, &mut packed, shaders::PATHTAG_REDUCE_WG); + let (layout, ramps, images) = + resolver.resolve(encoding, &mut packed, shaders::PATHTAG_REDUCE_WG); let gradient_image = if ramps.height == 0 { ResourceProxy::new_image(1, 1, ImageFormat::Rgba8) } else { @@ -247,7 +247,7 @@ impl Render { let new_height = next_multiple_of(params.height, 16); let info_size = layout.bin_data_start; - let config = vello_encoding::Config { + let config = GpuConfig { width_in_tiles: new_width / 16, height_in_tiles: new_height / 16, target_width: params.width,