diff --git a/pgpu-render/src/render.rs b/pgpu-render/src/render.rs index d3ae07b..1227f19 100644 --- a/pgpu-render/src/render.rs +++ b/pgpu-render/src/render.rs @@ -147,9 +147,7 @@ pub struct PgpuSceneBuilder<'a>(piet_scene::scene::Builder<'a>); impl<'a> PgpuSceneBuilder<'a> { pub fn add_glyph(&mut self, glyph: &PgpuGlyph, transform: &piet_scene::geometry::Affine) { - self.0.push_transform(*transform); - self.0.append(&glyph.fragment); - self.0.pop_transform(); + self.0.append(&glyph.fragment, Some(*transform)); } pub fn finish(self) { diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 09b0683..079c0e7 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -66,22 +66,6 @@ build gen/kernel4_gray.msl: msl gen/kernel4_gray.spv # New element pipeline follows -build gen/transform_reduce.spv: glsl transform_reduce.comp | scene.h setup.h mem.h -build gen/transform_reduce.hlsl: hlsl gen/transform_reduce.spv -build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl -build gen/transform_reduce.msl: msl gen/transform_reduce.spv - -build gen/transform_root.spv: glsl transform_scan.comp | setup.h - flags = -DROOT -build gen/transform_root.hlsl: hlsl gen/transform_root.spv -build gen/transform_root.dxil: dxil gen/transform_root.hlsl -build gen/transform_root.msl: msl gen/transform_root.spv - -build gen/transform_leaf.spv: glsl transform_leaf.comp | scene.h tile.h setup.h mem.h -build gen/transform_leaf.hlsl: hlsl gen/transform_leaf.spv -build gen/transform_leaf.dxil: dxil gen/transform_leaf.hlsl -build gen/transform_leaf.msl: msl gen/transform_leaf.spv - build gen/pathtag_reduce.spv: glsl pathtag_reduce.comp | pathtag.h setup.h mem.h build gen/pathtag_reduce.hlsl: hlsl gen/pathtag_reduce.spv build gen/pathtag_reduce.dxil: dxil gen/pathtag_reduce.hlsl @@ -98,7 +82,7 @@ build gen/bbox_clear.hlsl: hlsl gen/bbox_clear.spv build gen/bbox_clear.dxil: dxil gen/bbox_clear.hlsl build gen/bbox_clear.msl: msl gen/bbox_clear.spv -build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h mem.h +build gen/pathseg.spv: glsl pathseg.comp | scene.h tile.h pathseg.h pathtag.h setup.h mem.h build gen/pathseg.hlsl: hlsl gen/pathseg.spv build gen/pathseg.dxil: dxil gen/pathseg.hlsl build gen/pathseg.msl: msl gen/pathseg.spv @@ -129,6 +113,6 @@ build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl build gen/clip_leaf.msl: msl gen/clip_leaf.spv -build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv -build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl -build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl +build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv +build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl +build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index ef369c9..434c7ea 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -108,10 +108,10 @@ void main() { vec2 translate; if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) { uint trans_ix = memory[bbox_offset + 5]; - uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; - mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); + uint t = (conf.trans_offset >> 2) + trans_ix * 6; + mat = uintBitsToFloat(uvec4(scene[t], scene[t + 1], scene[t + 2], scene[t + 3])); if (tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) { - translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); + translate = uintBitsToFloat(uvec2(scene[t + 4], scene[t + 5])); } } if (linewidth >= 0.0) { diff --git a/piet-gpu/shader/pathseg.comp b/piet-gpu/shader/pathseg.comp index ce4ab84..0efa66f 100644 --- a/piet-gpu/shader/pathseg.comp +++ b/piet-gpu/shader/pathseg.comp @@ -26,6 +26,7 @@ layout(binding = 2) readonly buffer SceneBuf { #include "tile.h" #include "pathseg.h" +#include "scene.h" layout(binding = 3) readonly buffer ParentBuf { TagMonoid[] parent; @@ -126,7 +127,7 @@ void main() { uint lw_ix = (conf.linewidth_offset >> 2) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; uint trans_ix = tm.trans_ix; - TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + trans_ix * TransformSeg_size); + TransformRef trans_ref = TransformRef(conf.trans_offset + trans_ix * Transform_size); PathSegRef ps_ref = PathSegRef(conf.pathseg_alloc.offset + tm.pathseg_ix * PathSeg_size); for (uint i = 0; i < N_SEQ; i++) { linewidth[i] = uintBitsToFloat(scene[lw_ix]); @@ -162,7 +163,7 @@ void main() { } } } - TransformSeg transform = TransformSeg_read(conf.trans_alloc, trans_ref); + Transform transform = Transform_read(trans_ref); p0 = transform.mat.xy * p0.x + transform.mat.zw * p0.y + transform.translate; p1 = transform.mat.xy * p1.x + transform.mat.zw * p1.y + transform.translate; vec4 bbox = vec4(min(p0, p1), max(p0, p1)); @@ -219,7 +220,7 @@ void main() { local[i].flags = is_path; tm.path_ix += is_path; trans_ix += (tag_byte >> 5) & 1; - trans_ref.offset += ((tag_byte >> 5) & 1) * TransformSeg_size; + trans_ref.offset += ((tag_byte >> 5) & 1) * Transform_size; lw_ix += (tag_byte >> 6) & 1; } } diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index eb9f9ea..e6b6e3f 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -43,7 +43,6 @@ struct Config { Alloc ptcl_alloc; Alloc pathseg_alloc; Alloc anno_alloc; - Alloc trans_alloc; // new element pipeline stuff follows // Bounding boxes of paths, stored as int (so atomics work) diff --git a/piet-gpu/shader/transform_leaf.comp b/piet-gpu/shader/transform_leaf.comp deleted file mode 100644 index a5e4003..0000000 --- a/piet-gpu/shader/transform_leaf.comp +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense - -// A scan for a tree reduction prefix scan that outputs the final result. -// Output is written into memory at trans_alloc. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -#include "mem.h" -#include "setup.h" - -#define N_ROWS 8 -#define LG_WG_SIZE (7 + LG_WG_FACTOR) -#define WG_SIZE (1 << LG_WG_SIZE) -#define PARTITION_SIZE (WG_SIZE * N_ROWS) - -layout(local_size_x = WG_SIZE, local_size_y = 1) in; - -layout(binding = 1) readonly buffer ConfigBuf { - Config conf; -}; - -layout(binding = 2) readonly buffer SceneBuf { - uint[] scene; -}; - -#include "scene.h" -#include "tile.h" - -#define Monoid Transform - -layout(set = 0, binding = 3) readonly buffer ParentBuf { - Monoid[] parent; -}; - -Monoid monoid_identity() { - return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0)); -} - -Monoid combine_monoid(Monoid a, Monoid b) { - Monoid c; - c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww; - c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate; - return c; -} - -shared Monoid sh_scratch[WG_SIZE]; - -void main() { - Monoid local[N_ROWS]; - - uint ix = gl_GlobalInvocationID.x * N_ROWS; - TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size); - - Monoid agg = Transform_read(ref); - local[0] = agg; - for (uint i = 1; i < N_ROWS; i++) { - agg = combine_monoid(agg, Transform_read(Transform_index(ref, i))); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i = 0; i < LG_WG_SIZE; i++) { - barrier(); - if (gl_LocalInvocationID.x >= (1u << i)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; - agg = combine_monoid(other, agg); - } - barrier(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - - barrier(); - Monoid row = monoid_identity(); - if (gl_WorkGroupID.x > 0) { - row = parent[gl_WorkGroupID.x - 1]; - } - if (gl_LocalInvocationID.x > 0) { - row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); - } - for (uint i = 0; i < N_ROWS; i++) { - Monoid m = combine_monoid(row, local[i]); - TransformSeg transform = TransformSeg(m.mat, m.translate); - TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (ix + i) * TransformSeg_size); - TransformSeg_write(conf.trans_alloc, trans_ref, transform); - } -} diff --git a/piet-gpu/shader/transform_reduce.comp b/piet-gpu/shader/transform_reduce.comp deleted file mode 100644 index e59d559..0000000 --- a/piet-gpu/shader/transform_reduce.comp +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense - -// The reduction phase for transform scan implemented as a tree reduction. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -#include "mem.h" -#include "setup.h" - -#define N_ROWS 8 -#define LG_WG_SIZE (7 + LG_WG_FACTOR) -#define WG_SIZE (1 << LG_WG_SIZE) -#define PARTITION_SIZE (WG_SIZE * N_ROWS) - -layout(local_size_x = WG_SIZE, local_size_y = 1) in; - -layout(binding = 1) readonly buffer ConfigBuf { - Config conf; -}; - -layout(binding = 2) readonly buffer SceneBuf { - uint[] scene; -}; - -#include "scene.h" - -#define Monoid Transform - -layout(set = 0, binding = 3) buffer OutBuf { - Monoid[] outbuf; -}; - -Monoid monoid_identity() { - return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0)); -} - -Monoid combine_monoid(Monoid a, Monoid b) { - Monoid c; - c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww; - c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate; - return c; -} - -shared Monoid sh_scratch[WG_SIZE]; - -void main() { - uint ix = gl_GlobalInvocationID.x * N_ROWS; - TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size); - - Monoid agg = Transform_read(ref); - for (uint i = 1; i < N_ROWS; i++) { - agg = combine_monoid(agg, Transform_read(Transform_index(ref, i))); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i = 0; i < LG_WG_SIZE; i++) { - barrier(); - // We could make this predicate tighter, but would it help? - if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { - Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; - agg = combine_monoid(agg, other); - } - barrier(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0) { - outbuf[gl_WorkGroupID.x] = agg; - } -} diff --git a/piet-gpu/shader/transform_scan.comp b/piet-gpu/shader/transform_scan.comp deleted file mode 100644 index 20b2a8a..0000000 --- a/piet-gpu/shader/transform_scan.comp +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense - -// A scan for a tree reduction prefix scan (either root or not, by ifdef). - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -#include "setup.h" - -#define N_ROWS 8 -#define LG_WG_SIZE (7 + LG_WG_FACTOR) -#define WG_SIZE (1 << LG_WG_SIZE) -#define PARTITION_SIZE (WG_SIZE * N_ROWS) - -layout(local_size_x = WG_SIZE, local_size_y = 1) in; - -// This is copy-pasted from scene.h. It might be better for DRY -// to include it, but that pulls in more stuff we don't need. -struct Transform { - vec4 mat; - vec2 translate; -}; - -#define Monoid Transform - -layout(binding = 0) buffer DataBuf { - Monoid[] data; -}; - -#ifndef ROOT -layout(binding = 1) readonly buffer ParentBuf { - Monoid[] parent; -}; -#endif - -Monoid monoid_identity() { - return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0)); -} - -Monoid combine_monoid(Monoid a, Monoid b) { - Monoid c; - c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww; - c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate; - return c; -} - -shared Monoid sh_scratch[WG_SIZE]; - -void main() { - Monoid local[N_ROWS]; - - uint ix = gl_GlobalInvocationID.x * N_ROWS; - - local[0] = data[ix]; - for (uint i = 1; i < N_ROWS; i++) { - local[i] = combine_monoid(local[i - 1], data[ix + i]); - } - Monoid agg = local[N_ROWS - 1]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i = 0; i < LG_WG_SIZE; i++) { - barrier(); - if (gl_LocalInvocationID.x >= (1u << i)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; - agg = combine_monoid(other, agg); - } - barrier(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - - barrier(); - // This could be a semigroup instead of a monoid if we reworked the - // conditional logic, but that might impact performance. - Monoid row = monoid_identity(); -#ifdef ROOT - if (gl_LocalInvocationID.x > 0) { - row = sh_scratch[gl_LocalInvocationID.x - 1]; - } -#else - if (gl_WorkGroupID.x > 0) { - row = parent[gl_WorkGroupID.x - 1]; - } - if (gl_LocalInvocationID.x > 0) { - row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); - } -#endif - for (uint i = 0; i < N_ROWS; i++) { - Monoid m = combine_monoid(row, local[i]); - // TODO: gate buffer write - data[ix + i] = m; - } -} diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index bddb6f4..2d7c23a 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -20,9 +20,7 @@ use crate::{Blend, SceneStats, DRAWTAG_SIZE, TRANSFORM_SIZE}; use bytemuck::{Pod, Zeroable}; use piet_gpu_hal::BufWrite; -use crate::stages::{ - self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE, -}; +use crate::stages::{self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE}; pub struct Encoder { transform_stream: Vec, @@ -72,8 +70,6 @@ impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> { buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE); buf.extend_slice(&self.drawdata_stream); buf.extend_slice(&self.transform_stream); - let n_trans = self.transform_stream.len(); - buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE); buf.extend_slice(&self.linewidth_stream); buf.extend_slice(&self.tag_stream); let n_pathtag = self.tag_stream.len(); @@ -244,8 +240,6 @@ impl Encoder { buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE); buf.extend_slice(&self.drawdata_stream); buf.extend_slice(&self.transform_stream); - let n_trans = self.transform_stream.len(); - buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE); buf.extend_slice(&self.linewidth_stream); buf.extend_slice(&self.tag_stream); let n_pathtag = self.tag_stream.len(); diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index bfb5f19..e0415d4 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -27,10 +27,7 @@ use piet_gpu_hal::{ }; pub use pico_svg::PicoSvg; -use stages::{ - ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE, - TRANSFORM_PART_SIZE, -}; +use stages::{ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE}; use crate::stages::{ClipCode, Config, ElementStage, CLIP_PART_SIZE}; @@ -525,7 +522,6 @@ impl Renderer { &mut pass, &self.element_code, &self.element_bindings[buf_ix], - self.n_transform as u64, self.n_paths as u32, self.n_pathtag as u32, self.n_drawobj as u64, @@ -796,7 +792,7 @@ impl SceneStats { pub(crate) fn scene_size(&self) -> usize { align_up(self.n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE + self.drawdata_len - + align_up(self.n_transform, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE + + self.n_transform * TRANSFORM_SIZE + self.linewidth_len + align_up(self.n_pathtag, PATHSEG_PART_SIZE as usize) + self.pathseg_len @@ -813,8 +809,7 @@ impl SceneStats { let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE; let trans_offset = drawdata_offset + self.drawdata_len; let n_trans = self.n_transform; - let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize); - let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE; + let linewidth_offset = trans_offset + n_trans * TRANSFORM_SIZE; let pathtag_offset = linewidth_offset + self.linewidth_len; let n_pathtag = self.n_pathtag; let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize); @@ -822,8 +817,6 @@ impl SceneStats { // Layout of memory let mut alloc = 0; - let trans_alloc = alloc; - alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE; let pathseg_alloc = alloc; alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE; let path_bbox_alloc = alloc; @@ -872,7 +865,6 @@ impl SceneStats { n_pathseg: self.n_pathseg, pathseg_alloc: pathseg_alloc as u32, anno_alloc: anno_alloc as u32, - trans_alloc: trans_alloc as u32, path_bbox_alloc: path_bbox_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, clip_alloc: clip_alloc as u32, diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index caef303..a283507 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -57,11 +57,7 @@ pub enum PietGpuBrush { #[derive(Default)] struct State { - /// The transform relative to the parent state. - rel_transform: Affine, /// The transform at the parent state. - /// - /// This invariant should hold: transform * rel_transform = cur_transform transform: Affine, n_clip: usize, } @@ -219,7 +215,6 @@ impl RenderContext for PietGpuRenderContext { fn save(&mut self) -> Result<(), Error> { self.state_stack.push(State { - rel_transform: Affine::default(), transform: self.cur_transform, n_clip: 0, }); @@ -228,10 +223,7 @@ impl RenderContext for PietGpuRenderContext { fn restore(&mut self) -> Result<(), Error> { if let Some(state) = self.state_stack.pop() { - if state.rel_transform != Affine::default() { - let a_inv = state.rel_transform.inverse(); - self.encode_transform(Transform::from_kurbo(a_inv)); - } + self.encode_transform(Transform::from_kurbo(state.transform)); self.cur_transform = state.transform; for _ in 0..state.n_clip { self.pop_clip(); @@ -250,11 +242,8 @@ impl RenderContext for PietGpuRenderContext { } fn transform(&mut self, transform: Affine) { - self.encode_transform(Transform::from_kurbo(transform)); - if let Some(tos) = self.state_stack.last_mut() { - tos.rel_transform *= transform; - } self.cur_transform *= transform; + self.encode_transform(Transform::from_kurbo(self.cur_transform)); } fn make_image( diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index fd85776..e786ef5 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -27,9 +27,7 @@ pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE}; use piet_gpu_hal::{Buffer, ComputePass, Session}; -pub use transform::{ - Transform, TransformBinding, TransformCode, TransformStage, TRANSFORM_PART_SIZE, -}; +pub use transform::Transform; /// The configuration block passed to piet-gpu shaders. /// @@ -47,7 +45,6 @@ pub struct Config { pub ptcl_alloc: u32, pub pathseg_alloc: u32, pub anno_alloc: u32, - pub trans_alloc: u32, pub path_bbox_alloc: u32, pub drawmonoid_alloc: u32, pub clip_alloc: u32, @@ -70,19 +67,16 @@ pub struct Config { // The "element" stage combines a number of stages for parts of the pipeline. pub struct ElementCode { - transform_code: TransformCode, path_code: PathCode, draw_code: DrawCode, } pub struct ElementStage { - transform_stage: TransformStage, path_stage: PathStage, draw_stage: DrawStage, } pub struct ElementBinding { - transform_binding: TransformBinding, path_binding: PathBinding, draw_binding: DrawBinding, } @@ -90,7 +84,6 @@ pub struct ElementBinding { impl ElementCode { pub unsafe fn new(session: &Session) -> ElementCode { ElementCode { - transform_code: TransformCode::new(session), path_code: PathCode::new(session), draw_code: DrawCode::new(session), } @@ -100,7 +93,6 @@ impl ElementCode { impl ElementStage { pub unsafe fn new(session: &Session, code: &ElementCode) -> ElementStage { ElementStage { - transform_stage: TransformStage::new(session, &code.transform_code), path_stage: PathStage::new(session, &code.path_code), draw_stage: DrawStage::new(session, &code.draw_code), } @@ -115,13 +107,6 @@ impl ElementStage { memory_buf: &Buffer, ) -> ElementBinding { ElementBinding { - transform_binding: self.transform_stage.bind( - session, - &code.transform_code, - config_buf, - scene_buf, - memory_buf, - ), path_binding: self.path_stage.bind( session, &code.path_code, @@ -144,17 +129,10 @@ impl ElementStage { pass: &mut ComputePass, code: &ElementCode, binding: &ElementBinding, - n_transform: u64, n_paths: u32, n_tags: u32, n_drawobj: u64, ) { - self.transform_stage.record( - pass, - &code.transform_code, - &binding.transform_binding, - n_transform, - ); // No memory barrier needed here; path has at least one before pathseg self.path_stage.record( pass, @@ -171,13 +149,11 @@ impl ElementStage { impl ElementBinding { pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) { - self.transform_binding.rebind_memory(session, memory); self.path_binding.rebind_memory(session, memory); self.draw_binding.rebind_memory(session, memory); } pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) { - self.transform_binding.rebind_scene(session, scene); self.path_binding.rebind_scene(session, scene); self.draw_binding.rebind_scene(session, scene); } diff --git a/piet-gpu/src/stages/transform.rs b/piet-gpu/src/stages/transform.rs index 43b68df..0e0a3f6 100644 --- a/piet-gpu/src/stages/transform.rs +++ b/piet-gpu/src/stages/transform.rs @@ -19,9 +19,6 @@ use bytemuck::{Pod, Zeroable}; use piet::kurbo::Affine; -use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, -}; /// An affine transform. // This is equivalent to the version in piet-gpu-types, but the bytemuck @@ -33,151 +30,6 @@ pub struct Transform { pub translate: [f32; 2], } -const TRANSFORM_WG: u64 = 256; -const TRANSFORM_N_ROWS: u64 = 8; -pub const TRANSFORM_PART_SIZE: u64 = TRANSFORM_WG * TRANSFORM_N_ROWS; - -pub struct TransformCode { - reduce_pipeline: Pipeline, - root_pipeline: Pipeline, - leaf_pipeline: Pipeline, -} - -pub struct TransformStage { - // Right now we're limited to partition^2 (~16M) elements. This can be - // expanded but is tedious. - root_buf: Buffer, - root_ds: DescriptorSet, -} - -pub struct TransformBinding { - reduce_ds: DescriptorSet, - leaf_ds: DescriptorSet, -} - -impl TransformCode { - pub unsafe fn new(session: &Session) -> TransformCode { - let reduce_code = include_shader!(session, "../../shader/gen/transform_reduce"); - let reduce_pipeline = session - .create_compute_pipeline( - reduce_code, - &[ - BindType::Buffer, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Buffer, - ], - ) - .unwrap(); - let root_code = include_shader!(session, "../../shader/gen/transform_root"); - let root_pipeline = session - .create_compute_pipeline(root_code, &[BindType::Buffer]) - .unwrap(); - let leaf_code = include_shader!(session, "../../shader/gen/transform_leaf"); - let leaf_pipeline = session - .create_compute_pipeline( - leaf_code, - &[ - BindType::Buffer, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - ], - ) - .unwrap(); - TransformCode { - reduce_pipeline, - root_pipeline, - leaf_pipeline, - } - } -} - -impl TransformStage { - pub unsafe fn new(session: &Session, code: &TransformCode) -> TransformStage { - // We're limited to TRANSFORM_PART_SIZE^2 - // Also note: size here allows padding - let root_buf_size = TRANSFORM_PART_SIZE * 32; - let root_buf = session - .create_buffer(root_buf_size, BufferUsage::STORAGE) - .unwrap(); - let root_ds = session - .create_simple_descriptor_set(&code.root_pipeline, &[&root_buf]) - .unwrap(); - TransformStage { root_buf, root_ds } - } - - pub unsafe fn bind( - &self, - session: &Session, - code: &TransformCode, - config_buf: &Buffer, - scene_buf: &Buffer, - memory_buf: &Buffer, - ) -> TransformBinding { - let reduce_ds = session - .create_simple_descriptor_set( - &code.reduce_pipeline, - &[memory_buf, config_buf, scene_buf, &self.root_buf], - ) - .unwrap(); - let leaf_ds = session - .create_simple_descriptor_set( - &code.leaf_pipeline, - &[memory_buf, config_buf, scene_buf, &self.root_buf], - ) - .unwrap(); - TransformBinding { reduce_ds, leaf_ds } - } - - pub unsafe fn record( - &self, - pass: &mut ComputePass, - code: &TransformCode, - binding: &TransformBinding, - size: u64, - ) { - if size > TRANSFORM_PART_SIZE.pow(2) { - panic!("very large scan not yet implemented"); - } - let n_workgroups = (size + TRANSFORM_PART_SIZE - 1) / TRANSFORM_PART_SIZE; - if n_workgroups > 1 { - pass.dispatch( - &code.reduce_pipeline, - &binding.reduce_ds, - (n_workgroups as u32, 1, 1), - (TRANSFORM_WG as u32, 1, 1), - ); - pass.memory_barrier(); - pass.dispatch( - &code.root_pipeline, - &self.root_ds, - (1, 1, 1), - (TRANSFORM_WG as u32, 1, 1), - ); - pass.memory_barrier(); - } - pass.dispatch( - &code.leaf_pipeline, - &binding.leaf_ds, - (n_workgroups as u32, 1, 1), - (TRANSFORM_WG as u32, 1, 1), - ); - } -} - -impl TransformBinding { - pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) { - session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory); - session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory); - } - - pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) { - session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene); - session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene); - } -} - impl Transform { pub const IDENTITY: Transform = Transform { mat: [1.0, 0.0, 0.0, 1.0], diff --git a/piet-gpu/src/text.rs b/piet-gpu/src/text.rs index 0fb508b..da8b86e 100644 --- a/piet-gpu/src/text.rs +++ b/piet-gpu/src/text.rs @@ -6,8 +6,8 @@ use swash::{FontRef, GlyphId}; use piet::kurbo::{Point, Rect, Size}; use piet::{ - Error, FontFamily, HitTestPoint, HitTestPosition, LineMetric, Text, TextAttribute, TextLayout, - TextLayoutBuilder, TextStorage, + Error, FontFamily, HitTestPoint, HitTestPosition, LineMetric, RenderContext, Text, + TextAttribute, TextLayout, TextLayoutBuilder, TextStorage, }; use crate::encoder::GlyphEncoder; @@ -169,38 +169,14 @@ impl PietGpuTextLayout { // Should we use ppem from font, or let swash scale? const DEFAULT_UPEM: u16 = 2048; let scale = self.size as f32 / DEFAULT_UPEM as f32; - let mut inv_transform = None; + ctx.save().unwrap(); // TODO: handle y offsets also - let mut last_x = 0.0; for glyph in &self.glyphs { - let transform = match &mut inv_transform { - None => { - let inv_scale = scale.recip(); - let translate = render_ctx::to_f32_2(pos); - inv_transform = Some(Transform { - mat: [inv_scale, 0.0, 0.0, -inv_scale], - translate: [ - -translate[0] * inv_scale - glyph.x, - translate[1] * inv_scale, - ], - }); - let tpos = render_ctx::to_f32_2(pos); - let translate = [tpos[0] + scale * glyph.x, tpos[1]]; - Transform { - mat: [scale, 0.0, 0.0, -scale], - translate, - } - } - Some(inv) => { - let delta_x = glyph.x - last_x; - inv.translate[0] -= delta_x; - Transform { - mat: [1.0, 0.0, 0.0, 1.0], - translate: [delta_x, 0.0], - } - } + let tpos = render_ctx::to_f32_2(pos); + let transform = Transform { + mat: [scale, 0.0, 0.0, -scale], + translate: [tpos[0] + scale * glyph.x, tpos[1]], }; - last_x = glyph.x; //println!("{:?}, {:?}", transform.mat, transform.translate); ctx.encode_transform(transform); let glyph = self.font.make_path(glyph.glyph_id, &mut tc); @@ -209,9 +185,7 @@ impl PietGpuTextLayout { ctx.fill_glyph(0xff_ff_ff_ff); } } - if let Some(transform) = inv_transform { - ctx.encode_transform(transform); - } + ctx.restore().unwrap(); } } diff --git a/piet-scene/src/geometry.rs b/piet-scene/src/geometry.rs index 1ea8f33..2df7f83 100644 --- a/piet-scene/src/geometry.rs +++ b/piet-scene/src/geometry.rs @@ -61,7 +61,7 @@ impl From<(f32, f32)> for Point { } /// Affine transformation matrix. -#[derive(Copy, Clone, Debug, Pod, Zeroable)] +#[derive(Copy, Clone, PartialEq, Debug, Pod, Zeroable)] #[repr(C)] pub struct Affine { pub xx: f32, diff --git a/piet-scene/src/scene/builder.rs b/piet-scene/src/scene/builder.rs index 8aa1bf5..5394f88 100644 --- a/piet-scene/src/scene/builder.rs +++ b/piet-scene/src/scene/builder.rs @@ -43,7 +43,6 @@ pub struct Builder<'a> { scene: &'a mut SceneData, resources: ResourceData<'a>, layers: Vec, - transforms: Vec, } impl<'a> Builder<'a> { @@ -59,21 +58,12 @@ impl<'a> Builder<'a> { scene, resources, layers: vec![], - transforms: vec![], } } - /// Pushes a transform matrix onto the stack. - pub fn push_transform(&mut self, transform: Affine) { - self.transform(transform); - self.transforms.push(transform); - } - - /// Pops the current transform matrix. - pub fn pop_transform(&mut self) { - if let Some(transform) = self.transforms.pop() { - self.transform(transform.inverse()); - } + /// Sets the current transformation. + pub fn transform(&mut self, transform: Affine) { + self.encode_transform(transform); } /// Pushes a new layer bound by the specifed shape and composed with @@ -117,10 +107,17 @@ impl<'a> Builder<'a> { let elements = elements.into_iter(); self.encode_path(elements, true); if let Some(brush_transform) = brush_transform { - self.transform(brush_transform); - self.swap_last_tags(); - self.encode_brush(brush); - self.transform(brush_transform.inverse()); + if let Some(last_transform) = self.scene.transform_stream.last().copied() { + self.encode_transform(brush_transform * last_transform); + self.swap_last_tags(); + self.encode_brush(brush); + self.encode_transform(last_transform); + } else { + self.encode_transform(brush_transform); + self.swap_last_tags(); + self.encode_brush(brush); + self.encode_transform(Affine::IDENTITY); + } } else { self.encode_brush(brush); } @@ -143,19 +140,35 @@ impl<'a> Builder<'a> { let elements = elements.into_iter(); self.encode_path(elements, false); if let Some(brush_transform) = brush_transform { - self.transform(brush_transform); - self.swap_last_tags(); - self.encode_brush(brush); - self.transform(brush_transform.inverse()); + if let Some(last_transform) = self.scene.transform_stream.last().copied() { + self.encode_transform(brush_transform * last_transform); + self.swap_last_tags(); + self.encode_brush(brush); + self.encode_transform(last_transform); + } else { + self.encode_transform(brush_transform); + self.swap_last_tags(); + self.encode_brush(brush); + self.encode_transform(Affine::IDENTITY); + } } else { self.encode_brush(brush); } } /// Appends a fragment to the scene. - pub fn append(&mut self, fragment: &Fragment) { + pub fn append(&mut self, fragment: &Fragment, transform: Option) { let drawdata_base = self.scene.drawdata_stream.len(); - self.scene.append(&fragment.data); + let mut cur_transform = self.scene.transform_stream.last().copied(); + if let Some(transform) = transform { + if cur_transform.is_none() { + cur_transform = Some(Affine::IDENTITY); + } + self.encode_transform(transform); + } else if cur_transform != Some(Affine::IDENTITY) { + self.encode_transform(Affine::IDENTITY); + } + self.scene.append(&fragment.data, &transform); match &mut self.resources { ResourceData::Scene(res) => { for patch in &fragment.resources.patches { @@ -189,6 +202,10 @@ impl<'a> Builder<'a> { )); } } + // Prevent fragments from affecting transform state. Should we allow this? + if let Some(transform) = cur_transform { + self.encode_transform(transform); + } } /// Completes construction and finalizes the underlying scene. @@ -196,15 +213,6 @@ impl<'a> Builder<'a> { while let Some(layer) = self.layers.pop() { self.end_clip(Some(layer)); } - match self.resources { - ResourceData::Fragment(_) => { - // Make sure the transform state is invariant for fragments - while !self.transforms.is_empty() { - self.pop_transform(); - } - } - _ => {} - } } } @@ -250,7 +258,7 @@ impl<'a> Builder<'a> { self.scene.n_pathseg += n_pathseg; } - fn transform(&mut self, transform: Affine) { + fn encode_transform(&mut self, transform: Affine) { self.scene.tag_stream.push(0x20); self.scene.transform_stream.push(transform); } diff --git a/piet-scene/src/scene/mod.rs b/piet-scene/src/scene/mod.rs index 577f81e..5f0e77f 100644 --- a/piet-scene/src/scene/mod.rs +++ b/piet-scene/src/scene/mod.rs @@ -60,9 +60,14 @@ impl SceneData { } } - fn append(&mut self, other: &SceneData) { - self.transform_stream - .extend_from_slice(&other.transform_stream); + fn append(&mut self, other: &SceneData, transform: &Option) { + if let Some(transform) = *transform { + self.transform_stream + .extend(other.transform_stream.iter().map(|x| *x * transform)); + } else { + self.transform_stream + .extend_from_slice(&other.transform_stream); + } self.tag_stream.extend_from_slice(&other.tag_stream); self.pathseg_stream.extend_from_slice(&other.pathseg_stream); self.linewidth_stream diff --git a/tests/src/main.rs b/tests/src/main.rs index 5599f70..96504f1 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -29,8 +29,6 @@ mod test_result; #[cfg(feature = "piet-gpu")] mod path; -#[cfg(feature = "piet-gpu")] -mod transform; use clap::{App, Arg}; use piet_gpu_hal::InstanceFlags; @@ -137,7 +135,6 @@ fn main() { } #[cfg(feature = "piet-gpu")] if config.groups.matches("piet") { - report(&transform::transform_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config)); report(&draw::draw_test(&mut runner, &config)); report(&clip::clip_test(&mut runner, &config)); diff --git a/tests/src/path.rs b/tests/src/path.rs index 9d794e1..1a933d0 100644 --- a/tests/src/path.rs +++ b/tests/src/path.rs @@ -210,7 +210,6 @@ impl PathData { let path_bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE; let stage_config = stages::Config { pathseg_alloc, - trans_alloc, path_bbox_alloc, n_trans, n_path: self.n_path, diff --git a/tests/src/transform.rs b/tests/src/transform.rs deleted file mode 100644 index 43bfc67..0000000 --- a/tests/src/transform.rs +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2021 The piet-gpu authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Also licensed under MIT license, at your choice. - -//! Tests for the piet-gpu transform stage. - -use crate::{Config, Runner, TestResult}; - -use kurbo::Affine; -use piet_gpu::stages::{self, Transform, TransformCode, TransformStage}; -use piet_gpu_hal::BufferUsage; -use rand::Rng; - -struct AffineTestData { - input_data: Vec, - expected: Vec, -} - -pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult { - let mut result = TestResult::new("transform"); - // TODO: implement large scan and set large to 1 << 24 - let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 22); - // Validate with real transform data. - let data = AffineTestData::new(n_elements as usize); - let data_buf = runner - .session - .create_buffer_init(&data.input_data, BufferUsage::STORAGE) - .unwrap(); - let memory = runner.buf_down(data_buf.size() + 8, BufferUsage::empty()); - let stage_config = stages::Config { - n_trans: n_elements as u32, - ..Default::default() - }; - let config_buf = runner - .session - .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) - .unwrap(); - - let code = TransformCode::new(&runner.session); - let stage = TransformStage::new(&runner.session, &code); - let binding = stage.bind( - &runner.session, - &code, - &config_buf, - &data_buf, - &memory.dev_buf, - ); - let mut total_elapsed = 0.0; - let n_iter = config.n_iter; - for i in 0..n_iter { - let mut commands = runner.commands(); - let mut pass = commands.compute_pass(0, 1); - stage.record(&mut pass, &code, &binding, n_elements); - pass.end(); - if i == 0 || config.verify_all { - commands.cmd_buf.memory_barrier(); - commands.download(&memory); - } - total_elapsed += runner.submit(commands); - if i == 0 || config.verify_all { - let dst = memory.map_read(8..); - if let Some(failure) = data.verify(dst.cast_slice()) { - result.fail(failure); - } - } - } - result.timing(total_elapsed, n_elements * n_iter); - result -} - -impl AffineTestData { - fn new(n: usize) -> AffineTestData { - let mut rng = rand::thread_rng(); - let mut a = Affine::default(); - let mut input_data = Vec::with_capacity(n); - let mut expected = Vec::with_capacity(n); - for _ in 0..n { - loop { - let b = Affine::new([ - rng.gen_range(-3.0, 3.0), - rng.gen_range(-3.0, 3.0), - rng.gen_range(-3.0, 3.0), - rng.gen_range(-3.0, 3.0), - rng.gen_range(-3.0, 3.0), - rng.gen_range(-3.0, 3.0), - ]); - if b.determinant().abs() >= 1.0 { - expected.push(b); - let c = a.inverse() * b; - input_data.push(Transform::from_kurbo(c)); - a = b; - break; - } - } - } - AffineTestData { - input_data, - expected, - } - } - - fn verify(&self, actual: &[Transform]) -> Option { - for (i, (actual, expected)) in actual.iter().zip(&self.expected).enumerate() { - let error: f64 = actual - .to_kurbo() - .as_coeffs() - .iter() - .zip(expected.as_coeffs()) - .map(|(actual, expected)| (actual - expected).powi(2)) - .sum(); - // Hopefully this is right; most of the time the error is much - // smaller, but occasionally we see outliers. - let tolerance = 1e-9 * (i + 1) as f64; - if error > tolerance { - return Some(format!("{}: {} {}", i, error, tolerance)); - } - } - None - } -}