diff --git a/piet-gpu-types/src/pathseg.rs b/piet-gpu-types/src/pathseg.rs index 080344d..a679e44 100644 --- a/piet-gpu-types/src/pathseg.rs +++ b/piet-gpu-types/src/pathseg.rs @@ -9,6 +9,8 @@ piet_gpu! { p2: [f32; 2], p3: [f32; 2], path_ix: u32, + // trans_ix is 1-based, 0 means no transformation. + trans_ix: u32, // A note: the layout of this struct is shared with // PathStrokeCubic. In that case, we actually write // [0.0, 0.0] as the stroke field, to minimize divergence. @@ -19,6 +21,7 @@ piet_gpu! { p2: [f32; 2], p3: [f32; 2], path_ix: u32, + trans_ix: u32, // halfwidth in both x and y for binning stroke: [f32; 2], } diff --git a/piet-gpu-types/src/state.rs b/piet-gpu-types/src/state.rs index 602fab9..6e2b581 100644 --- a/piet-gpu-types/src/state.rs +++ b/piet-gpu-types/src/state.rs @@ -11,6 +11,7 @@ piet_gpu! { flags: u32, path_count: u32, pathseg_count: u32, + trans_count: u32, } } } diff --git a/piet-gpu-types/src/tile.rs b/piet-gpu-types/src/tile.rs index 38ee93b..27e87f4 100644 --- a/piet-gpu-types/src/tile.rs +++ b/piet-gpu-types/src/tile.rs @@ -18,5 +18,9 @@ piet_gpu! 
{ y_edge: f32, next: Ref, } + struct TransformSeg { + mat: [f32; 4], + translate: [f32; 2], + } } } diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index dfed520..646a505 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -248,10 +248,11 @@ fn main() -> Result<(), Error> { } let n_paths = ctx.path_count(); let n_pathseg = ctx.pathseg_count(); + let n_trans = ctx.trans_count(); let scene = ctx.get_scene_buf(); //dump_scene(&scene); - let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?; + let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?; let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?; diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 786c78c..db501e9 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -40,9 +40,10 @@ fn main() -> Result<(), Error> { render_scene(&mut ctx); let n_paths = ctx.path_count(); let n_pathseg = ctx.pathseg_count(); + let n_trans = ctx.trans_count(); let scene = ctx.get_scene_buf(); - let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?; + let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?; let mut submitted: Option = None; let mut last_frame_idx = 0; diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index 81d26c0..48fcb48 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index a9a05f5..be8662d 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 974e400..23eb962 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index b7b0e9c..ad899e0 100644 --- a/piet-gpu/shader/elements.comp +++
b/piet-gpu/shader/elements.comp @@ -39,6 +39,7 @@ layout(set = 0, binding = 3) volatile buffer StateBuf { #include "state.h" #include "annotated.h" #include "pathseg.h" +#include "tile.h" #define StateBuf_stride (4 + 2 * State_size) @@ -91,6 +92,7 @@ State combine_state(State a, State b) { c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1; c.path_count = a.path_count + b.path_count; c.pathseg_count = a.pathseg_count + b.pathseg_count; + c.trans_count = a.trans_count + b.trans_count; return c; } @@ -106,6 +108,7 @@ State map_element(ElementRef ref) { c.flags = 0; c.path_count = 0; c.pathseg_count = 0; + c.trans_count = 0; switch (tag) { case Element_FillLine: case Element_StrokeLine: @@ -146,6 +149,7 @@ State map_element(ElementRef ref) { Transform t = Element_Transform_read(ref); c.mat = t.mat; c.translate = t.translate; + c.trans_count = 1; break; } return c; @@ -205,6 +209,7 @@ void main() { exclusive.flags = 0; exclusive.path_count = 0; exclusive.pathseg_count = 0; + exclusive.trans_count = 0; // Publish aggregate for this partition if (gl_LocalInvocationID.x == WG_SIZE - 1) { @@ -290,14 +295,13 @@ void main() { case Element_FillLine: case Element_StrokeLine: LineSeg line = Element_StrokeLine_read(this_ref); - vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate; - vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate; PathStrokeCubic path_cubic; - path_cubic.p0 = p0; - path_cubic.p1 = mix(p0, p1, 1.0 / 3.0); - path_cubic.p2 = mix(p1, p0, 1.0 / 3.0); - path_cubic.p3 = p1; + path_cubic.p0 = line.p0; + path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0); + path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0); + path_cubic.p3 = line.p1; path_cubic.path_ix = st.path_count; + path_cubic.trans_ix = st.trans_count; if (tag == Element_StrokeLine) { path_cubic.stroke = get_linewidth(st); } else { @@ -313,15 +317,12 @@ void main() { case Element_FillQuad: case Element_StrokeQuad: QuadSeg quad = Element_StrokeQuad_read(this_ref); - p0 = st.mat.xy * 
quad.p0.x + st.mat.zw * quad.p0.y + st.translate; - p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate; - vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate; - path_cubic; - path_cubic.p0 = p0; - path_cubic.p1 = mix(p1, p0, 1.0 / 3.0); - path_cubic.p2 = mix(p1, p2, 1.0 / 3.0); - path_cubic.p3 = p2; + path_cubic.p0 = quad.p0; + path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0); + path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0); + path_cubic.p3 = quad.p2; path_cubic.path_ix = st.path_count; + path_cubic.trans_ix = st.trans_count; if (tag == Element_StrokeQuad) { path_cubic.stroke = get_linewidth(st); } else { @@ -337,12 +338,12 @@ void main() { case Element_FillCubic: case Element_StrokeCubic: CubicSeg cubic = Element_StrokeCubic_read(this_ref); - path_cubic; - path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate; - path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate; - path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate; - path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate; + path_cubic.p0 = cubic.p0; + path_cubic.p1 = cubic.p1; + path_cubic.p2 = cubic.p2; + path_cubic.p3 = cubic.p3; path_cubic.path_ix = st.path_count; + path_cubic.trans_ix = st.trans_count; if (tag == Element_StrokeCubic) { path_cubic.stroke = get_linewidth(st); } else { @@ -388,6 +389,11 @@ void main() { out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size); Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip); break; + case Element_Transform: + TransformSeg transform = TransformSeg(st.mat, st.translate); + TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size); + TransformSeg_write(conf.trans_alloc, trans_ref, transform); + break; } } } diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 7857b93..70db1ae 100644 Binary files 
a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index ede04e4..b48d661 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp index 4f77ff9..70251bf 100644 --- a/piet-gpu/shader/path_coarse.comp +++ b/piet-gpu/shader/path_coarse.comp @@ -102,6 +102,17 @@ void main() { case PathSeg_FillCubic: case PathSeg_StrokeCubic: PathStrokeCubic cubic = PathSeg_StrokeCubic_read(conf.pathseg_alloc, ref); + + uint trans_ix = cubic.trans_ix; + if (trans_ix > 0) { + TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size); + TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref); + cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate; + cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate; + cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate; + cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate; + } + vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; float err = err_v.x * err_v.x + err_v.y * err_v.y; // The number of quadratics. 
diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index 0236541..d84bd6d 100644 Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ diff --git a/piet-gpu/shader/pathseg.h b/piet-gpu/shader/pathseg.h index a170090..7c69b9d 100644 --- a/piet-gpu/shader/pathseg.h +++ b/piet-gpu/shader/pathseg.h @@ -20,9 +20,10 @@ struct PathFillCubic { vec2 p2; vec2 p3; uint path_ix; + uint trans_ix; }; -#define PathFillCubic_size 36 +#define PathFillCubic_size 40 PathFillCubicRef PathFillCubic_index(PathFillCubicRef ref, uint index) { return PathFillCubicRef(ref.offset + index * PathFillCubic_size); @@ -34,10 +35,11 @@ struct PathStrokeCubic { vec2 p2; vec2 p3; uint path_ix; + uint trans_ix; vec2 stroke; }; -#define PathStrokeCubic_size 44 +#define PathStrokeCubic_size 48 PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) { return PathStrokeCubicRef(ref.offset + index * PathStrokeCubic_size); @@ -46,7 +48,7 @@ PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) { #define PathSeg_Nop 0 #define PathSeg_FillCubic 1 #define PathSeg_StrokeCubic 2 -#define PathSeg_size 48 +#define PathSeg_size 52 PathSegRef PathSeg_index(PathSegRef ref, uint index) { return PathSegRef(ref.offset + index * PathSeg_size); @@ -63,12 +65,14 @@ PathFillCubic PathFillCubic_read(Alloc a, PathFillCubicRef ref) { uint raw6 = read_mem(a, ix + 6); uint raw7 = read_mem(a, ix + 7); uint raw8 = read_mem(a, ix + 8); + uint raw9 = read_mem(a, ix + 9); PathFillCubic s; s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7)); s.path_ix = raw8; + s.trans_ix = raw9; return s; } @@ -83,6 +87,7 @@ void PathFillCubic_write(Alloc a, PathFillCubicRef ref, PathFillCubic s) { write_mem(a, ix + 6, floatBitsToUint(s.p3.x)); write_mem(a, ix + 
7, floatBitsToUint(s.p3.y)); write_mem(a, ix + 8, s.path_ix); + write_mem(a, ix + 9, s.trans_ix); } PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) { @@ -98,13 +103,15 @@ PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) { uint raw8 = read_mem(a, ix + 8); uint raw9 = read_mem(a, ix + 9); uint raw10 = read_mem(a, ix + 10); + uint raw11 = read_mem(a, ix + 11); PathStrokeCubic s; s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7)); s.path_ix = raw8; - s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10)); + s.trans_ix = raw9; + s.stroke = vec2(uintBitsToFloat(raw10), uintBitsToFloat(raw11)); return s; } @@ -119,8 +126,9 @@ void PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s) { write_mem(a, ix + 6, floatBitsToUint(s.p3.x)); write_mem(a, ix + 7, floatBitsToUint(s.p3.y)); write_mem(a, ix + 8, s.path_ix); - write_mem(a, ix + 9, floatBitsToUint(s.stroke.x)); - write_mem(a, ix + 10, floatBitsToUint(s.stroke.y)); + write_mem(a, ix + 9, s.trans_ix); + write_mem(a, ix + 10, floatBitsToUint(s.stroke.x)); + write_mem(a, ix + 11, floatBitsToUint(s.stroke.y)); } uint PathSeg_tag(Alloc a, PathSegRef ref) { diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 5a4935c..f2ca87c 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -35,4 +35,5 @@ struct Config { Alloc ptcl_alloc; Alloc pathseg_alloc; Alloc anno_alloc; + Alloc trans_alloc; }; diff --git a/piet-gpu/shader/state.h b/piet-gpu/shader/state.h index 8479dcf..d2df804 100644 --- a/piet-gpu/shader/state.h +++ b/piet-gpu/shader/state.h @@ -14,9 +14,10 @@ struct State { uint flags; uint path_count; uint pathseg_count; + uint trans_count; }; -#define State_size 56 +#define State_size 60 StateRef State_index(StateRef ref, uint index) { return 
StateRef(ref.offset + index * State_size); @@ -38,6 +39,7 @@ State State_read(StateRef ref) { uint raw11 = state[ix + 11]; uint raw12 = state[ix + 12]; uint raw13 = state[ix + 13]; + uint raw14 = state[ix + 14]; State s; s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); @@ -46,6 +48,7 @@ State State_read(StateRef ref) { s.flags = raw11; s.path_count = raw12; s.pathseg_count = raw13; + s.trans_count = raw14; return s; } @@ -65,5 +68,6 @@ void State_write(StateRef ref, State s) { state[ix + 11] = s.flags; state[ix + 12] = s.path_count; state[ix + 13] = s.pathseg_count; + state[ix + 14] = s.trans_count; } diff --git a/piet-gpu/shader/tile.h b/piet-gpu/shader/tile.h index 500277b..e11329c 100644 --- a/piet-gpu/shader/tile.h +++ b/piet-gpu/shader/tile.h @@ -14,6 +14,10 @@ struct TileSegRef { uint offset; }; +struct TransformSegRef { + uint offset; +}; + struct Path { uvec4 bbox; TileRef tiles; @@ -49,6 +53,17 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) { return TileSegRef(ref.offset + index * TileSeg_size); } +struct TransformSeg { + vec4 mat; + vec2 translate; +}; + +#define TransformSeg_size 24 + +TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) { + return TransformSegRef(ref.offset + index * TransformSeg_size); +} + Path Path_read(Alloc a, PathRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -109,3 +124,27 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) { write_mem(a, ix + 5, s.next.offset); } +TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + uint raw2 = read_mem(a, ix + 2); + uint raw3 = read_mem(a, ix + 3); + uint raw4 = read_mem(a, ix + 4); + uint raw5 = read_mem(a, ix + 5); + TransformSeg s; + s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), 
uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); + return s; +} + +void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, floatBitsToUint(s.mat.x)); + write_mem(a, ix + 1, floatBitsToUint(s.mat.y)); + write_mem(a, ix + 2, floatBitsToUint(s.mat.z)); + write_mem(a, ix + 3, floatBitsToUint(s.mat.w)); + write_mem(a, ix + 4, floatBitsToUint(s.translate.x)); + write_mem(a, ix + 5, floatBitsToUint(s.translate.y)); +} + diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index b256392..f97a7d5 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index f53e511..5f0f504 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -201,14 +201,15 @@ impl Renderer { scene: &[u8], n_paths: usize, n_pathseg: usize, + n_trans: usize, ) -> Result { let host = MemFlags::host_coherent(); let dev = MemFlags::device_local(); let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); println!( - "scene: {} elements, {} paths, {} path_segments", - n_elements, n_paths, n_pathseg + "scene: {} elements, {} paths, {} path_segments, {} transforms", + n_elements, n_paths, n_pathseg, n_trans ); let mut scene_buf_host = session @@ -222,15 +223,16 @@ impl Renderer { let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; - const CONFIG_SIZE: u64 = 9*4; // Size of Config in setup.h. + const CONFIG_SIZE: u64 = 10*4; // Size of Config in setup.h. 
let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host)?; let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?; // TODO: constants const PATH_SIZE: usize = 12; const BIN_SIZE: usize = 8; - const PATHSEG_SIZE: usize = 48; + const PATHSEG_SIZE: usize = 52; const ANNO_SIZE: usize = 28; + const TRANS_SIZE: usize = 24; let mut alloc = 0; let tile_base = alloc; alloc += ((n_paths + 3) & !3) * PATH_SIZE; @@ -242,7 +244,9 @@ impl Renderer { alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3; let anno_base = alloc; alloc += (n_paths * ANNO_SIZE + 3) & !3; - config_buf_host.write(&[n_paths as u32, n_pathseg as u32, WIDTH_IN_TILES as u32, HEIGHT_IN_TILES as u32, tile_base as u32, bin_base as u32, ptcl_base as u32, pathseg_base as u32, anno_base as u32])?; + let trans_base = alloc; + alloc += (n_trans * TRANS_SIZE + 3) & !3; + config_buf_host.write(&[n_paths as u32, n_pathseg as u32, WIDTH_IN_TILES as u32, HEIGHT_IN_TILES as u32, tile_base as u32, bin_base as u32, ptcl_base as u32, pathseg_base as u32, anno_base as u32, trans_base as u32])?; let mut memory_buf_host = session.create_buffer(2*4, host)?; let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 419c9ec..d05b712 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -38,6 +38,8 @@ pub struct PietGpuRenderContext { path_count: usize, /// The count of path segment elements. pathseg_count: usize, + /// The count of transform elements. 
+ trans_count: usize, cur_transform: Affine, state_stack: Vec, @@ -82,6 +84,7 @@ impl PietGpuRenderContext { stroke_width, path_count: 0, pathseg_count: 0, + trans_count: 0, cur_transform: Affine::default(), state_stack: Vec::new(), clip_stack: Vec::new(), @@ -100,6 +103,10 @@ impl PietGpuRenderContext { pub fn pathseg_count(&self) -> usize { self.pathseg_count } + + pub fn trans_count(&self) -> usize { + self.trans_count + } } impl RenderContext for PietGpuRenderContext { @@ -207,6 +214,7 @@ impl RenderContext for PietGpuRenderContext { let a_inv = state.rel_transform.inverse(); self.elements .push(Element::Transform(to_scene_transform(a_inv))); + self.trans_count += 1; } self.cur_transform = state.transform; for _ in 0..state.n_clip { @@ -228,6 +236,7 @@ impl RenderContext for PietGpuRenderContext { fn transform(&mut self, transform: Affine) { self.elements .push(Element::Transform(to_scene_transform(transform))); + self.trans_count += 1; if let Some(tos) = self.state_stack.last_mut() { tos.rel_transform *= transform; }