diff --git a/piet-gpu-derive/src/glsl.rs b/piet-gpu-derive/src/glsl.rs index 96cb3c3..d9f08f3 100644 --- a/piet-gpu-derive/src/glsl.rs +++ b/piet-gpu-derive/src/glsl.rs @@ -29,6 +29,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { (size, LayoutTypeDef::Enum(en)) => { gen_enum_def(&mut r, name, en); gen_item_def(&mut r, name, size.size); + gen_tag_def(&mut r, name); } } } @@ -92,6 +93,13 @@ fn gen_item_def(r: &mut String, name: &str, size: usize) { writeln!(r, "}}\n").unwrap(); } +fn gen_tag_def(r: &mut String, name: &str) { + writeln!(r, "struct {}Tag {{", name).unwrap(); + writeln!(r, " uint tag;").unwrap(); + writeln!(r, " uint flags;").unwrap(); + writeln!(r, "}};\n").unwrap(); +} + fn gen_struct_read( r: &mut String, bufname: &str, @@ -143,12 +151,13 @@ fn gen_enum_read( variants: &[(String, Vec<(usize, LayoutType)>)], ) { if is_mem { - writeln!(r, "uint {}_tag(Alloc a, {}Ref ref) {{", name, name).unwrap(); - writeln!(r, " return read_mem(a, ref.offset >> 2);").unwrap(); + writeln!(r, "{}Tag {}_tag(Alloc a, {}Ref ref) {{", name, name, name).unwrap(); + writeln!(r, " uint tag_and_flags = read_mem(a, ref.offset >> 2);").unwrap(); } else { - writeln!(r, "uint {}_tag({}Ref ref) {{", name, name).unwrap(); - writeln!(r, " return {}[ref.offset >> 2];", bufname).unwrap(); + writeln!(r, "{}Tag {}_tag({}Ref ref) {{", name, name, name).unwrap(); + writeln!(r, " uint tag_and_flags = {}[ref.offset >> 2];", bufname).unwrap(); } + writeln!(r, " return {}Tag(tag_and_flags & 0xffff, tag_and_flags >> 16);", name).unwrap(); writeln!(r, "}}\n").unwrap(); for (var_name, payload) in variants { let payload_ix = if payload.len() == 1 { @@ -555,6 +564,49 @@ fn gen_enum_write( } writeln!(r, "}}\n").unwrap(); } + } else if payload.len() == 2 && matches!(payload[0].1.ty, GpuType::Scalar(GpuScalar::TagFlags)) { + if let GpuType::InlineStruct(structname) = &payload[1].1.ty { + if is_mem { + writeln!( + r, + "void {}_{}_write(Alloc a, {}Ref ref, uint flags, {} s) {{", + name, var_name, name, structname + ) + .unwrap(); + writeln!( + r, + " write_mem(a, ref.offset >> 2, (flags << 16) | {}_{});", + name, var_name + ) + .unwrap(); + writeln!( + r, + " {}_write(a, {}Ref(ref.offset + {}), s);", + structname, structname, payload[0].0 + ) + .unwrap(); + } else { + writeln!( + r, + "void {}_{}_write({}Ref ref, uint flags, {} s) {{", + name, var_name, name, structname + ) + .unwrap(); + writeln!( + r, + " {}[ref.offset >> 2] = (flags << 16) | {}_{};", + bufname, name, var_name + ) + .unwrap(); + writeln!( + r, + " {}_write({}Ref(ref.offset + {}), s);", + structname, structname, payload[0].0 + ) + .unwrap(); + } + writeln!(r, "}}\n").unwrap(); + } } // TODO: support for variants that aren't one struct. } diff --git a/piet-gpu/shader/annotated.h b/piet-gpu/shader/annotated.h index 40ded79..63d4bdb 100644 --- a/piet-gpu/shader/annotated.h +++ b/piet-gpu/shader/annotated.h @@ -79,6 +79,11 @@ AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) { return AnnotatedRef(ref.offset + index * Annotated_size); } +struct AnnotatedTag { + uint tag; + uint flags; +}; + AnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -170,8 +175,9 @@ void AnnoClip_write(Alloc a, AnnoClipRef ref, AnnoClip s) { write_mem(a, ix + 3, floatBitsToUint(s.bbox.w)); } -uint Annotated_tag(Alloc a, AnnotatedRef ref) { - return read_mem(a, ref.offset >> 2); +AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) { + uint tag_and_flags = read_mem(a, ref.offset >> 2); + return AnnotatedTag(tag_and_flags & 0xffff, tag_and_flags >> 16); } AnnoStroke Annotated_Stroke_read(Alloc a, AnnotatedRef ref) { diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index 49de925..03212c9 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -46,7 +46,7 @@ void main() { // Work assignment: 1 thread : 1 path element uint row_count = 0; if (element_ix < conf.n_elements) { - uint tag = Annotated_tag(conf.anno_alloc, ref); + uint tag = Annotated_tag(conf.anno_alloc, ref).tag; switch (tag) { case Annotated_Fill: case Annotated_FillImage: diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index f33a50b..932192a 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 3a63ac2..c5cc806 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -56,7 +56,7 @@ void main() { AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); uint tag = Annotated_Nop; if (element_ix < my_n_elements) { - tag = Annotated_tag(conf.anno_alloc, ref); + tag = Annotated_tag(conf.anno_alloc, ref).tag; } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; switch (tag) { diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 6fb185d..96fbcdd 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 069367f..332fb48 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -196,7 +196,7 @@ void main() { if (th_ix + rd_ix < wr_ix) { element_ix = sh_elements[th_ix]; ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); - tag = Annotated_tag(conf.anno_alloc, ref); + tag = Annotated_tag(conf.anno_alloc, ref).tag; } // Bounding box of element in pixel coordinates. @@ -256,7 +256,7 @@ void main() { } } AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size); - uint tag = Annotated_tag(conf.anno_alloc, ref); + uint tag = Annotated_tag(conf.anno_alloc, ref).tag; uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + seq_ix % width; @@ -305,7 +305,7 @@ void main() { // If that turns out to be expensive, maybe we can pack it into // shared memory (or perhaps just the tag). ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); - tag = Annotated_tag(conf.anno_alloc, ref); + tag = Annotated_tag(conf.anno_alloc, ref).tag; if (clip_zero_depth == 0) { switch (tag) { diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 7d0dcce..a7637c6 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index eb18571..9dc714e 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -99,7 +99,7 @@ State combine_state(State a, State b) { State map_element(ElementRef ref) { // TODO: it would *probably* be more efficient to make the memory read patterns less // divergent, though it would be more wasted memory. - uint tag = Element_tag(ref); + uint tag = Element_tag(ref).tag; State c; c.bbox = vec4(0.0, 0.0, 0.0, 0.0); c.mat = vec4(1.0, 0.0, 0.0, 1.0); @@ -291,7 +291,7 @@ void main() { // gains to be had from stashing in shared memory or possibly // registers (though register pressure is an issue). ElementRef this_ref = Element_index(ref, i); - uint tag = Element_tag(this_ref); + uint tag = Element_tag(this_ref).tag; switch (tag) { case Element_FillLine: case Element_StrokeLine: diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 2c61b2e..51d3e4c 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 922ae83..82eb89b 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -161,7 +161,7 @@ void main() { } while (true) { - uint tag = Cmd_tag(cmd_alloc, cmd_ref); + uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag; if (tag == Cmd_End) { break; } diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 7768659..ebd4811 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp index 70251bf..663fe45 100644 --- a/piet-gpu/shader/path_coarse.comp +++ b/piet-gpu/shader/path_coarse.comp @@ -96,7 +96,7 @@ void main() { uint tag = PathSeg_Nop; if (element_ix < conf.n_pathseg) { - tag = PathSeg_tag(conf.pathseg_alloc, ref); + tag = PathSeg_tag(conf.pathseg_alloc, ref).tag; } switch (tag) { case PathSeg_FillCubic: diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index d84bd6d..16eb64c 100644 Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ diff --git a/piet-gpu/shader/pathseg.h b/piet-gpu/shader/pathseg.h index 7c69b9d..f3b2dec 100644 --- a/piet-gpu/shader/pathseg.h +++ b/piet-gpu/shader/pathseg.h @@ -54,6 +54,11 @@ PathSegRef PathSeg_index(PathSegRef ref, uint index) { return PathSegRef(ref.offset + index * PathSeg_size); } +struct PathSegTag { + uint tag; + uint flags; +}; + PathFillCubic PathFillCubic_read(Alloc a, PathFillCubicRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -131,8 +136,9 @@ void PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s) { write_mem(a, ix + 11, floatBitsToUint(s.stroke.y)); } -uint PathSeg_tag(Alloc a, PathSegRef ref) { - return read_mem(a, ref.offset >> 2); +PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) { + uint tag_and_flags = read_mem(a, ref.offset >> 2); + return PathSegTag(tag_and_flags & 0xffff, tag_and_flags >> 16); } PathFillCubic PathSeg_FillCubic_read(Alloc a, PathSegRef ref) { diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 53b9850..0480ad9 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -157,6 +157,11 @@ CmdRef Cmd_index(CmdRef ref, uint index) { return CmdRef(ref.offset + index * Cmd_size); } +struct CmdTag { + uint tag; + uint flags; +}; + CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -301,8 +306,9 @@ void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) { write_mem(a, ix + 0, s.new_ref); } -uint Cmd_tag(Alloc a, CmdRef ref) { - return read_mem(a, ref.offset >> 2); +CmdTag Cmd_tag(Alloc a, CmdRef ref) { + uint tag_and_flags = read_mem(a, ref.offset >> 2); + return CmdTag(tag_and_flags & 0xffff, tag_and_flags >> 16); } CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index e6ea591..56c1a86 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -160,6 +160,11 @@ ElementRef Element_index(ElementRef ref, uint index) { return ElementRef(ref.offset + index * Element_size); } +struct ElementTag { + uint tag; + uint flags; +}; + LineSeg LineSeg_read(LineSegRef ref) { uint ix = ref.offset >> 2; uint raw0 = scene[ix + 0]; @@ -264,8 +269,9 @@ Clip Clip_read(ClipRef ref) { return s; } -uint Element_tag(ElementRef ref) { - return scene[ref.offset >> 2]; +ElementTag Element_tag(ElementRef ref) { + uint tag_and_flags = scene[ref.offset >> 2]; + return ElementTag(tag_and_flags & 0xffff, tag_and_flags >> 16); } LineSeg Element_StrokeLine_read(ElementRef ref) { diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index 896bb22..fd89bf7 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -39,7 +39,7 @@ void main() { uint tag = Annotated_Nop; if (element_ix < conf.n_elements) { - tag = Annotated_tag(conf.anno_alloc, ref); + tag = Annotated_tag(conf.anno_alloc, ref).tag; } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; switch (tag) { diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index 5fb2a95..66a9fd2 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ