diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index 0c698b1..4a45d28 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -38,7 +38,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" #include "tile.h" shared uint sh_row_count[BACKDROP_WG]; @@ -48,39 +47,29 @@ shared uint sh_row_width[BACKDROP_WG]; void main() { uint th_ix = gl_LocalInvocationIndex; uint element_ix = gl_GlobalInvocationID.x; - AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); // Work assignment: 1 thread : 1 path element uint row_count = 0; bool mem_ok = mem_error == NO_ERROR; if (gl_LocalInvocationID.y == 0) { if (element_ix < conf.n_elements) { - AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref); - switch (tag.tag) { - case Annotated_Image: - case Annotated_LinGradient: - case Annotated_BeginClip: - case Annotated_Color: - if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) { - break; - } - // Fall through. - PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); - Path path = Path_read(conf.tile_alloc, path_ref); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - // Paths that don't cross tile top edges don't have backdrops. - // Don't apply the optimization to paths that may cross the y = 0 - // top edge, but clipped to 1 row. - if (row_count == 1 && path.bbox.y > 0) { - // Note: this can probably be expanded to width = 2 as - // long as it doesn't cross the left edge. - row_count = 0; - } - Alloc path_alloc = new_alloc( - path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); - sh_row_alloc[th_ix] = path_alloc; + // Possible TODO: it's not necessary to process backdrops of stroked paths. + // We had logic for that but took it out because it used the Annotated struct. + PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); + Path path = Path_read(conf.tile_alloc, path_ref); + sh_row_width[th_ix] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + // Paths that don't cross tile top edges don't have backdrops. + // Don't apply the optimization to paths that may cross the y = 0 + // top edge, but clipped to 1 row. + if (row_count == 1 && path.bbox.y > 0) { + // Note: this can probably be expanded to width = 2 as + // long as it doesn't cross the left edge. + row_count = 0; } + Alloc path_alloc = new_alloc( + path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); + sh_row_alloc[th_ix] = path_alloc; } sh_row_count[th_ix] = row_count; } diff --git a/piet-gpu/shader/bbox_clear.comp b/piet-gpu/shader/bbox_clear.comp index c609642..52577f9 100644 --- a/piet-gpu/shader/bbox_clear.comp +++ b/piet-gpu/shader/bbox_clear.comp @@ -20,7 +20,7 @@ layout(binding = 1) readonly buffer ConfigBuf { void main() { uint ix = gl_GlobalInvocationID.x; if (ix < conf.n_path) { - uint out_ix = (conf.bbox_alloc.offset >> 2) + 6 * ix; + uint out_ix = (conf.path_bbox_alloc.offset >> 2) + 6 * ix; memory[out_ix] = 0xffff; memory[out_ix + 1] = 0xffff; memory[out_ix + 2] = 0; diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 2304ea2..9b04400 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -18,7 +18,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" #include "bins.h" #include "drawtag.h" @@ -37,10 +36,12 @@ shared Alloc sh_chunk_alloc[N_TILE]; shared bool sh_alloc_failed; DrawMonoid load_draw_monoid(uint element_ix) { - uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; + uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix; uint path_ix = memory[base]; uint clip_ix = memory[base + 1]; - return DrawMonoid(path_ix, clip_ix); + uint scene_offset = memory[base + 2]; + uint info_offset = memory[base + 3]; + return DrawMonoid(path_ix, clip_ix, scene_offset, info_offset); } // Load bounding box computed by clip processing @@ -60,7 +61,7 @@ vec4 bbox_intersect(vec4 a, vec4 b) { // Load path's bbox from bbox (as written by pathseg). vec4 load_path_bbox(uint path_ix) { - uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix; float bbox_l = float(memory[base]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0; @@ -69,16 +70,15 @@ vec4 load_path_bbox(uint path_ix) { return bbox; } -void store_path_bbox(AnnotatedRef ref, vec4 bbox) { - uint ix = ref.offset >> 2; - memory[ix + 1] = floatBitsToUint(bbox.x); - memory[ix + 2] = floatBitsToUint(bbox.y); - memory[ix + 3] = floatBitsToUint(bbox.z); - memory[ix + 4] = floatBitsToUint(bbox.w); +void store_draw_bbox(uint draw_ix, vec4 bbox) { + uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix; + memory[base] = floatBitsToUint(bbox.x); + memory[base + 1] = floatBitsToUint(bbox.y); + memory[base + 2] = floatBitsToUint(bbox.z); + memory[base + 3] = floatBitsToUint(bbox.w); } void main() { - uint my_n_elements = conf.n_elements; uint my_partition = gl_WorkGroupID.x; for (uint i = 0; i < N_SLICE; i++) { @@ -91,18 +91,8 @@ void main() { // Read inputs and determine coverage of bins uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x; - AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); - uint tag = Annotated_Nop; - if (element_ix < my_n_elements) { - tag = Annotated_tag(conf.anno_alloc, ref).tag; - } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; - switch (tag) { - case Annotated_Color: - case Annotated_LinGradient: - case Annotated_Image: - case Annotated_BeginClip: - case Annotated_EndClip: + if (element_ix < conf.n_elements) { DrawMonoid draw_monoid = load_draw_monoid(element_ix); uint path_ix = draw_monoid.path_ix; vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); @@ -119,12 +109,11 @@ void main() { // Avoid negative-size bbox (is this necessary)? bbox.zw = max(bbox.xy, bbox.zw); // Store clip-intersected bbox for tile_alloc. - store_path_bbox(ref, bbox); + store_draw_bbox(element_ix, bbox); x0 = int(floor(bbox.x * SX)); y0 = int(floor(bbox.y * SY)); x1 = int(ceil(bbox.z * SX)); y1 = int(ceil(bbox.w * SY)); - break; } // At this point, we run an iterator over the coverage area, diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index ac4f3d7..785e016 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -22,43 +22,43 @@ rule dxil rule msl command = $spirv_cross --msl $in --output $out $msl_flags -build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h +build gen/binning.spv: glsl binning.comp | bins.h drawtag.h setup.h mem.h build gen/binning.hlsl: hlsl gen/binning.spv build gen/binning.dxil: dxil gen/binning.hlsl build gen/binning.msl: msl gen/binning.spv -build gen/tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h +build gen/tile_alloc.spv: glsl tile_alloc.comp | drawtag.h tile.h setup.h mem.h build gen/tile_alloc.hlsl: hlsl gen/tile_alloc.spv build gen/tile_alloc.dxil: dxil gen/tile_alloc.hlsl build gen/tile_alloc.msl: msl gen/tile_alloc.spv -build gen/path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h +build gen/path_coarse.spv: glsl path_coarse.comp | pathseg.h tile.h setup.h mem.h build gen/path_coarse.hlsl: hlsl gen/path_coarse.spv build gen/path_coarse.dxil: dxil gen/path_coarse.hlsl build gen/path_coarse.msl: msl gen/path_coarse.spv -build gen/backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h +build gen/backdrop.spv: glsl backdrop.comp | tile.h setup.h mem.h build gen/backdrop.hlsl: hlsl gen/backdrop.spv build gen/backdrop.dxil: dxil gen/backdrop.hlsl build gen/backdrop.msl: msl gen/backdrop.spv -build gen/backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h +build gen/backdrop_lg.spv: glsl backdrop.comp | tile.h setup.h mem.h flags = -DBACKDROP_DIST_FACTOR=4 build gen/backdrop_lg.hlsl: hlsl gen/backdrop_lg.spv build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv -build gen/coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h +build gen/coarse.spv: glsl coarse.comp | drawtag.h bins.h ptcl.h setup.h mem.h build gen/coarse.hlsl: hlsl gen/coarse.spv build gen/coarse.dxil: dxil gen/coarse.hlsl build gen/coarse.msl: msl gen/coarse.spv -build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h +build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h mem.h build gen/kernel4.hlsl: hlsl gen/kernel4.spv build gen/kernel4.dxil: dxil gen/kernel4.hlsl build gen/kernel4.msl: msl gen/kernel4.spv -build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h +build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h mem.h flags = -DGRAY build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl @@ -114,17 +114,17 @@ build gen/draw_root.hlsl: hlsl gen/draw_root.spv build gen/draw_root.dxil: dxil gen/draw_root.hlsl build gen/draw_root.msl: msl gen/draw_root.spv -build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h annotated.h setup.h mem.h +build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h setup.h mem.h build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.msl: msl gen/draw_leaf.spv -build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h +build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl build gen/clip_reduce.msl: msl gen/clip_reduce.spv -build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h +build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl build gen/clip_leaf.msl: msl gen/clip_leaf.spv diff --git a/piet-gpu/shader/clip_leaf.comp b/piet-gpu/shader/clip_leaf.comp index 5f7e79b..5353b0b 100644 --- a/piet-gpu/shader/clip_leaf.comp +++ b/piet-gpu/shader/clip_leaf.comp @@ -18,8 +18,6 @@ layout(binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" - // Some of this is cut'n'paste duplication with the reduce pass, and // arguably should be moved to a common .h file. // The bicyclic monoid @@ -43,7 +41,7 @@ Bic bic_combine(Bic x, Bic y) { // Load path's bbox from bbox (as written by pathseg). vec4 load_path_bbox(uint path_ix) { - uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix; float bbox_l = float(memory[base]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0; @@ -271,7 +269,7 @@ void main() { // and is in the ClipEl for cross-partition. // If not, can probably get rid of it in the stack intermediate buf. path_ix = load_path_ix(parent); - uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp; + uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 4 * ~inp; // Fix up drawmonoid so path_ix at EndClip matches BeginClip memory[drawmonoid_out_base] = path_ix; diff --git a/piet-gpu/shader/clip_reduce.comp b/piet-gpu/shader/clip_reduce.comp index c62b239..8b247ab 100644 --- a/piet-gpu/shader/clip_reduce.comp +++ b/piet-gpu/shader/clip_reduce.comp @@ -31,8 +31,6 @@ layout(binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" - // The intermediate state for clip processing. struct ClipEl { // index of parent node @@ -59,7 +57,7 @@ shared vec4 sh_bbox[WG_SIZE]; // Load path's bbox from bbox (as written by pathseg). vec4 load_path_bbox(uint path_ix) { - uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix; float bbox_l = float(memory[base]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0; diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index df306e0..961fc99 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -19,11 +19,15 @@ layout(local_size_x = N_TILE, local_size_y = 1) in; -layout(set = 0, binding = 1) readonly buffer ConfigBuf { +layout(binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "drawtag.h" #include "bins.h" #include "tile.h" #include "ptcl.h" @@ -92,8 +96,8 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } -void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) { - if (fill_mode_from_flags(flags) == MODE_NONZERO) { +void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) { + if (linewidth < 0.0) { if (tile.tile.offset != 0) { CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); Cmd_Fill_write(alloc, cmd_ref, cmd_fill); @@ -146,6 +150,10 @@ void main() { uint part_start_ix = 0; uint ready_ix = 0; + uint drawmonoid_start = conf.drawmonoid_alloc.offset >> 2; + uint drawtag_start = conf.drawtag_offset >> 2; + uint drawdata_start = conf.drawdata_offset >> 2; + uint drawinfo_start = conf.drawinfo_alloc.offset >> 2; bool mem_ok = mem_error == NO_ERROR; while (true) { for (uint i = 0; i < N_SLICE; i++) { @@ -207,24 +215,22 @@ void main() { // We've done the merge and filled the buffer. // Read one element, compute coverage. - uint tag = Annotated_Nop; + uint tag = Drawtag_Nop; uint element_ix; - AnnotatedRef ref; if (th_ix + rd_ix < wr_ix) { element_ix = sh_elements[th_ix]; - ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); - tag = Annotated_tag(conf.anno_alloc, ref).tag; + tag = scene[drawtag_start + element_ix]; } // Bounding box of element in pixel coordinates. uint tile_count; switch (tag) { - case Annotated_Color: - case Annotated_Image: - case Annotated_LinGradient: - case Annotated_BeginClip: - case Annotated_EndClip: - uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; + case Drawtag_FillColor: + case Drawtag_FillImage: + case Drawtag_FillLinGradient: + case Drawtag_BeginClip: + case Drawtag_EndClip: + uint drawmonoid_base = drawmonoid_start + 4 * element_ix; uint path_ix = memory[drawmonoid_base]; Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); uint stride = path.bbox.z - path.bbox.x; @@ -272,9 +278,7 @@ void main() { el_ix = probe; } } - AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size); - AnnotatedTag anno_tag = Annotated_tag(conf.anno_alloc, ref); - uint tag = anno_tag.tag; + uint tag = scene[drawtag_start + sh_elements[el_ix]]; uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + seq_ix % width; @@ -283,15 +287,16 @@ void main() { if (mem_ok) { Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); - bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip; + bool is_clip = (tag & 1) != 0; // Always include the tile if it contains a path segment. // For draws, include the tile if it is solid. // For clips, include the tile if it is empty - this way, logic // below will suppress the drawing of inner elements. // For blends, include the tile if // (blend_mode, composition_mode) != (Normal, SrcOver) + bool is_blend = false; // TODO include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip - || (is_clip && (anno_tag.flags & 0x2) != 0); + || (is_clip && is_blend); } if (include_tile) { uint el_slice = el_ix / 32; @@ -302,8 +307,8 @@ void main() { barrier(); - // Output non-segment elements for this tile. The thread does a sequential walk - // through the non-segment elements. + // Output draw objects for this tile. The thread does a sequential walk + // through the draw objects. uint slice_ix = 0; uint bitmap = sh_bitmaps[0][th_ix]; while (mem_ok) { @@ -323,59 +328,55 @@ void main() { // Clear LSB bitmap &= bitmap - 1; - // At this point, we read the element again from global memory. - // If that turns out to be expensive, maybe we can pack it into - // shared memory (or perhaps just the tag). - ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); - AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref); + uint drawtag = scene[drawtag_start + element_ix]; if (clip_zero_depth == 0) { - switch (tag.tag) { - case Annotated_Color: - Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), - TileRef(sh_tile_base[element_ref_ix] + - (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); - AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref); + Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), + TileRef(sh_tile_base[element_ref_ix] + + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + uint drawmonoid_base = drawmonoid_start + 4 * element_ix; + uint scene_offset = memory[drawmonoid_base + 2]; + uint info_offset = memory[drawmonoid_base + 3]; + uint dd = drawdata_start + (scene_offset >> 2); + uint di = drawinfo_start + (info_offset >> 2); + switch (drawtag) { + case Drawtag_FillColor: + float linewidth = uintBitsToFloat(memory[di]); if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth); - Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color)); + write_fill(cmd_alloc, cmd_ref, tile, linewidth); + uint rgba = scene[dd]; + Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba)); cmd_ref.offset += 4 + CmdColor_size; break; - case Annotated_LinGradient: - tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), - TileRef(sh_tile_base[element_ref_ix] + - (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); - AnnoLinGradient lin = Annotated_LinGradient_read(conf.anno_alloc, ref); + case Drawtag_FillLinGradient: if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth); + linewidth = uintBitsToFloat(memory[di]); + write_fill(cmd_alloc, cmd_ref, tile, linewidth); CmdLinGrad cmd_lin; - cmd_lin.index = lin.index; - cmd_lin.line_x = lin.line_x; - cmd_lin.line_y = lin.line_y; - cmd_lin.line_c = lin.line_c; + cmd_lin.index = scene[dd]; + cmd_lin.line_x = uintBitsToFloat(memory[di + 1]); + cmd_lin.line_y = uintBitsToFloat(memory[di + 2]); + cmd_lin.line_c = uintBitsToFloat(memory[di + 3]); Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin); cmd_ref.offset += 4 + CmdLinGrad_size; break; - case Annotated_Image: - tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), - TileRef(sh_tile_base[element_ref_ix] + - (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); - AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref); + case Drawtag_FillImage: + linewidth = uintBitsToFloat(memory[di]); if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth); - Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset)); + write_fill(cmd_alloc, cmd_ref, tile, linewidth); + uint index = scene[dd]; + uint raw1 = scene[dd + 1]; + ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16); + Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset)); cmd_ref.offset += 4 + CmdImage_size; break; - case Annotated_BeginClip: - tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), - TileRef(sh_tile_base[element_ref_ix] + - (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + case Drawtag_BeginClip: if (tile.tile.offset == 0 && tile.backdrop == 0) { clip_zero_depth = clip_depth + 1; } else { @@ -387,27 +388,24 @@ void main() { } clip_depth++; break; - case Annotated_EndClip: - tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), - TileRef(sh_tile_base[element_ref_ix] + - (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); - AnnoEndClip end_clip = Annotated_EndClip_read(conf.anno_alloc, ref); + case Drawtag_EndClip: clip_depth--; if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0); - Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(end_clip.blend)); + write_fill(cmd_alloc, cmd_ref, tile, -1.0); + uint blend = scene[dd]; + Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend)); cmd_ref.offset += 4 + CmdEndClip_size; break; } } else { // In "clip zero" state, suppress all drawing - switch (tag.tag) { - case Annotated_BeginClip: + switch (drawtag) { + case Drawtag_BeginClip: clip_depth++; break; - case Annotated_EndClip: + case Drawtag_EndClip: if (clip_depth == clip_zero_depth) { clip_zero_depth = 0; } diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index 74fc2f8..1cee0ef 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -27,7 +27,6 @@ layout(binding = 2) readonly buffer SceneBuf { #include "scene.h" #include "tile.h" #include "drawtag.h" -#include "annotated.h" #include "blend.h" #define Monoid DrawMonoid @@ -42,14 +41,14 @@ void main() { Monoid local[N_ROWS]; uint ix = gl_GlobalInvocationID.x * N_ROWS; - ElementRef ref = ElementRef(ix * Element_size); - uint tag_word = Element_tag(ref).tag; + uint drawtag_base = conf.drawtag_offset >> 2; + uint tag_word = scene[drawtag_base + ix]; Monoid agg = map_tag(tag_word); local[0] = agg; for (uint i = 1; i < N_ROWS; i++) { - tag_word = Element_tag(Element_index(ref, i)).tag; - agg = combine_tag_monoid(agg, map_tag(tag_word)); + tag_word = scene[drawtag_base + ix + i]; + agg = combine_draw_monoid(agg, map_tag(tag_word)); local[i] = agg; } sh_scratch[gl_LocalInvocationID.x] = agg; @@ -57,41 +56,47 @@ void main() { barrier(); if (gl_LocalInvocationID.x >= (1u << i)) { Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; - agg = combine_tag_monoid(other, agg); + agg = combine_draw_monoid(other, agg); } barrier(); sh_scratch[gl_LocalInvocationID.x] = agg; } barrier(); - Monoid row = tag_monoid_identity(); + Monoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0) { row = parent[gl_WorkGroupID.x - 1]; } if (gl_LocalInvocationID.x > 0) { - row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); + row = combine_draw_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); } + uint drawdata_base = conf.drawdata_offset >> 2; + uint drawinfo_base = conf.drawinfo_alloc.offset >> 2; uint out_ix = gl_GlobalInvocationID.x * N_ROWS; - uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2; + uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 4; uint clip_out_base = conf.clip_alloc.offset >> 2; - AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size); for (uint i = 0; i < N_ROWS; i++) { Monoid m = row; if (i > 0) { - m = combine_tag_monoid(m, local[i - 1]); + m = combine_draw_monoid(m, local[i - 1]); } // m now holds exclusive scan of draw monoid - memory[out_base + i * 2] = m.path_ix; - memory[out_base + i * 2 + 1] = m.clip_ix; + memory[out_base + i * 4] = m.path_ix; + memory[out_base + i * 4 + 1] = m.clip_ix; + memory[out_base + i * 4 + 2] = m.scene_offset; + memory[out_base + i * 4 + 3] = m.info_offset; + + // u32 offset of drawobj data + uint dd = drawdata_base + (m.scene_offset >> 2); + uint di = drawinfo_base + (m.info_offset >> 2); // For compatibility, we'll generate an Annotated object, same as old // pipeline. However, going forward we'll get rid of that, and have // later stages read scene + bbox etc. - ElementRef this_ref = Element_index(ref, i); - tag_word = Element_tag(this_ref).tag; - if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage || - tag_word == Element_BeginClip) { - uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix; + tag_word = scene[drawtag_base + ix + i]; + if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillImage || + tag_word == Drawtag_BeginClip) { + uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix; float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; float bbox_r = float(memory[bbox_offset + 2]) - 32768.0; @@ -101,11 +106,11 @@ void main() { uint fill_mode = uint(linewidth >= 0.0); vec4 mat; vec2 translate; - if (linewidth >= 0.0 || tag_word == Element_FillLinGradient) { + if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient) { uint trans_ix = memory[bbox_offset + 5]; uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); - if (tag_word == Element_FillLinGradient) { + if (tag_word == Drawtag_FillLinGradient) { translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); } } @@ -113,69 +118,38 @@ void main() { // TODO: need to deal with anisotropic case linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); } - linewidth = max(linewidth, 0.0); switch (tag_word) { - case Element_FillColor: - FillColor fill = Element_FillColor_read(this_ref); - AnnoColor anno_fill; - anno_fill.bbox = bbox; - anno_fill.linewidth = linewidth; - anno_fill.rgba_color = fill.rgba_color; - Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill); + case Drawtag_FillColor: + case Drawtag_FillImage: + memory[di] = floatBitsToUint(linewidth); break; - case Element_FillLinGradient: - FillLinGradient lin = Element_FillLinGradient_read(this_ref); - AnnoLinGradient anno_lin; - anno_lin.bbox = bbox; - anno_lin.linewidth = linewidth; - anno_lin.index = lin.index; - vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate; - vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate; + case Drawtag_FillLinGradient: + memory[di] = floatBitsToUint(linewidth); + uint index = scene[dd]; + vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2])); + vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4])); + p0 = mat.xy * p0.x + mat.zw * p0.y + translate; + p1 = mat.xy * p1.x + mat.zw * p1.y + translate; vec2 dxy = p1 - p0; float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y); float line_x = dxy.x * scale; float line_y = dxy.y * scale; - anno_lin.line_x = line_x; - anno_lin.line_y = line_y; - anno_lin.line_c = -(p0.x * line_x + p0.y * line_y); - Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin); + float line_c = -(p0.x * line_x + p0.y * line_y); + memory[di + 1] = floatBitsToUint(line_x); + memory[di + 2] = floatBitsToUint(line_y); + memory[di + 3] = floatBitsToUint(line_c); break; - case Element_FillImage: - FillImage fill_img = Element_FillImage_read(this_ref); - AnnoImage anno_img; - anno_img.bbox = bbox; - anno_img.linewidth = linewidth; - anno_img.index = fill_img.index; - anno_img.offset = fill_img.offset; - Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img); - break; - case Element_BeginClip: - Clip begin_clip = Element_BeginClip_read(this_ref); - AnnoBeginClip anno_begin_clip; - anno_begin_clip.bbox = bbox; - anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke - anno_begin_clip.blend = begin_clip.blend; - uint flags = uint(begin_clip.blend != BlendComp_default) << 1; - Annotated_BeginClip_write(conf.anno_alloc, out_ref, flags, anno_begin_clip); + case Drawtag_BeginClip: break; } - } else if (tag_word == Element_EndClip) { - Clip end_clip = Element_BeginClip_read(this_ref); - AnnoEndClip anno_end_clip; - // The actual bbox will be reconstructed from clip stream output. - anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9); - anno_end_clip.blend = end_clip.blend; - uint flags = uint(end_clip.blend != BlendComp_default) << 1; - Annotated_EndClip_write(conf.anno_alloc, out_ref, flags, anno_end_clip); } // Generate clip stream. - if (tag_word == Element_BeginClip || tag_word == Element_EndClip) { + if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) { uint path_ix = ~(out_ix + i); - if (tag_word == Element_BeginClip) { + if (tag_word == Drawtag_BeginClip) { path_ix = m.path_ix; } memory[clip_out_base + m.clip_ix] = path_ix; } - out_ref.offset += Annotated_size; } } diff --git a/piet-gpu/shader/draw_reduce.comp b/piet-gpu/shader/draw_reduce.comp index 68d43e9..d125d6e 100644 --- a/piet-gpu/shader/draw_reduce.comp +++ b/piet-gpu/shader/draw_reduce.comp @@ -36,13 +36,13 @@ shared Monoid sh_scratch[WG_SIZE]; void main() { uint ix = gl_GlobalInvocationID.x * N_ROWS; - ElementRef ref = ElementRef(ix * Element_size); - uint tag_word = Element_tag(ref).tag; + uint drawtag_base = conf.drawtag_offset >> 2; + uint tag_word = scene[drawtag_base + ix]; Monoid agg = map_tag(tag_word); for (uint i = 1; i < N_ROWS; i++) { - tag_word = Element_tag(Element_index(ref, i)).tag; - agg = combine_tag_monoid(agg, map_tag(tag_word)); + uint tag_word = scene[drawtag_base + ix + i]; + agg = combine_draw_monoid(agg, map_tag(tag_word)); } sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i = 0; i < LG_WG_SIZE; i++) { @@ -50,7 +50,7 @@ void main() { // We could make this predicate tighter, but would it help? if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; - agg = combine_tag_monoid(agg, other); + agg = combine_draw_monoid(agg, other); } barrier(); sh_scratch[gl_LocalInvocationID.x] = agg; diff --git a/piet-gpu/shader/draw_scan.comp b/piet-gpu/shader/draw_scan.comp index 1c26c26..d285020 100644 --- a/piet-gpu/shader/draw_scan.comp +++ b/piet-gpu/shader/draw_scan.comp @@ -16,8 +16,8 @@ layout(local_size_x = WG_SIZE, local_size_y = 1) in; #define Monoid DrawMonoid -#define combine_monoid combine_tag_monoid -#define monoid_identity tag_monoid_identity +#define combine_monoid combine_draw_monoid +#define monoid_identity draw_monoid_identity layout(binding = 0) buffer DataBuf { Monoid[] data; diff --git a/piet-gpu/shader/drawtag.h b/piet-gpu/shader/drawtag.h index 17105f6..7f73546 100644 --- a/piet-gpu/shader/drawtag.h +++ b/piet-gpu/shader/drawtag.h @@ -2,36 +2,39 @@ // Common data structures and functions for the draw tag stream. +// Design of draw tag: & 0x1c gives scene size in bytes +// & 1 gives clip +// (tag >> 4) & 0x1c is info size in bytes + +#define Drawtag_Nop 0 +#define Drawtag_FillColor 0x44 +#define Drawtag_FillLinGradient 0x114 +#define Drawtag_FillImage 0x48 +#define Drawtag_BeginClip 0x05 +#define Drawtag_EndClip 0x25 + struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; -DrawMonoid tag_monoid_identity() { - return DrawMonoid(0, 0); +DrawMonoid draw_monoid_identity() { + return DrawMonoid(0, 0, 0, 0); } -DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) { +DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } -#ifdef Element_size DrawMonoid map_tag(uint tag_word) { - switch (tag_word) { - case Element_FillColor: - case Element_FillLinGradient: - case Element_FillImage: - return DrawMonoid(1, 0); - case Element_BeginClip: - // TODO: endclip should be (0, 1), ie not generate a path. But for now - // we generate a dummy path. - case Element_EndClip: - return DrawMonoid(1, 1); - default: - return DrawMonoid(0, 0); - } + // TODO: at some point, EndClip should not generate a path + uint has_path = uint(tag_word != Drawtag_Nop); + return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x1c); } -#endif diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil index 4ebcb1c..df2be88 100644 Binary files a/piet-gpu/shader/gen/backdrop.dxil and b/piet-gpu/shader/gen/backdrop.dxil differ diff --git a/piet-gpu/shader/gen/backdrop.hlsl b/piet-gpu/shader/gen/backdrop.hlsl index a2e71a8..aba3cff 100644 --- a/piet-gpu/shader/gen/backdrop.hlsl +++ b/piet-gpu/shader/gen/backdrop.hlsl @@ -3,17 +3,6 @@ struct Alloc uint offset; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -42,12 +31,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -55,12 +46,14 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _79 : register(u0, space0); -ByteAddressBuffer _186 : register(t1, space0); +RWByteAddressBuffer _67 : register(u0, space0); +ByteAddressBuffer _166 : register(t1, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _79.Load(offset * 4 + 8); + uint v = _67.Load(offset * 4 + 8); return v; } -AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _121; -} - -uint fill_mode_from_flags(uint flags) -{ - return flags & 1u; -} - Path Path_read(Alloc a, PathRef ref) { uint ix = ref.offset >> uint(2); @@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _165 = { raw2 }; - s.tiles = _165; + TileRef _134 = { raw2 }; + s.tiles = _134; return s; } @@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _79.Store(offset * 4 + 8, val); + _67.Store(offset * 4 + 8, val); } void comp_main() { uint th_ix = gl_LocalInvocationIndex; uint element_ix = gl_GlobalInvocationID.x; - AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _194; uint row_count = 0u; - bool mem_ok = _79.Load(4) == 0u; + bool mem_ok = _67.Load(4) == 0u; if (gl_LocalInvocationID.y == 0u) { - if (element_ix < _186.Load(0)) + if (element_ix < _166.Load(0)) { - Alloc _217; - _217.offset = _186.Load(32); + PathRef _180 = { _166.Load(16) + (element_ix * 12u) }; + PathRef path_ref = _180; + Alloc _185; + _185.offset = _166.Load(16); Alloc param; - param.offset = _217.offset; - AnnotatedRef param_1 = ref; - AnnotatedTag tag = Annotated_tag(param, param_1); - switch (tag.tag) + param.offset = _185.offset; + PathRef param_1 = path_ref; + Path path = Path_read(param, param_1); + sh_row_width[th_ix] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + bool _210 = row_count == 1u; + bool _216; + if (_210) { - case 3u: - case 2u: - case 4u: - case 1u: - { - uint param_2 = tag.flags; - if (fill_mode_from_flags(param_2) != 0u) - { - break; - } - PathRef _243 = { _186.Load(16) + (element_ix * 12u) }; - PathRef path_ref = _243; - Alloc _247; - _247.offset = _186.Load(16); - Alloc param_3; - param_3.offset = _247.offset; - PathRef param_4 = path_ref; - Path path = Path_read(param_3, param_4); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _272 = row_count == 1u; - bool _278; - if (_272) - { - _278 = path.bbox.y > 0u; - } - else - { - _278 = _272; - } - if (_278) - { - row_count = 0u; - } - uint param_5 = path.tiles.offset; - uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_7 = mem_ok; - Alloc path_alloc = new_alloc(param_5, param_6, param_7); - sh_row_alloc[th_ix] = path_alloc; - break; - } + _216 = path.bbox.y > 0u; } + else + { + _216 = _210; + } + if (_216) + { + row_count = 0u; + } + uint param_2 = path.tiles.offset; + uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_4 = mem_ok; + Alloc path_alloc = new_alloc(param_2, param_3, param_4); + sh_row_alloc[th_ix] = path_alloc; } sh_row_count[th_ix] = row_count; } for (uint i = 0u; i < 8u; i++) { GroupMemoryBarrierWithGroupSync(); - bool _325 = gl_LocalInvocationID.y == 0u; - bool _332; - if (_325) + bool _262 = gl_LocalInvocationID.y == 0u; + bool _269; + if (_262) { - _332 = th_ix >= (1u << i); + _269 = th_ix >= (1u << i); } else { - _332 = _325; + _269 = _262; } - if (_332) + if (_269) { row_count += sh_row_count[th_ix - (1u << i)]; } @@ -234,7 +190,7 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); uint total_rows = sh_row_count[255]; - uint _411; + uint _348; for (uint row = th_ix; row < total_rows; row += 256u) { uint el_ix = 0u; @@ -252,27 +208,27 @@ void comp_main() Alloc tiles_alloc = sh_row_alloc[el_ix]; if (el_ix > 0u) { - _411 = sh_row_count[el_ix - 1u]; + _348 = sh_row_count[el_ix - 1u]; } else { - _411 = 0u; + _348 = 0u; } - uint seq_ix = row - _411; + uint seq_ix = row - _348; uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - uint sum = read_mem(param_8, param_9); + Alloc param_5 = tiles_alloc; + uint param_6 = tile_el_ix; + uint sum = read_mem(param_5, param_6); for (uint x = 1u; x < width; x++) { tile_el_ix += 2u; - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - sum += read_mem(param_10, param_11); - Alloc param_12 = tiles_alloc; - uint param_13 = tile_el_ix; - uint param_14 = sum; - write_mem(param_12, param_13, param_14); + Alloc param_7 = tiles_alloc; + uint param_8 = tile_el_ix; + sum += read_mem(param_7, param_8); + Alloc param_9 = tiles_alloc; + uint param_10 = tile_el_ix; + uint param_11 = sum; + write_mem(param_9, param_10, param_11); } } } diff --git a/piet-gpu/shader/gen/backdrop.msl b/piet-gpu/shader/gen/backdrop.msl index be670fc..1c0a0bb 100644 --- a/piet-gpu/shader/gen/backdrop.msl +++ b/piet-gpu/shader/gen/backdrop.msl @@ -10,17 +10,6 @@ struct Alloc uint offset; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -61,12 +50,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -74,6 +65,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67) { Alloc param = alloc; uint param_1 = offset; @@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_79.memory[offset]; + uint v = v_67.memory[offset]; return v; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_79); - return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -uint fill_mode_from_flags(thread const uint& flags) -{ - return flags & 1u; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_79); + uint raw0 = read_mem(param, param_1, v_67); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_79); + uint raw1 = read_mem(param_2, param_3, v_67); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_79); + uint raw2 = read_mem(param_4, param_5, v_67); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67) { Alloc param = alloc; uint param_1 = offset; @@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_79.memory[offset] = val; + v_67.memory[offset] = val; } -kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_row_width[256]; threadgroup Alloc sh_row_alloc[256]; threadgroup uint sh_row_count[256]; uint th_ix = gl_LocalInvocationIndex; uint element_ix = gl_GlobalInvocationID.x; - AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) }; uint row_count = 0u; - bool mem_ok = v_79.mem_error == 0u; + bool mem_ok = v_67.mem_error == 0u; if (gl_LocalInvocationID.y == 0u) { - if (element_ix < _186.conf.n_elements) + if (element_ix < _166.conf.n_elements) { + PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) }; Alloc param; - param.offset = _186.conf.anno_alloc.offset; - AnnotatedRef param_1 = ref; - AnnotatedTag tag = Annotated_tag(param, param_1, v_79); - switch (tag.tag) + param.offset = _166.conf.tile_alloc.offset; + PathRef param_1 = path_ref; + Path path = Path_read(param, param_1, v_67); + sh_row_width[th_ix] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + bool _210 = row_count == 1u; + bool _216; + if (_210) { - case 3u: - case 2u: - case 4u: - case 1u: - { - uint param_2 = tag.flags; - if (fill_mode_from_flags(param_2) != 0u) - { - break; - } - PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_3; - param_3.offset = _186.conf.tile_alloc.offset; - PathRef param_4 = path_ref; - Path path = Path_read(param_3, param_4, v_79); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _272 = row_count == 1u; - bool _278; - if (_272) - { - _278 = path.bbox.y > 0u; - } - else - { - _278 = _272; - } - if (_278) - { - row_count = 0u; - } - uint param_5 = path.tiles.offset; - uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_7 = mem_ok; - Alloc path_alloc = new_alloc(param_5, param_6, param_7); - sh_row_alloc[th_ix] = path_alloc; - break; - } + _216 = path.bbox.y > 0u; } + else + { + _216 = _210; + } + if (_216) + { + row_count = 0u; + } + uint param_2 = path.tiles.offset; + uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_4 = mem_ok; + Alloc path_alloc = new_alloc(param_2, param_3, param_4); + sh_row_alloc[th_ix] = path_alloc; } sh_row_count[th_ix] = row_count; } for (uint i = 0u; i < 8u; i++) { threadgroup_barrier(mem_flags::mem_threadgroup); - bool _325 = gl_LocalInvocationID.y == 0u; - bool _332; - if (_325) + bool _262 = gl_LocalInvocationID.y == 0u; + bool _269; + if (_262) { - _332 = th_ix >= (1u << i); + _269 = th_ix >= (1u << i); } else { - _332 = _325; + _269 = _262; } - if (_332) + if (_269) { row_count += sh_row_count[th_ix - (1u << i)]; } @@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18 } threadgroup_barrier(mem_flags::mem_threadgroup); uint total_rows = sh_row_count[255]; - uint _411; + uint _348; for (uint row = th_ix; row < total_rows; row += 256u) { uint el_ix = 0u; @@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18 Alloc tiles_alloc = sh_row_alloc[el_ix]; if (el_ix > 0u) { - _411 = sh_row_count[el_ix - 1u]; + _348 = sh_row_count[el_ix - 1u]; } else { - _411 = 0u; + _348 = 0u; } - uint seq_ix = row - _411; + uint seq_ix = row - _348; uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - uint sum = read_mem(param_8, param_9, v_79); + Alloc param_5 = tiles_alloc; + uint param_6 = tile_el_ix; + uint sum = read_mem(param_5, param_6, v_67); for (uint x = 1u; x < width; x++) { tile_el_ix += 2u; - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - sum += read_mem(param_10, param_11, v_79); - Alloc param_12 = tiles_alloc; - uint param_13 = tile_el_ix; - uint param_14 = sum; - write_mem(param_12, param_13, param_14, v_79); + Alloc param_7 = tiles_alloc; + uint param_8 = tile_el_ix; + sum += read_mem(param_7, param_8, v_67); + Alloc param_9 = tiles_alloc; + uint param_10 = tile_el_ix; + uint param_11 = sum; + write_mem(param_9, param_10, param_11, v_67); } } } diff --git a/piet-gpu/shader/gen/backdrop.spv b/piet-gpu/shader/gen/backdrop.spv index f90bf6e..2bd17d8 100644 Binary files a/piet-gpu/shader/gen/backdrop.spv and b/piet-gpu/shader/gen/backdrop.spv differ diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil index e6b2f1a..81f9b65 100644 Binary files a/piet-gpu/shader/gen/backdrop_lg.dxil and b/piet-gpu/shader/gen/backdrop_lg.dxil differ diff --git a/piet-gpu/shader/gen/backdrop_lg.hlsl b/piet-gpu/shader/gen/backdrop_lg.hlsl index 5071af2..c506403 100644 --- a/piet-gpu/shader/gen/backdrop_lg.hlsl +++ b/piet-gpu/shader/gen/backdrop_lg.hlsl @@ -3,17 +3,6 @@ struct Alloc uint offset; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -42,12 +31,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -55,12 +46,14 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u); -RWByteAddressBuffer _79 : register(u0, space0); -ByteAddressBuffer _186 : register(t1, space0); +RWByteAddressBuffer _67 : register(u0, space0); +ByteAddressBuffer _166 : register(t1, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _79.Load(offset * 4 + 8); + uint v = _67.Load(offset * 4 + 8); return v; } -AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _121; -} - -uint fill_mode_from_flags(uint flags) -{ - return flags & 1u; -} - Path Path_read(Alloc a, PathRef ref) { uint ix = ref.offset >> uint(2); @@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _165 = { raw2 }; - s.tiles = _165; + TileRef _134 = { raw2 }; + s.tiles = _134; return s; } @@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _79.Store(offset * 4 + 8, val); + _67.Store(offset * 4 + 8, val); } void comp_main() { uint th_ix = gl_LocalInvocationIndex; uint element_ix = gl_GlobalInvocationID.x; - AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _194; uint row_count = 0u; - bool mem_ok = _79.Load(4) == 0u; + bool mem_ok = _67.Load(4) == 0u; if (gl_LocalInvocationID.y == 0u) { - if (element_ix < _186.Load(0)) + if (element_ix < _166.Load(0)) { - Alloc _217; - _217.offset = _186.Load(32); + PathRef _180 = { _166.Load(16) + (element_ix * 12u) }; + PathRef path_ref = _180; + Alloc _185; + _185.offset = _166.Load(16); Alloc param; - param.offset = _217.offset; - AnnotatedRef param_1 = ref; - AnnotatedTag tag = Annotated_tag(param, param_1); - switch (tag.tag) + param.offset = _185.offset; + PathRef param_1 = path_ref; + Path path = Path_read(param, param_1); + sh_row_width[th_ix] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + bool _210 = row_count == 1u; + bool _216; + if (_210) { - case 3u: - case 2u: - case 4u: - case 1u: - { - uint param_2 = tag.flags; - if (fill_mode_from_flags(param_2) != 0u) - { - break; - } - PathRef _243 = { _186.Load(16) + (element_ix * 12u) }; - PathRef path_ref = _243; - Alloc _247; - _247.offset = _186.Load(16); - Alloc param_3; - param_3.offset = _247.offset; - PathRef param_4 = path_ref; - Path path = Path_read(param_3, param_4); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _272 = row_count == 1u; - bool _278; - if (_272) - { - _278 = path.bbox.y > 0u; - } - else - { - _278 = _272; - } - if (_278) - { - row_count = 0u; - } - uint param_5 = path.tiles.offset; - uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_7 = mem_ok; - Alloc path_alloc = new_alloc(param_5, param_6, param_7); - sh_row_alloc[th_ix] = path_alloc; - break; - } + _216 = path.bbox.y > 0u; } + else + { + _216 = _210; + } + if (_216) + { + row_count = 0u; + } + uint param_2 = path.tiles.offset; + uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_4 = mem_ok; + Alloc path_alloc = new_alloc(param_2, param_3, param_4); + sh_row_alloc[th_ix] = path_alloc; } sh_row_count[th_ix] = row_count; } for (uint i = 0u; i < 8u; i++) { GroupMemoryBarrierWithGroupSync(); - bool _325 = gl_LocalInvocationID.y == 0u; - bool _332; - if (_325) + bool _262 = gl_LocalInvocationID.y == 0u; + bool _269; + if (_262) { - _332 = th_ix >= (1u << i); + _269 = th_ix >= (1u << i); } else { - _332 = _325; + _269 = _262; } - if (_332) + if (_269) { row_count += sh_row_count[th_ix - (1u << i)]; } @@ -234,7 +190,7 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); uint total_rows = sh_row_count[255]; - uint _411; + uint _348; for (uint row = th_ix; row < total_rows; row += 1024u) { uint el_ix = 0u; @@ -252,27 +208,27 @@ void comp_main() Alloc tiles_alloc = sh_row_alloc[el_ix]; if (el_ix > 0u) { - _411 = sh_row_count[el_ix - 1u]; + _348 = sh_row_count[el_ix - 1u]; } else { - _411 = 0u; + _348 = 0u; } - uint seq_ix = row - _411; + uint seq_ix = row - _348; uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - uint sum = read_mem(param_8, param_9); + Alloc param_5 = tiles_alloc; + uint param_6 = tile_el_ix; + uint sum = read_mem(param_5, param_6); for (uint x = 1u; x < width; x++) { tile_el_ix += 2u; - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - sum += read_mem(param_10, param_11); - Alloc param_12 = tiles_alloc; - uint param_13 = tile_el_ix; - uint param_14 = sum; - write_mem(param_12, param_13, param_14); + Alloc param_7 = tiles_alloc; + uint param_8 = tile_el_ix; + sum += read_mem(param_7, param_8); + Alloc param_9 = tiles_alloc; + uint param_10 = tile_el_ix; + uint param_11 = sum; + write_mem(param_9, param_10, param_11); } } } diff --git a/piet-gpu/shader/gen/backdrop_lg.msl b/piet-gpu/shader/gen/backdrop_lg.msl index 31cd6cd..de43ebe 100644 --- a/piet-gpu/shader/gen/backdrop_lg.msl +++ b/piet-gpu/shader/gen/backdrop_lg.msl @@ -10,17 +10,6 @@ struct Alloc uint offset; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -61,12 +50,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -74,6 +65,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67) { Alloc param = alloc; uint param_1 = offset; @@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_79.memory[offset]; + uint v = v_67.memory[offset]; return v; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_79); - return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -uint fill_mode_from_flags(thread const uint& flags) -{ - return flags & 1u; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_79); + uint raw0 = read_mem(param, param_1, v_67); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_79); + uint raw1 = read_mem(param_2, param_3, v_67); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_79); + uint raw2 = read_mem(param_4, param_5, v_67); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67) { Alloc param = alloc; uint param_1 = offset; @@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_79.memory[offset] = val; + v_67.memory[offset] = val; } -kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_row_width[256]; threadgroup Alloc sh_row_alloc[256]; threadgroup uint sh_row_count[256]; uint th_ix = gl_LocalInvocationIndex; uint element_ix = gl_GlobalInvocationID.x; - AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) }; uint row_count = 0u; - bool mem_ok = v_79.mem_error == 0u; + bool mem_ok = v_67.mem_error == 0u; if (gl_LocalInvocationID.y == 0u) { - if (element_ix < _186.conf.n_elements) + if (element_ix < _166.conf.n_elements) { + PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) }; Alloc param; - param.offset = _186.conf.anno_alloc.offset; - AnnotatedRef param_1 = ref; - AnnotatedTag tag = Annotated_tag(param, param_1, v_79); - switch (tag.tag) + param.offset = _166.conf.tile_alloc.offset; + PathRef param_1 = path_ref; + Path path = Path_read(param, param_1, v_67); + sh_row_width[th_ix] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + bool _210 = row_count == 1u; + bool _216; + if (_210) { - case 3u: - case 2u: - case 4u: - case 1u: - { - uint param_2 = tag.flags; - if (fill_mode_from_flags(param_2) != 0u) - { - break; - } - PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_3; - param_3.offset = _186.conf.tile_alloc.offset; - PathRef param_4 = path_ref; - Path path = Path_read(param_3, param_4, v_79); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _272 = row_count == 1u; - bool _278; - if (_272) - { - _278 = path.bbox.y > 0u; - } - else - { - _278 = _272; - } - if (_278) - { - row_count = 0u; - } - uint param_5 = path.tiles.offset; - uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_7 = mem_ok; - Alloc path_alloc = new_alloc(param_5, param_6, param_7); - sh_row_alloc[th_ix] = path_alloc; - break; - } + _216 = path.bbox.y > 0u; } + else + { + _216 = _210; + } + if (_216) + { + row_count = 0u; + } + uint param_2 = path.tiles.offset; + uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_4 = mem_ok; + Alloc path_alloc = new_alloc(param_2, param_3, param_4); + sh_row_alloc[th_ix] = path_alloc; } sh_row_count[th_ix] = row_count; } for (uint i = 0u; i < 8u; i++) { threadgroup_barrier(mem_flags::mem_threadgroup); - bool _325 = gl_LocalInvocationID.y == 0u; - bool _332; - if (_325) + bool _262 = gl_LocalInvocationID.y == 0u; + bool _269; + if (_262) { - _332 = th_ix >= (1u << i); + _269 = th_ix >= (1u << i); } else { - _332 = _325; + _269 = _262; } - if (_332) + if (_269) { row_count += sh_row_count[th_ix - (1u << i)]; } @@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18 } threadgroup_barrier(mem_flags::mem_threadgroup); uint total_rows = sh_row_count[255]; - uint _411; + uint _348; for (uint row = th_ix; row < total_rows; row += 1024u) { uint el_ix = 0u; @@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18 Alloc tiles_alloc = sh_row_alloc[el_ix]; if (el_ix > 0u) { - _411 = sh_row_count[el_ix - 1u]; + _348 = sh_row_count[el_ix - 1u]; } else { - _411 = 0u; + _348 = 0u; } - uint seq_ix = row - _411; + uint seq_ix = row - _348; uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - uint sum = read_mem(param_8, param_9, v_79); + Alloc param_5 = tiles_alloc; + uint param_6 = tile_el_ix; + uint sum = read_mem(param_5, param_6, v_67); for (uint x = 1u; x < width; x++) { tile_el_ix += 2u; - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - sum += read_mem(param_10, param_11, v_79); - Alloc param_12 = tiles_alloc; - uint param_13 = tile_el_ix; - uint param_14 = sum; - write_mem(param_12, param_13, param_14, v_79); + Alloc param_7 = tiles_alloc; + uint param_8 = tile_el_ix; + sum += read_mem(param_7, param_8, v_67); + Alloc param_9 = tiles_alloc; + uint param_10 = tile_el_ix; + uint param_11 = sum; + write_mem(param_9, param_10, param_11, v_67); } } } diff --git a/piet-gpu/shader/gen/backdrop_lg.spv b/piet-gpu/shader/gen/backdrop_lg.spv index 3f90d2e..ff2b1d7 100644 Binary files a/piet-gpu/shader/gen/backdrop_lg.spv and b/piet-gpu/shader/gen/backdrop_lg.spv differ diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil index 26b5f16..6b3efaf 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.dxil and b/piet-gpu/shader/gen/bbox_clear.dxil differ diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl index 903d84c..8a884d3 100644 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ b/piet-gpu/shader/gen/bbox_clear.hlsl @@ -15,12 +15,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -28,6 +30,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); @@ -44,7 +48,7 @@ struct SPIRV_Cross_Input void comp_main() { uint ix = gl_GlobalInvocationID.x; - if (ix < _21.Load(68)) + if (ix < _21.Load(76)) { uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); _45.Store(out_ix * 4 + 8, 65535u); diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl index e80f15e..c278c68 100644 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ b/piet-gpu/shader/gen/bbox_clear.msl @@ -20,12 +20,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -33,6 +35,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -54,7 +58,7 @@ kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 uint ix = gl_GlobalInvocationID.x; if (ix < _21.conf.n_path) { - uint out_ix = (_21.conf.bbox_alloc.offset >> uint(2)) + (6u * ix); + uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix); _45.memory[out_ix] = 65535u; _45.memory[out_ix + 1u] = 65535u; _45.memory[out_ix + 2u] = 0u; diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv index e3e88d7..58a270e 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.spv and b/piet-gpu/shader/gen/bbox_clear.spv differ diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil index fe53f27..4a4f073 100644 Binary files a/piet-gpu/shader/gen/binning.dxil and b/piet-gpu/shader/gen/binning.dxil differ diff --git a/piet-gpu/shader/gen/binning.hlsl b/piet-gpu/shader/gen/binning.hlsl index e4de1b9..986f42b 100644 --- a/piet-gpu/shader/gen/binning.hlsl +++ b/piet-gpu/shader/gen/binning.hlsl @@ -9,17 +9,6 @@ struct MallocResult bool failed; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct BinInstanceRef { uint offset; @@ -34,6 +23,8 @@ struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; struct Config @@ -48,12 +39,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -61,12 +54,14 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _94 : register(u0, space0); -ByteAddressBuffer _202 : register(t1, space0); +RWByteAddressBuffer _81 : register(u0, space0); +ByteAddressBuffer _156 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -81,59 +76,35 @@ groupshared bool sh_alloc_failed; groupshared uint count[8][256]; groupshared Alloc sh_chunk_alloc[256]; -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _94.Load(offset * 4 + 8); - return v; -} - -AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _181; -} - DrawMonoid load_draw_monoid(uint element_ix) { - uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix); - uint path_ix = _94.Load(base * 4 + 8); - uint clip_ix = _94.Load((base + 1u) * 4 + 8); - DrawMonoid _222 = { path_ix, clip_ix }; - return _222; + uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix); + uint path_ix = _81.Load(base * 4 + 8); + uint clip_ix = _81.Load((base + 1u) * 4 + 8); + uint scene_offset = _81.Load((base + 2u) * 4 + 8); + uint info_offset = _81.Load((base + 3u) * 4 + 8); + DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset }; + return _190; } float4 load_clip_bbox(uint clip_ix) { - uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix); - float x0 = asfloat(_94.Load(base * 4 + 8)); - float y0 = asfloat(_94.Load((base + 1u) * 4 + 8)); - float x1 = asfloat(_94.Load((base + 2u) * 4 + 8)); - float y1 = asfloat(_94.Load((base + 3u) * 4 + 8)); + uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix); + float x0 = asfloat(_81.Load(base * 4 + 8)); + float y0 = asfloat(_81.Load((base + 1u) * 4 + 8)); + float x1 = asfloat(_81.Load((base + 2u) * 4 + 8)); + float y1 = asfloat(_81.Load((base + 3u) * 4 + 8)); float4 bbox = float4(x0, y0, x1, y1); return bbox; } float4 load_path_bbox(uint path_ix) { - uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f; - float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f; + uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix); + float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f; + float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); return bbox; } @@ -143,13 +114,13 @@ float4 bbox_intersect(float4 a, float4 b) return float4(max(a.xy, b.xy), min(a.zw, b.zw)); } -void store_path_bbox(AnnotatedRef ref, float4 bbox) +void store_draw_bbox(uint draw_ix, float4 bbox) { - uint ix = ref.offset >> uint(2); - _94.Store((ix + 1u) * 4 + 8, asuint(bbox.x)); - _94.Store((ix + 2u) * 4 + 8, asuint(bbox.y)); - _94.Store((ix + 3u) * 4 + 8, asuint(bbox.z)); - _94.Store((ix + 4u) * 4 + 8, asuint(bbox.w)); + uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix); + _81.Store(base * 4 + 8, asuint(bbox.x)); + _81.Store((base + 1u) * 4 + 8, asuint(bbox.y)); + _81.Store((base + 2u) * 4 + 8, asuint(bbox.z)); + _81.Store((base + 3u) * 4 + 8, asuint(bbox.w)); } Alloc new_alloc(uint offset, uint size, bool mem_ok) @@ -161,27 +132,32 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) MallocResult malloc(uint size) { - uint _100; - _94.InterlockedAdd(0, size, _100); - uint offset = _100; - uint _107; - _94.GetDimensions(_107); - _107 = (_107 - 8) / 4; + uint _87; + _81.InterlockedAdd(0, size, _87); + uint offset = _87; + uint _94; + _81.GetDimensions(_94); + _94 = (_94 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_107) * 4); + r.failed = (offset + size) > uint(int(_94) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _129; - _94.InterlockedMax(4, 1u, _129); + uint _116; + _81.InterlockedMax(4, 1u, _116); return r; } return r; } +bool touch_mem(Alloc alloc, uint offset) +{ + return true; +} + void write_mem(Alloc alloc, uint offset, uint val) { Alloc param = alloc; @@ -190,7 +166,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _94.Store(offset * 4 + 8, val); + _81.Store(offset * 4 + 8, val); } void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) @@ -204,7 +180,6 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) void comp_main() { - uint my_n_elements = _202.Load(0); uint my_partition = gl_WorkGroupID.x; for (uint i = 0u; i < 8u; i++) { @@ -216,62 +191,42 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _415; - uint tag = 0u; - if (element_ix < my_n_elements) - { - Alloc _425; - _425.offset = _202.Load(32); - Alloc param; - param.offset = _425.offset; - AnnotatedRef param_1 = ref; - tag = Annotated_tag(param, param_1).tag; - } int x0 = 0; int y0 = 0; int x1 = 0; int y1 = 0; - switch (tag) + if (element_ix < _156.Load(0)) { - case 1u: - case 2u: - case 3u: - case 4u: - case 5u: + uint param = element_ix; + DrawMonoid draw_monoid = load_draw_monoid(param); + uint path_ix = draw_monoid.path_ix; + float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + uint clip_ix = draw_monoid.clip_ix; + if (clip_ix > 0u) { - uint param_2 = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param_2); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_3 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_3); - } - uint param_4 = path_ix; - float4 path_bbox = load_path_bbox(param_4); - float4 param_5 = path_bbox; - float4 param_6 = clip_bbox; - float4 bbox = bbox_intersect(param_5, param_6); - float4 _473 = bbox; - float4 _475 = bbox; - float2 _477 = max(_473.xy, _475.zw); - bbox.z = _477.x; - bbox.w = _477.y; - AnnotatedRef param_7 = ref; - float4 param_8 = bbox; - store_path_bbox(param_7, param_8); - x0 = int(floor(bbox.x * 0.00390625f)); - y0 = int(floor(bbox.y * 0.00390625f)); - x1 = int(ceil(bbox.z * 0.00390625f)); - y1 = int(ceil(bbox.w * 0.00390625f)); - break; + uint param_1 = clip_ix - 1u; + clip_bbox = load_clip_bbox(param_1); } + uint param_2 = path_ix; + float4 path_bbox = load_path_bbox(param_2); + float4 param_3 = path_bbox; + float4 param_4 = clip_bbox; + float4 bbox = bbox_intersect(param_3, param_4); + float4 _417 = bbox; + float4 _419 = bbox; + float2 _421 = max(_417.xy, _419.zw); + bbox.z = _421.x; + bbox.w = _421.y; + uint param_5 = element_ix; + float4 param_6 = bbox; + store_draw_bbox(param_5, param_6); + x0 = int(floor(bbox.x * 0.00390625f)); + y0 = int(floor(bbox.y * 0.00390625f)); + x1 = int(ceil(bbox.z * 0.00390625f)); + y1 = int(ceil(bbox.w * 0.00390625f)); } - uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u; - uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u; + uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u; + uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u; x0 = clamp(x0, 0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins)); y0 = clamp(y0, 0, int(height_in_bins)); @@ -286,8 +241,8 @@ void comp_main() uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); while (y < y1) { - uint _581; - InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581); + uint _523; + InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523); x++; if (x == x1) { @@ -302,15 +257,15 @@ void comp_main() element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); count[i_1][gl_LocalInvocationID.x] = element_count; } - uint param_9 = 0u; - uint param_10 = 0u; - bool param_11 = true; - Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); + uint param_7 = 0u; + uint param_8 = 0u; + bool param_9 = true; + Alloc chunk_alloc = new_alloc(param_7, param_8, param_9); if (element_count != 0u) { - uint param_12 = element_count * 4u; - MallocResult _631 = malloc(param_12); - MallocResult chunk = _631; + uint param_10 = element_count * 4u; + MallocResult _573 = malloc(param_10); + MallocResult chunk = _573; chunk_alloc = chunk.alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; if (chunk.failed) @@ -318,32 +273,32 @@ void comp_main() sh_alloc_failed = true; } } - uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc _660; - _660.offset = _202.Load(20); - Alloc param_13; - param_13.offset = _660.offset; - uint param_14 = out_ix; - uint param_15 = element_count; - write_mem(param_13, param_14, param_15); - Alloc _672; - _672.offset = _202.Load(20); - Alloc param_16; - param_16.offset = _672.offset; - uint param_17 = out_ix + 1u; - uint param_18 = chunk_alloc.offset; - write_mem(param_16, param_17, param_18); + uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); + Alloc _603; + _603.offset = _156.Load(20); + Alloc param_11; + param_11.offset = _603.offset; + uint param_12 = out_ix; + uint param_13 = element_count; + write_mem(param_11, param_12, param_13); + Alloc _615; + _615.offset = _156.Load(20); + Alloc param_14; + param_14.offset = _615.offset; + uint param_15 = out_ix + 1u; + uint param_16 = chunk_alloc.offset; + write_mem(param_14, param_15, param_16); GroupMemoryBarrierWithGroupSync(); - bool _687; + bool _630; if (!sh_alloc_failed) { - _687 = _94.Load(4) != 0u; + _630 = _81.Load(4) != 0u; } else { - _687 = sh_alloc_failed; + _630 = sh_alloc_failed; } - if (_687) + if (_630) { return; } @@ -362,12 +317,12 @@ void comp_main() } Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint out_offset = out_alloc.offset + (idx * 4u); - BinInstanceRef _749 = { out_offset }; - BinInstance _751 = { element_ix }; - Alloc param_19 = out_alloc; - BinInstanceRef param_20 = _749; - BinInstance param_21 = _751; - BinInstance_write(param_19, param_20, param_21); + BinInstanceRef _692 = { out_offset }; + BinInstance _694 = { element_ix }; + Alloc param_17 = out_alloc; + BinInstanceRef param_18 = _692; + BinInstance param_19 = _694; + BinInstance_write(param_17, param_18, param_19); } x++; if (x == x1) diff --git a/piet-gpu/shader/gen/binning.msl b/piet-gpu/shader/gen/binning.msl index 0e3b6c8..2ee5168 100644 --- a/piet-gpu/shader/gen/binning.msl +++ b/piet-gpu/shader/gen/binning.msl @@ -18,17 +18,6 @@ struct MallocResult bool failed; }; -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct BinInstanceRef { uint offset; @@ -43,6 +32,8 @@ struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; struct Memory @@ -69,12 +60,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -82,6 +75,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -92,62 +87,36 @@ struct ConfigBuf constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) +DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) { - return true; + uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix); + uint path_ix = v_81.memory[base]; + uint clip_ix = v_81.memory[base + 1u]; + uint scene_offset = v_81.memory[base + 2u]; + uint info_offset = v_81.memory[base + 3u]; + return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset }; } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize) +float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) { - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_94.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize); - return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) -{ - uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); - uint path_ix = v_94.memory[base]; - uint clip_ix = v_94.memory[base + 1u]; - return DrawMonoid{ path_ix, clip_ix }; -} - -static inline __attribute__((always_inline)) -float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) -{ - uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); - float x0 = as_type(v_94.memory[base]); - float y0 = as_type(v_94.memory[base + 1u]); - float x1 = as_type(v_94.memory[base + 2u]); - float y1 = as_type(v_94.memory[base + 3u]); + uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); + float x0 = as_type(v_81.memory[base]); + float y0 = as_type(v_81.memory[base + 1u]); + float x1 = as_type(v_81.memory[base + 2u]); + float y1 = as_type(v_81.memory[base + 3u]); float4 bbox = float4(x0, y0, x1, y1); return bbox; } static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) +float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) { - uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_94.memory[base]) - 32768.0; - float bbox_t = float(v_94.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_94.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_94.memory[base + 3u]) - 32768.0; + uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); + float bbox_l = float(v_81.memory[base]) - 32768.0; + float bbox_t = float(v_81.memory[base + 1u]) - 32768.0; + float bbox_r = float(v_81.memory[base + 2u]) - 32768.0; + float bbox_b = float(v_81.memory[base + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); return bbox; } @@ -159,13 +128,13 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b) } static inline __attribute__((always_inline)) -void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize) +void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) { - uint ix = ref.offset >> uint(2); - v_94.memory[ix + 1u] = as_type(bbox.x); - v_94.memory[ix + 2u] = as_type(bbox.y); - v_94.memory[ix + 3u] = as_type(bbox.z); - v_94.memory[ix + 4u] = as_type(bbox.w); + uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); + v_81.memory[base] = as_type(bbox.x); + v_81.memory[base + 1u] = as_type(bbox.y); + v_81.memory[base + 2u] = as_type(bbox.z); + v_81.memory[base + 3u] = as_type(bbox.w); } static inline __attribute__((always_inline)) @@ -177,26 +146,32 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize) { - uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed); - uint offset = _100; + uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed); + uint offset = _87; MallocResult r; - r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed); + uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize) +bool touch_mem(thread const Alloc& alloc, thread const uint& offset) +{ + return true; +} + +static inline __attribute__((always_inline)) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -204,27 +179,26 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_94.memory[offset] = val; + v_81.memory[offset] = val; } static inline __attribute__((always_inline)) -void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize) +void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.element_ix; - write_mem(param, param_1, param_2, v_94, v_94BufferSize); + write_mem(param, param_1, param_2, v_81, v_81BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint bitmaps[8][256]; threadgroup short sh_alloc_failed; threadgroup uint count[8][256]; threadgroup Alloc sh_chunk_alloc[256]; - constant uint& v_94BufferSize = spvBufferSizeConstants[0]; - uint my_n_elements = v_202.conf.n_elements; + constant uint& v_81BufferSize = spvBufferSizeConstants[0]; uint my_partition = gl_WorkGroupID.x; for (uint i = 0u; i < 8u; i++) { @@ -236,59 +210,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } threadgroup_barrier(mem_flags::mem_threadgroup); uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) }; - uint tag = 0u; - if (element_ix < my_n_elements) - { - Alloc param; - param.offset = v_202.conf.anno_alloc.offset; - AnnotatedRef param_1 = ref; - tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag; - } int x0 = 0; int y0 = 0; int x1 = 0; int y1 = 0; - switch (tag) + if (element_ix < v_156.conf.n_elements) { - case 1u: - case 2u: - case 3u: - case 4u: - case 5u: + uint param = element_ix; + DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156); + uint path_ix = draw_monoid.path_ix; + float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + uint clip_ix = draw_monoid.clip_ix; + if (clip_ix > 0u) { - uint param_2 = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_3 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202); - } - uint param_4 = path_ix; - float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202); - float4 param_5 = path_bbox; - float4 param_6 = clip_bbox; - float4 bbox = bbox_intersect(param_5, param_6); - float4 _473 = bbox; - float4 _475 = bbox; - float2 _477 = fast::max(_473.xy, _475.zw); - bbox.z = _477.x; - bbox.w = _477.y; - AnnotatedRef param_7 = ref; - float4 param_8 = bbox; - store_path_bbox(param_7, param_8, v_94, v_94BufferSize); - x0 = int(floor(bbox.x * 0.00390625)); - y0 = int(floor(bbox.y * 0.00390625)); - x1 = int(ceil(bbox.z * 0.00390625)); - y1 = int(ceil(bbox.w * 0.00390625)); - break; + uint param_1 = clip_ix - 1u; + clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156); } + uint param_2 = path_ix; + float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156); + float4 param_3 = path_bbox; + float4 param_4 = clip_bbox; + float4 bbox = bbox_intersect(param_3, param_4); + float4 _417 = bbox; + float4 _419 = bbox; + float2 _421 = fast::max(_417.xy, _419.zw); + bbox.z = _421.x; + bbox.w = _421.y; + uint param_5 = element_ix; + float4 param_6 = bbox; + store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156); + x0 = int(floor(bbox.x * 0.00390625)); + y0 = int(floor(bbox.y * 0.00390625)); + x1 = int(ceil(bbox.z * 0.00390625)); + y1 = int(ceil(bbox.w * 0.00390625)); } - uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u; - uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u; + uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u; + uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u; x0 = clamp(x0, 0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins)); y0 = clamp(y0, 0, int(height_in_bins)); @@ -303,7 +260,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); while (y < y1) { - uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); + uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); x++; if (x == x1) { @@ -318,15 +275,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); count[i_1][gl_LocalInvocationID.x] = element_count; } - uint param_9 = 0u; - uint param_10 = 0u; - bool param_11 = true; - Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); + uint param_7 = 0u; + uint param_8 = 0u; + bool param_9 = true; + Alloc chunk_alloc = new_alloc(param_7, param_8, param_9); if (element_count != 0u) { - uint param_12 = element_count * 4u; - MallocResult _631 = malloc(param_12, v_94, v_94BufferSize); - MallocResult chunk = _631; + uint param_10 = element_count * 4u; + MallocResult _573 = malloc(param_10, v_81, v_81BufferSize); + MallocResult chunk = _573; chunk_alloc = chunk.alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; if (chunk.failed) @@ -334,28 +291,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M sh_alloc_failed = short(true); } } - uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc param_13; - param_13.offset = v_202.conf.bin_alloc.offset; - uint param_14 = out_ix; - uint param_15 = element_count; - write_mem(param_13, param_14, param_15, v_94, v_94BufferSize); - Alloc param_16; - param_16.offset = v_202.conf.bin_alloc.offset; - uint param_17 = out_ix + 1u; - uint param_18 = chunk_alloc.offset; - write_mem(param_16, param_17, param_18, v_94, v_94BufferSize); + uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); + Alloc param_11; + param_11.offset = v_156.conf.bin_alloc.offset; + uint param_12 = out_ix; + uint param_13 = element_count; + write_mem(param_11, param_12, param_13, v_81, v_81BufferSize); + Alloc param_14; + param_14.offset = v_156.conf.bin_alloc.offset; + uint param_15 = out_ix + 1u; + uint param_16 = chunk_alloc.offset; + write_mem(param_14, param_15, param_16, v_81, v_81BufferSize); threadgroup_barrier(mem_flags::mem_threadgroup); - bool _687; + bool _630; if (!bool(sh_alloc_failed)) { - _687 = v_94.mem_error != 0u; + _630 = v_81.mem_error != 0u; } else { - _687 = bool(sh_alloc_failed); + _630 = bool(sh_alloc_failed); } - if (_687) + if (_630) { return; } @@ -374,10 +331,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint out_offset = out_alloc.offset + (idx * 4u); - Alloc param_19 = out_alloc; - BinInstanceRef param_20 = BinInstanceRef{ out_offset }; - BinInstance param_21 = BinInstance{ element_ix }; - BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize); + Alloc param_17 = out_alloc; + BinInstanceRef param_18 = BinInstanceRef{ out_offset }; + BinInstance param_19 = BinInstance{ element_ix }; + BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize); } x++; if (x == x1) diff --git a/piet-gpu/shader/gen/binning.spv b/piet-gpu/shader/gen/binning.spv index eca0692..30eacd6 100644 Binary files a/piet-gpu/shader/gen/binning.spv and b/piet-gpu/shader/gen/binning.spv differ diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil index fe87a11..b681a65 100644 Binary files a/piet-gpu/shader/gen/clip_leaf.dxil and b/piet-gpu/shader/gen/clip_leaf.dxil differ diff --git a/piet-gpu/shader/gen/clip_leaf.hlsl b/piet-gpu/shader/gen/clip_leaf.hlsl index d570420..ed45bf1 100644 --- a/piet-gpu/shader/gen/clip_leaf.hlsl +++ b/piet-gpu/shader/gen/clip_leaf.hlsl @@ -27,12 +27,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -40,6 +42,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); @@ -99,7 +103,7 @@ float4 bbox_intersect(float4 a, float4 b) uint load_path_ix(uint ix) { - if (ix < _80.Load(72)) + if (ix < _80.Load(80)) { return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8); } @@ -324,7 +328,7 @@ void comp_main() bool _725; if (_717) { - _725 = gl_GlobalInvocationID.x < _80.Load(72); + _725 = gl_GlobalInvocationID.x < _80.Load(80); } else { @@ -334,7 +338,7 @@ void comp_main() { uint param_15 = parent; path_ix = load_path_ix(param_15); - uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp)); + uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp)); _96.Store(drawmonoid_out_base * 4 + 8, path_ix); if (int(grandparent) >= 0) { diff --git a/piet-gpu/shader/gen/clip_leaf.msl b/piet-gpu/shader/gen/clip_leaf.msl index 4e2d059..5f5e0a7 100644 --- a/piet-gpu/shader/gen/clip_leaf.msl +++ b/piet-gpu/shader/gen/clip_leaf.msl @@ -34,12 +34,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -47,6 +49,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -112,7 +116,7 @@ uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Me static inline __attribute__((always_inline)) float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96) { - uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); + uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); float bbox_l = float(v_96.memory[base]) - 32768.0; float bbox_t = float(v_96.memory[base + 1u]) - 32768.0; float bbox_r = float(v_96.memory[base + 2u]) - 32768.0; @@ -341,7 +345,7 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8 { uint param_15 = parent; path_ix = load_path_ix(param_15, v_80, v_96); - uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp)); + uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp)); v_96.memory[drawmonoid_out_base] = path_ix; if (int(grandparent) >= 0) { diff --git a/piet-gpu/shader/gen/clip_leaf.spv b/piet-gpu/shader/gen/clip_leaf.spv index 7c4c174..beac64b 100644 Binary files a/piet-gpu/shader/gen/clip_leaf.spv and b/piet-gpu/shader/gen/clip_leaf.spv differ diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil index 0dff71b..0ccaac9 100644 Binary files a/piet-gpu/shader/gen/clip_reduce.dxil and b/piet-gpu/shader/gen/clip_reduce.dxil differ diff --git a/piet-gpu/shader/gen/clip_reduce.hlsl b/piet-gpu/shader/gen/clip_reduce.hlsl index 6851e63..1276b5f 100644 --- a/piet-gpu/shader/gen/clip_reduce.hlsl +++ b/piet-gpu/shader/gen/clip_reduce.hlsl @@ -27,12 +27,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -40,6 +42,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); diff --git a/piet-gpu/shader/gen/clip_reduce.msl b/piet-gpu/shader/gen/clip_reduce.msl index 5845676..26214f1 100644 --- a/piet-gpu/shader/gen/clip_reduce.msl +++ b/piet-gpu/shader/gen/clip_reduce.msl @@ -34,12 +34,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -47,6 +49,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -81,7 +85,7 @@ void store_bic(thread const uint& ix, thread const Bic& bic, const device Config static inline __attribute__((always_inline)) float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80) { - uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); + uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); float bbox_l = float(v_80.memory[base]) - 32768.0; float bbox_t = float(v_80.memory[base + 1u]) - 32768.0; float bbox_r = float(v_80.memory[base + 2u]) - 32768.0; diff --git a/piet-gpu/shader/gen/clip_reduce.spv b/piet-gpu/shader/gen/clip_reduce.spv index cbe8c1f..ce0b9bb 100644 Binary files a/piet-gpu/shader/gen/clip_reduce.spv and b/piet-gpu/shader/gen/clip_reduce.spv differ diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index c7e1682..2339e7d 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index f00eabe..db1e496 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -9,68 +9,6 @@ struct MallocResult bool failed; }; -struct AnnoImageRef -{ - uint offset; -}; - -struct AnnoImage -{ - float4 bbox; - float linewidth; - uint index; - int2 offset; -}; - -struct AnnoColorRef -{ - uint offset; -}; - -struct AnnoColor -{ - float4 bbox; - float linewidth; - uint rgba_color; -}; - -struct AnnoLinGradientRef -{ - uint offset; -}; - -struct AnnoLinGradient -{ - float4 bbox; - float linewidth; - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct BinInstanceRef { uint offset; @@ -201,12 +139,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -214,12 +154,15 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _308 : register(u0, space0); -ByteAddressBuffer _1283 : register(t1, space0); +RWByteAddressBuffer _242 : register(u0, space0); +ByteAddressBuffer _854 : register(t1, space0); +ByteAddressBuffer _1222 : register(t2, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -242,8 +185,8 @@ groupshared uint sh_tile_count[256]; Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _385 = { a.offset + offset }; - return _385; + Alloc _319 = { a.offset + offset }; + return _319; } bool touch_mem(Alloc alloc, uint offset) @@ -259,7 +202,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _308.Load(offset * 4 + 8); + uint v = _242.Load(offset * 4 + 8); return v; } @@ -272,8 +215,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { - BinInstanceRef _765 = { ref.offset + (index * 4u) }; - return _765; + BinInstanceRef _328 = { ref.offset + (index * 4u) }; + return _328; } BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) @@ -287,15 +230,6 @@ BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) return s; } -AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _717 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _717; -} - Path Path_read(Alloc a, PathRef ref) { uint ix = ref.offset >> uint(2); @@ -310,8 +244,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _825 = { raw2 }; - s.tiles = _825; + TileRef _391 = { raw2 }; + s.tiles = _391; return s; } @@ -321,11 +255,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _1169; - _308.GetDimensions(_1169); - _1169 = (_1169 - 8) / 4; + uint _741; + _242.GetDimensions(_741); + _741 = (_741 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_1169) * 4); + uint param_1 = uint(int(_741) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -339,67 +273,31 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _850 = { raw0 }; + TileSegRef _416 = { raw0 }; Tile s; - s.tile = _850; + s.tile = _416; s.backdrop = int(raw1); return s; } -AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - AnnoColor s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.linewidth = asfloat(raw4); - s.rgba_color = raw5; - return s; -} - -AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) -{ - AnnoColorRef _723 = { ref.offset + 4u }; - Alloc param = a; - AnnoColorRef param_1 = _723; - return AnnoColor_read(param, param_1); -} - MallocResult malloc(uint size) { - uint _314; - _308.InterlockedAdd(0, size, _314); - uint offset = _314; - uint _321; - _308.GetDimensions(_321); - _321 = (_321 - 8) / 4; + uint _248; + _242.InterlockedAdd(0, size, _248); + uint offset = _248; + uint _255; + _242.GetDimensions(_255); + _255 = (_255 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_321) * 4); + r.failed = (offset + size) > uint(int(_255) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _343; - _308.InterlockedMax(4, 1u, _343); + uint _277; + _242.InterlockedMax(4, 1u, _277); return r; } return r; @@ -413,7 +311,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _308.Store(offset * 4 + 8, val); + _242.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -431,9 +329,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdJumpRef _1162 = { ref.offset + 4u }; + CmdJumpRef _734 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _1162; + CmdJumpRef param_4 = _734; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -445,30 +343,25 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _1190 = malloc(param); - MallocResult new_cmd = _1190; + MallocResult _762 = malloc(param); + MallocResult new_cmd = _762; if (new_cmd.failed) { return false; } - CmdJump _1200 = { new_cmd.alloc.offset }; - CmdJump jump = _1200; + CmdJump _772 = { new_cmd.alloc.offset }; + CmdJump jump = _772; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _1212 = { cmd_alloc.offset }; - cmd_ref = _1212; + CmdRef _784 = { cmd_alloc.offset }; + cmd_ref = _784; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; return true; } -uint fill_mode_from_flags(uint flags) -{ - return flags & 1u; -} - void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) { uint ix = ref.offset >> uint(2); @@ -488,9 +381,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, param_2); - CmdFillRef _1036 = { ref.offset + 4u }; + CmdFillRef _604 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _1036; + CmdFillRef param_4 = _604; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -522,44 +415,43 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _1054 = { ref.offset + 4u }; + CmdStrokeRef _622 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _1054; + CmdStrokeRef param_4 = _622; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } -void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) +void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) { - uint param = flags; - if (fill_mode_from_flags(param) == 0u) + if (linewidth < 0.0f) { if (tile.tile.offset != 0u) { - CmdFill _1236 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _1236; - Alloc param_1 = alloc; - CmdRef param_2 = cmd_ref; - CmdFill param_3 = cmd_fill; - Cmd_Fill_write(param_1, param_2, param_3); + CmdFill _807 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _807; + Alloc param = alloc; + CmdRef param_1 = cmd_ref; + CmdFill param_2 = cmd_fill; + Cmd_Fill_write(param, param_1, param_2); cmd_ref.offset += 12u; } else { - Alloc param_4 = alloc; - CmdRef param_5 = cmd_ref; - Cmd_Solid_write(param_4, param_5); + Alloc param_3 = alloc; + CmdRef param_4 = cmd_ref; + Cmd_Solid_write(param_3, param_4); cmd_ref.offset += 4u; } } else { - CmdStroke _1266 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _1266; - Alloc param_6 = alloc; - CmdRef param_7 = cmd_ref; - CmdStroke param_8 = cmd_stroke; - Cmd_Stroke_write(param_6, param_7, param_8); + CmdStroke _837 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _837; + Alloc param_5 = alloc; + CmdRef param_6 = cmd_ref; + CmdStroke param_7 = cmd_stroke; + Cmd_Stroke_write(param_5, param_6, param_7); cmd_ref.offset += 12u; } } @@ -579,61 +471,13 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _1080 = { ref.offset + 4u }; + CmdColorRef _649 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _1080; + CmdColorRef param_4 = _649; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } -AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - AnnoLinGradient s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.linewidth = asfloat(raw4); - s.index = raw5; - s.line_x = asfloat(raw6); - s.line_y = asfloat(raw7); - s.line_c = asfloat(raw8); - return s; -} - -AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref) -{ - AnnoLinGradientRef _733 = { ref.offset + 4u }; - Alloc param = a; - AnnoLinGradientRef param_1 = _733; - return AnnoLinGradient_read(param, param_1); -} - void CmdLinGrad_write(Alloc a, CmdLinGradRef ref, CmdLinGrad s) { uint ix = ref.offset >> uint(2); @@ -661,53 +505,13 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _1098 = { ref.offset + 4u }; + CmdLinGradRef _668 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _1098; + CmdLinGradRef param_4 = _668; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } -AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - AnnoImage s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.linewidth = asfloat(raw4); - s.index = raw5; - s.offset = int2(int(raw6 << uint(16)) >> 16, int(raw6) >> 16); - return s; -} - -AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) -{ - AnnoImageRef _743 = { ref.offset + 4u }; - Alloc param = a; - AnnoImageRef param_1 = _743; - return AnnoImage_read(param, param_1); -} - void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) { uint ix = ref.offset >> uint(2); @@ -727,9 +531,9 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; write_mem(param, param_1, param_2); - CmdImageRef _1116 = { ref.offset + 4u }; + CmdImageRef _687 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _1116; + CmdImageRef param_4 = _687; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } @@ -742,38 +546,6 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) write_mem(param, param_1, param_2); } -AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - AnnoEndClip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.blend = raw4; - return s; -} - -AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) -{ - AnnoEndClipRef _753 = { ref.offset + 4u }; - Alloc param = a; - AnnoEndClipRef param_1 = _753; - return AnnoEndClip_read(param, param_1); -} - void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) { uint ix = ref.offset >> uint(2); @@ -789,9 +561,9 @@ void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; write_mem(param, param_1, param_2); - CmdEndClipRef _1143 = { ref.offset + 4u }; + CmdEndClipRef _715 = { ref.offset + 4u }; Alloc param_3 = a; - CmdEndClipRef param_4 = _1143; + CmdEndClipRef param_4 = _715; CmdEndClip param_5 = s; CmdEndClip_write(param_3, param_4, param_5); } @@ -806,25 +578,25 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_1283.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_854.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1283.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_854.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1283.Load(8)) + bin_tile_x) + tile_x; - Alloc _1348; - _1348.offset = _1283.Load(24); + uint this_tile_ix = (((bin_tile_y + tile_y) * _854.Load(8)) + bin_tile_x) + tile_x; + Alloc _919; + _919.offset = _854.Load(24); Alloc param; - param.offset = _1348.offset; + param.offset = _919.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1357 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1357; + CmdRef _928 = { cmd_alloc.offset }; + CmdRef cmd_ref = _928; uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint clip_depth = 0u; uint clip_zero_depth = 0u; @@ -832,59 +604,54 @@ void comp_main() uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = _308.Load(4) == 0u; + uint drawmonoid_start = _854.Load(44) >> uint(2); + uint drawtag_start = _854.Load(100) >> uint(2); + uint drawdata_start = _854.Load(104) >> uint(2); + uint drawinfo_start = _854.Load(68) >> uint(2); + bool mem_ok = _242.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1562; + uint _1154; uint element_ix; - AnnotatedRef ref; Alloc param_14; - Alloc param_16; uint tile_count; - Alloc param_23; - uint _1887; - Alloc param_29; - Tile tile_1; - AnnoColor fill; - Alloc param_35; - Alloc param_52; + uint _1453; + float linewidth; CmdLinGrad cmd_lin; - Alloc param_69; - Alloc param_95; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1614; + bool _1206; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1412 = th_ix < 256u; - bool _1420; - if (_1412) + bool _1003 = th_ix < 256u; + bool _1011; + if (_1003) { - _1420 = (partition_ix + th_ix) < n_partitions; + _1011 = (partition_ix + th_ix) < n_partitions; } else { - _1420 = _1412; + _1011 = _1003; } - if (_1420) + if (_1011) { - uint in_ix = (_1283.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1437; - _1437.offset = _1283.Load(20); - param_3.offset = _1437.offset; + uint in_ix = (_854.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1029; + _1029.offset = _854.Load(20); + param_3.offset = _1029.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1448; - _1448.offset = _1283.Load(20); - param_5.offset = _1448.offset; + Alloc _1040; + _1040.offset = _854.Load(20); + param_5.offset = _1040.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -930,16 +697,16 @@ void comp_main() } if (part_ix > 0u) { - _1562 = sh_part_count[part_ix - 1u]; + _1154 = sh_part_count[part_ix - 1u]; } else { - _1562 = part_start_ix; + _1154 = part_start_ix; } - ix -= _1562; + ix -= _1154; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1581 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1581; + BinInstanceRef _1173 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1173; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -949,16 +716,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1604 = (wr_ix - rd_ix) < 256u; - if (_1604) + bool _1196 = (wr_ix - rd_ix) < 256u; + if (_1196) { - _1614 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1614 = _1604; + _1206 = _1196; } - if (_1614) + if (_1206) { continue; } @@ -971,30 +738,24 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - AnnotatedRef _1635 = { _1283.Load(32) + (element_ix * 40u) }; - ref = _1635; - Alloc _1638; - _1638.offset = _1283.Load(32); - param_14.offset = _1638.offset; - AnnotatedRef param_15 = ref; - tag = Annotated_tag(param_14, param_15).tag; + tag = _1222.Load((drawtag_start + element_ix) * 4 + 0); } switch (tag) { - case 1u: - case 3u: - case 2u: - case 4u: + case 68u: + case 72u: + case 276u: case 5u: + case 37u: { - uint drawmonoid_base = (_1283.Load(44) >> uint(2)) + (2u * element_ix); - uint path_ix = _308.Load(drawmonoid_base * 4 + 8); - PathRef _1667 = { _1283.Load(16) + (path_ix * 12u) }; - Alloc _1670; - _1670.offset = _1283.Load(16); - param_16.offset = _1670.offset; - PathRef param_17 = _1667; - Path path = Path_read(param_16, param_17); + uint drawmonoid_base = drawmonoid_start + (4u * element_ix); + uint path_ix = _242.Load(drawmonoid_base * 4 + 8); + PathRef _1247 = { _854.Load(16) + (path_ix * 12u) }; + Alloc _1250; + _1250.offset = _854.Load(16); + param_14.offset = _1250.offset; + PathRef param_15 = _1247; + Path path = Path_read(param_14, param_15); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -1009,13 +770,13 @@ void comp_main() tile_count = uint(x1 - x0) * uint(y1 - y0); uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); sh_tile_base[th_ix] = base; - uint param_18 = path.tiles.offset; - uint param_19 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_20 = mem_ok; - Alloc path_alloc = new_alloc(param_18, param_19, param_20); - uint param_21 = th_ix; - Alloc param_22 = path_alloc; - write_tile_alloc(param_21, param_22); + uint param_16 = path.tiles.offset; + uint param_17 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_18 = mem_ok; + Alloc path_alloc = new_alloc(param_16, param_17, param_18); + uint param_19 = th_ix; + Alloc param_20 = path_alloc; + write_tile_alloc(param_19, param_20); break; } default: @@ -1048,72 +809,57 @@ void comp_main() el_ix = probe_1; } } - AnnotatedRef _1869 = { _1283.Load(32) + (sh_elements[el_ix] * 40u) }; - AnnotatedRef ref_1 = _1869; - Alloc _1874; - _1874.offset = _1283.Load(32); - param_23.offset = _1874.offset; - AnnotatedRef param_24 = ref_1; - AnnotatedTag anno_tag = Annotated_tag(param_23, param_24); - uint tag_1 = anno_tag.tag; + uint tag_1 = _1222.Load((drawtag_start + sh_elements[el_ix]) * 4 + 0); if (el_ix > 0u) { - _1887 = sh_tile_count[el_ix - 1u]; + _1453 = sh_tile_count[el_ix - 1u]; } else { - _1887 = 0u; + _1453 = 0u; } - uint seq_ix = ix_1 - _1887; + uint seq_ix = ix_1 - _1453; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); bool include_tile = false; if (mem_ok) { - uint param_25 = el_ix; - bool param_26 = mem_ok; - TileRef _1939 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1939; - Tile tile = Tile_read(param_27, param_28); - bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); - bool _1951 = tile.tile.offset != 0u; - bool _1960; - if (!_1951) + uint param_21 = el_ix; + bool param_22 = mem_ok; + TileRef _1505 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + Alloc param_23 = read_tile_alloc(param_21, param_22); + TileRef param_24 = _1505; + Tile tile = Tile_read(param_23, param_24); + bool is_clip = (tag_1 & 1u) != 0u; + bool is_blend = false; + bool _1516 = tile.tile.offset != 0u; + bool _1525; + if (!_1516) { - _1960 = (tile.backdrop == 0) == is_clip; + _1525 = (tile.backdrop == 0) == is_clip; } else { - _1960 = _1951; + _1525 = _1516; } - bool _1972; - if (!_1960) + bool _1532; + if (!_1525) { - bool _1971; - if (is_clip) - { - _1971 = (anno_tag.flags & 2u) != 0u; - } - else - { - _1971 = is_clip; - } - _1972 = _1971; + _1532 = is_clip && is_blend; } else { - _1972 = _1960; + _1532 = _1525; } - include_tile = _1972; + include_tile = _1532; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1992; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1992); + uint _1552; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1552); } } GroupMemoryBarrierWithGroupSync(); @@ -1137,217 +883,176 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - AnnotatedRef _2046 = { _1283.Load(32) + (element_ix_1 * 40u) }; - ref = _2046; - Alloc _2050; - _2050.offset = _1283.Load(32); - param_29.offset = _2050.offset; - AnnotatedRef param_30 = ref; - AnnotatedTag tag_2 = Annotated_tag(param_29, param_30); + uint drawtag = _1222.Load((drawtag_start + element_ix_1) * 4 + 0); if (clip_zero_depth == 0u) { - switch (tag_2.tag) + uint param_25 = element_ref_ix; + bool param_26 = mem_ok; + TileRef _1629 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + Alloc param_27 = read_tile_alloc(param_25, param_26); + TileRef param_28 = _1629; + Tile tile_1 = Tile_read(param_27, param_28); + uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); + uint scene_offset = _242.Load((drawmonoid_base_1 + 2u) * 4 + 8); + uint info_offset = _242.Load((drawmonoid_base_1 + 3u) * 4 + 8); + uint dd = drawdata_start + (scene_offset >> uint(2)); + uint di = drawinfo_start + (info_offset >> uint(2)); + switch (drawtag) { - case 1u: + case 68u: { - uint param_31 = element_ref_ix; - bool param_32 = mem_ok; - TileRef _2086 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_33 = read_tile_alloc(param_31, param_32); - TileRef param_34 = _2086; - tile_1 = Tile_read(param_33, param_34); - Alloc _2093; - _2093.offset = _1283.Load(32); - param_35.offset = _2093.offset; - AnnotatedRef param_36 = ref; - fill = Annotated_Color_read(param_35, param_36); - Alloc param_37 = cmd_alloc; - CmdRef param_38 = cmd_ref; - uint param_39 = cmd_limit; - bool _2105 = alloc_cmd(param_37, param_38, param_39); - cmd_alloc = param_37; - cmd_ref = param_38; - cmd_limit = param_39; - if (!_2105) + linewidth = asfloat(_242.Load(di * 4 + 8)); + Alloc param_29 = cmd_alloc; + CmdRef param_30 = cmd_ref; + uint param_31 = cmd_limit; + bool _1676 = alloc_cmd(param_29, param_30, param_31); + cmd_alloc = param_29; + cmd_ref = param_30; + cmd_limit = param_31; + if (!_1676) { break; } - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - uint param_42 = tag_2.flags; - Tile param_43 = tile_1; - float param_44 = fill.linewidth; - write_fill(param_40, param_41, param_42, param_43, param_44); - cmd_ref = param_41; - CmdColor _2129 = { fill.rgba_color }; - Alloc param_45 = cmd_alloc; - CmdRef param_46 = cmd_ref; - CmdColor param_47 = _2129; - Cmd_Color_write(param_45, param_46, param_47); + Alloc param_32 = cmd_alloc; + CmdRef param_33 = cmd_ref; + Tile param_34 = tile_1; + float param_35 = linewidth; + write_fill(param_32, param_33, param_34, param_35); + cmd_ref = param_33; + uint rgba = _1222.Load(dd * 4 + 0); + CmdColor _1699 = { rgba }; + Alloc param_36 = cmd_alloc; + CmdRef param_37 = cmd_ref; + CmdColor param_38 = _1699; + Cmd_Color_write(param_36, param_37, param_38); cmd_ref.offset += 8u; break; } - case 2u: + case 276u: { - uint param_48 = element_ref_ix; - bool param_49 = mem_ok; - TileRef _2158 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_50 = read_tile_alloc(param_48, param_49); - TileRef param_51 = _2158; - tile_1 = Tile_read(param_50, param_51); - Alloc _2165; - _2165.offset = _1283.Load(32); - param_52.offset = _2165.offset; - AnnotatedRef param_53 = ref; - AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53); - Alloc param_54 = cmd_alloc; - CmdRef param_55 = cmd_ref; - uint param_56 = cmd_limit; - bool _2177 = alloc_cmd(param_54, param_55, param_56); - cmd_alloc = param_54; - cmd_ref = param_55; - cmd_limit = param_56; - if (!_2177) + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + uint param_41 = cmd_limit; + bool _1717 = alloc_cmd(param_39, param_40, param_41); + cmd_alloc = param_39; + cmd_ref = param_40; + cmd_limit = param_41; + if (!_1717) { break; } - Alloc param_57 = cmd_alloc; - CmdRef param_58 = cmd_ref; - uint param_59 = tag_2.flags; - Tile param_60 = tile_1; - float param_61 = fill.linewidth; - write_fill(param_57, param_58, param_59, param_60, param_61); - cmd_ref = param_58; - cmd_lin.index = lin.index; - cmd_lin.line_x = lin.line_x; - cmd_lin.line_y = lin.line_y; - cmd_lin.line_c = lin.line_c; - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - CmdLinGrad param_64 = cmd_lin; - Cmd_LinGrad_write(param_62, param_63, param_64); + linewidth = asfloat(_242.Load(di * 4 + 8)); + Alloc param_42 = cmd_alloc; + CmdRef param_43 = cmd_ref; + Tile param_44 = tile_1; + float param_45 = linewidth; + write_fill(param_42, param_43, param_44, param_45); + cmd_ref = param_43; + cmd_lin.index = _1222.Load(dd * 4 + 0); + cmd_lin.line_x = asfloat(_242.Load((di + 1u) * 4 + 8)); + cmd_lin.line_y = asfloat(_242.Load((di + 2u) * 4 + 8)); + cmd_lin.line_c = asfloat(_242.Load((di + 3u) * 4 + 8)); + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdLinGrad param_48 = cmd_lin; + Cmd_LinGrad_write(param_46, param_47, param_48); cmd_ref.offset += 20u; break; } - case 3u: + case 72u: { - uint param_65 = element_ref_ix; - bool param_66 = mem_ok; - TileRef _2242 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_67 = read_tile_alloc(param_65, param_66); - TileRef param_68 = _2242; - tile_1 = Tile_read(param_67, param_68); - Alloc _2249; - _2249.offset = _1283.Load(32); - param_69.offset = _2249.offset; - AnnotatedRef param_70 = ref; - AnnoImage fill_img = Annotated_Image_read(param_69, param_70); - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - uint param_73 = cmd_limit; - bool _2261 = alloc_cmd(param_71, param_72, param_73); - cmd_alloc = param_71; - cmd_ref = param_72; - cmd_limit = param_73; - if (!_2261) + linewidth = asfloat(_242.Load(di * 4 + 8)); + Alloc param_49 = cmd_alloc; + CmdRef param_50 = cmd_ref; + uint param_51 = cmd_limit; + bool _1785 = alloc_cmd(param_49, param_50, param_51); + cmd_alloc = param_49; + cmd_ref = param_50; + cmd_limit = param_51; + if (!_1785) { break; } - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - uint param_76 = tag_2.flags; - Tile param_77 = tile_1; - float param_78 = fill_img.linewidth; - write_fill(param_74, param_75, param_76, param_77, param_78); - cmd_ref = param_75; - CmdImage _2287 = { fill_img.index, fill_img.offset }; - Alloc param_79 = cmd_alloc; - CmdRef param_80 = cmd_ref; - CmdImage param_81 = _2287; - Cmd_Image_write(param_79, param_80, param_81); + Alloc param_52 = cmd_alloc; + CmdRef param_53 = cmd_ref; + Tile param_54 = tile_1; + float param_55 = linewidth; + write_fill(param_52, param_53, param_54, param_55); + cmd_ref = param_53; + uint index = _1222.Load(dd * 4 + 0); + uint raw1 = _1222.Load((dd + 1u) * 4 + 0); + int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + CmdImage _1824 = { index, offset_1 }; + Alloc param_56 = cmd_alloc; + CmdRef param_57 = cmd_ref; + CmdImage param_58 = _1824; + Cmd_Image_write(param_56, param_57, param_58); cmd_ref.offset += 12u; break; } - case 4u: + case 5u: { - uint param_82 = element_ref_ix; - bool param_83 = mem_ok; - TileRef _2316 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_84 = read_tile_alloc(param_82, param_83); - TileRef param_85 = _2316; - tile_1 = Tile_read(param_84, param_85); - bool _2322 = tile_1.tile.offset == 0u; - bool _2328; - if (_2322) + bool _1838 = tile_1.tile.offset == 0u; + bool _1844; + if (_1838) { - _2328 = tile_1.backdrop == 0; + _1844 = tile_1.backdrop == 0; } else { - _2328 = _2322; + _1844 = _1838; } - if (_2328) + if (_1844) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_86 = cmd_alloc; - CmdRef param_87 = cmd_ref; - uint param_88 = cmd_limit; - bool _2340 = alloc_cmd(param_86, param_87, param_88); - cmd_alloc = param_86; - cmd_ref = param_87; - cmd_limit = param_88; - if (!_2340) + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _1856 = alloc_cmd(param_59, param_60, param_61); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_1856) { break; } - Alloc param_89 = cmd_alloc; - CmdRef param_90 = cmd_ref; - Cmd_BeginClip_write(param_89, param_90); + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Cmd_BeginClip_write(param_62, param_63); cmd_ref.offset += 4u; } clip_depth++; break; } - case 5u: + case 37u: { - uint param_91 = element_ref_ix; - bool param_92 = mem_ok; - TileRef _2377 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_93 = read_tile_alloc(param_91, param_92); - TileRef param_94 = _2377; - tile_1 = Tile_read(param_93, param_94); - Alloc _2384; - _2384.offset = _1283.Load(32); - param_95.offset = _2384.offset; - AnnotatedRef param_96 = ref; - AnnoEndClip end_clip = Annotated_EndClip_read(param_95, param_96); clip_depth--; - Alloc param_97 = cmd_alloc; - CmdRef param_98 = cmd_ref; - uint param_99 = cmd_limit; - bool _2398 = alloc_cmd(param_97, param_98, param_99); - cmd_alloc = param_97; - cmd_ref = param_98; - cmd_limit = param_99; - if (!_2398) + Alloc param_64 = cmd_alloc; + CmdRef param_65 = cmd_ref; + uint param_66 = cmd_limit; + bool _1884 = alloc_cmd(param_64, param_65, param_66); + cmd_alloc = param_64; + cmd_ref = param_65; + cmd_limit = param_66; + if (!_1884) { break; } - Alloc param_100 = cmd_alloc; - CmdRef param_101 = cmd_ref; - uint param_102 = 0u; - Tile param_103 = tile_1; - float param_104 = 0.0f; - write_fill(param_100, param_101, param_102, param_103, param_104); - cmd_ref = param_101; - CmdEndClip _2419 = { end_clip.blend }; - Alloc param_105 = cmd_alloc; - CmdRef param_106 = cmd_ref; - CmdEndClip param_107 = _2419; - Cmd_EndClip_write(param_105, param_106, param_107); + Alloc param_67 = cmd_alloc; + CmdRef param_68 = cmd_ref; + Tile param_69 = tile_1; + float param_70 = -1.0f; + write_fill(param_67, param_68, param_69, param_70); + cmd_ref = param_68; + uint blend = _1222.Load(dd * 4 + 0); + CmdEndClip _1907 = { blend }; + Alloc param_71 = cmd_alloc; + CmdRef param_72 = cmd_ref; + CmdEndClip param_73 = _1907; + Cmd_EndClip_write(param_71, param_72, param_73); cmd_ref.offset += 8u; break; } @@ -1355,14 +1060,14 @@ void comp_main() } else { - switch (tag_2.tag) + switch (drawtag) { - case 4u: + case 5u: { clip_depth++; break; } - case 5u: + case 37u: { if (clip_depth == clip_zero_depth) { @@ -1381,21 +1086,21 @@ void comp_main() break; } } - bool _2467 = (bin_tile_x + tile_x) < _1283.Load(8); - bool _2476; - if (_2467) + bool _1954 = (bin_tile_x + tile_x) < _854.Load(8); + bool _1963; + if (_1954) { - _2476 = (bin_tile_y + tile_y) < _1283.Load(12); + _1963 = (bin_tile_y + tile_y) < _854.Load(12); } else { - _2476 = _2467; + _1963 = _1954; } - if (_2476) + if (_1963) { - Alloc param_108 = cmd_alloc; - CmdRef param_109 = cmd_ref; - Cmd_End_write(param_108, param_109); + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + Cmd_End_write(param_74, param_75); } } diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 1422ff1..96f1026 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -25,68 +25,6 @@ struct MallocResult bool failed; }; -struct AnnoImageRef -{ - uint offset; -}; - -struct AnnoImage -{ - float4 bbox; - float linewidth; - uint index; - int2 offset; -}; - -struct AnnoColorRef -{ - uint offset; -}; - -struct AnnoColor -{ - float4 bbox; - float linewidth; - uint rgba_color; -}; - -struct AnnoLinGradientRef -{ - uint offset; -}; - -struct AnnoLinGradient -{ - float4 bbox; - float linewidth; - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct BinInstanceRef { uint offset; @@ -229,12 +167,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -242,6 +182,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -249,6 +191,11 @@ struct ConfigBuf Config conf; }; +struct SceneBuf +{ + uint scene[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) @@ -264,7 +211,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_308, constant uint& v_308BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -272,7 +219,7 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_308.memory[offset]; + uint v = v_242.memory[offset]; return v; } @@ -291,39 +238,30 @@ BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const } static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); + uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); BinInstance s; s.element_ix = raw0; return s; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_308, v_308BufferSize); - return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); + uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); + uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); + uint raw2 = read_mem(param_4, param_5, v_242, v_242BufferSize); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -336,24 +274,24 @@ void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) } static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_308, constant uint& v_308BufferSize) +Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_242, constant uint& v_242BufferSize) { uint param = 0u; - uint param_1 = uint(int((v_308BufferSize - 8) / 4) * 4); + uint param_1 = uint(int((v_242BufferSize - 8) / 4) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); + uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); + uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); Tile s; s.tile = TileSegRef{ raw0 }; s.backdrop = int(raw1); @@ -361,63 +299,26 @@ Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& } static inline __attribute__((always_inline)) -AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_242, constant uint& v_242BufferSize) { - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); - AnnoColor s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.linewidth = as_type(raw4); - s.rgba_color = raw5; - return s; -} - -static inline __attribute__((always_inline)) -AnnoColor Annotated_Color_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - Alloc param = a; - AnnoColorRef param_1 = AnnoColorRef{ ref.offset + 4u }; - return AnnoColor_read(param, param_1, v_308, v_308BufferSize); -} - -static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_308, constant uint& v_308BufferSize) -{ - uint _314 = atomic_fetch_add_explicit((device atomic_uint*)&v_308.mem_offset, size, memory_order_relaxed); - uint offset = _314; + uint _248 = atomic_fetch_add_explicit((device atomic_uint*)&v_242.mem_offset, size, memory_order_relaxed); + uint offset = _248; MallocResult r; - r.failed = (offset + size) > uint(int((v_308BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_242BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _343 = atomic_fetch_max_explicit((device atomic_uint*)&v_308.mem_error, 1u, memory_order_relaxed); + uint _277 = atomic_fetch_max_explicit((device atomic_uint*)&v_242.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_308, constant uint& v_308BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -425,42 +326,42 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_308.memory[offset] = val; + v_242.memory[offset] = val; } static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdJump_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_308, constant uint& v_308BufferSize) +bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_242, constant uint& v_242BufferSize) { if (cmd_ref.offset < cmd_limit) { return true; } uint param = 1024u; - MallocResult _1190 = malloc(param, v_308, v_308BufferSize); - MallocResult new_cmd = _1190; + MallocResult _762 = malloc(param, v_242, v_242BufferSize); + MallocResult new_cmd = _762; if (new_cmd.failed) { return false; @@ -469,7 +370,7 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; - Cmd_Jump_write(param_1, param_2, param_3, v_308, v_308BufferSize); + Cmd_Jump_write(param_1, param_2, param_3, v_242, v_242BufferSize); cmd_alloc = new_cmd.alloc; cmd_ref = CmdRef{ cmd_alloc.offset }; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; @@ -477,358 +378,228 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd } static inline __attribute__((always_inline)) -uint fill_mode_from_flags(thread const uint& flags) -{ - return flags & 1u; -} - -static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); + write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdFill_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 3u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); + write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdStroke_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const uint& flags, thread const Tile& tile, thread const float& linewidth, device Memory& v_308, constant uint& v_308BufferSize) +void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_242, constant uint& v_242BufferSize) { - uint param = flags; - if (fill_mode_from_flags(param) == 0u) + if (linewidth < 0.0) { if (tile.tile.offset != 0u) { CmdFill cmd_fill = CmdFill{ tile.tile.offset, tile.backdrop }; - Alloc param_1 = alloc; - CmdRef param_2 = cmd_ref; - CmdFill param_3 = cmd_fill; - Cmd_Fill_write(param_1, param_2, param_3, v_308, v_308BufferSize); + Alloc param = alloc; + CmdRef param_1 = cmd_ref; + CmdFill param_2 = cmd_fill; + Cmd_Fill_write(param, param_1, param_2, v_242, v_242BufferSize); cmd_ref.offset += 12u; } else { - Alloc param_4 = alloc; - CmdRef param_5 = cmd_ref; - Cmd_Solid_write(param_4, param_5, v_308, v_308BufferSize); + Alloc param_3 = alloc; + CmdRef param_4 = cmd_ref; + Cmd_Solid_write(param_3, param_4, v_242, v_242BufferSize); cmd_ref.offset += 4u; } } else { CmdStroke cmd_stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth }; - Alloc param_6 = alloc; - CmdRef param_7 = cmd_ref; - CmdStroke param_8 = cmd_stroke; - Cmd_Stroke_write(param_6, param_7, param_8, v_308, v_308BufferSize); + Alloc param_5 = alloc; + CmdRef param_6 = cmd_ref; + CmdStroke param_7 = cmd_stroke; + Cmd_Stroke_write(param_5, param_6, param_7, v_242, v_242BufferSize); cmd_ref.offset += 12u; } } static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdColor_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLinGradientRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_308, v_308BufferSize); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_308, v_308BufferSize); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_308, v_308BufferSize); - AnnoLinGradient s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.linewidth = as_type(raw4); - s.index = raw5; - s.line_x = as_type(raw6); - s.line_y = as_type(raw7); - s.line_c = as_type(raw8); - return s; -} - -static inline __attribute__((always_inline)) -AnnoLinGradient Annotated_LinGradient_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - Alloc param = a; - AnnoLinGradientRef param_1 = AnnoLinGradientRef{ ref.offset + 4u }; - return AnnoLinGradient_read(param, param_1, v_308, v_308BufferSize); -} - -static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); + write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_308, v_308BufferSize); + write_mem(param_6, param_7, param_8, v_242, v_242BufferSize); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_308, v_308BufferSize); + write_mem(param_9, param_10, param_11, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdLinGrad_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_308, v_308BufferSize); - AnnoImage s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.linewidth = as_type(raw4); - s.index = raw5; - s.offset = int2(int(raw6 << uint(16)) >> 16, int(raw6) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -AnnoImage Annotated_Image_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - Alloc param = a; - AnnoImageRef param_1 = AnnoImageRef{ ref.offset + 4u }; - return AnnoImage_read(param, param_1, v_308, v_308BufferSize); -} - -static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); + write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdImage_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 8u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); - AnnoEndClip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.blend = raw4; - return s; -} - -static inline __attribute__((always_inline)) -AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) -{ - Alloc param = a; - AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u }; - return AnnoEndClip_read(param, param_1, v_308, v_308BufferSize); -} - -static inline __attribute__((always_inline)) -void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_308, constant uint& v_308BufferSize) +void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.blend; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); Alloc param_3 = a; CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5, v_308, v_308BufferSize); + CmdEndClip_write(param_3, param_4, param_5, v_242, v_242BufferSize); } static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 0u; - write_mem(param, param_1, param_2, v_308, v_308BufferSize); + write_mem(param, param_1, param_2, v_242, v_242BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_308 [[buffer(0)]], const device ConfigBuf& _1283 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_242 [[buffer(0)]], const device ConfigBuf& _854 [[buffer(1)]], const device SceneBuf& _1222 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_bitmaps[8][256]; threadgroup Alloc sh_part_elements[256]; @@ -840,19 +611,19 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M threadgroup uint sh_tile_y0[256]; threadgroup uint sh_tile_base[256]; threadgroup uint sh_tile_count[256]; - constant uint& v_308BufferSize = spvBufferSizeConstants[0]; - uint width_in_bins = ((_1283.conf.width_in_tiles + 16u) - 1u) / 16u; + constant uint& v_242BufferSize = spvBufferSizeConstants[0]; + uint width_in_bins = ((_854.conf.width_in_tiles + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1283.conf.n_elements + 256u) - 1u) / 256u; + uint n_partitions = ((_854.conf.n_elements + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1283.conf.width_in_tiles) + bin_tile_x) + tile_x; + uint this_tile_ix = (((bin_tile_y + tile_y) * _854.conf.width_in_tiles) + bin_tile_x) + tile_x; Alloc param; - param.offset = _1283.conf.ptcl_alloc.offset; + param.offset = _854.conf.ptcl_alloc.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -864,57 +635,52 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = v_308.mem_error == 0u; + uint drawmonoid_start = _854.conf.drawmonoid_alloc.offset >> uint(2); + uint drawtag_start = _854.conf.drawtag_offset >> uint(2); + uint drawdata_start = _854.conf.drawdata_offset >> uint(2); + uint drawinfo_start = _854.conf.drawinfo_alloc.offset >> uint(2); + bool mem_ok = v_242.mem_error == 0u; Alloc param_3; Alloc param_5; - uint _1562; + uint _1154; uint element_ix; - AnnotatedRef ref; Alloc param_14; - Alloc param_16; uint tile_count; - Alloc param_23; - uint _1887; - Alloc param_29; - Tile tile_1; - AnnoColor fill; - Alloc param_35; - Alloc param_52; + uint _1453; + float linewidth; CmdLinGrad cmd_lin; - Alloc param_69; - Alloc param_95; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1614; + bool _1206; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1412 = th_ix < 256u; - bool _1420; - if (_1412) + bool _1003 = th_ix < 256u; + bool _1011; + if (_1003) { - _1420 = (partition_ix + th_ix) < n_partitions; + _1011 = (partition_ix + th_ix) < n_partitions; } else { - _1420 = _1412; + _1011 = _1003; } - if (_1420) + if (_1011) { - uint in_ix = (_1283.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_3.offset = _1283.conf.bin_alloc.offset; + uint in_ix = (_854.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + param_3.offset = _854.conf.bin_alloc.offset; uint param_4 = in_ix; - count = read_mem(param_3, param_4, v_308, v_308BufferSize); - param_5.offset = _1283.conf.bin_alloc.offset; + count = read_mem(param_3, param_4, v_242, v_242BufferSize); + param_5.offset = _854.conf.bin_alloc.offset; uint param_6 = in_ix + 1u; - uint offset = read_mem(param_5, param_6, v_308, v_308BufferSize); + uint offset = read_mem(param_5, param_6, v_242, v_242BufferSize); uint param_7 = offset; uint param_8 = count * 4u; bool param_9 = mem_ok; @@ -958,34 +724,34 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (part_ix > 0u) { - _1562 = sh_part_count[part_ix - 1u]; + _1154 = sh_part_count[part_ix - 1u]; } else { - _1562 = part_start_ix; + _1154 = part_start_ix; } - ix -= _1562; + ix -= _1154; Alloc bin_alloc = sh_part_elements[part_ix]; BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; BinInstanceRef param_13 = BinInstance_index(param_10, param_11); - BinInstance inst = BinInstance_read(param_12, param_13, v_308, v_308BufferSize); + BinInstance inst = BinInstance_read(param_12, param_13, v_242, v_242BufferSize); sh_elements[th_ix] = inst.element_ix; } threadgroup_barrier(mem_flags::mem_threadgroup); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1604 = (wr_ix - rd_ix) < 256u; - if (_1604) + bool _1196 = (wr_ix - rd_ix) < 256u; + if (_1196) { - _1614 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1614 = _1604; + _1206 = _1196; } - if (_1614) + if (_1206) { continue; } @@ -998,24 +764,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - ref = AnnotatedRef{ _1283.conf.anno_alloc.offset + (element_ix * 40u) }; - param_14.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_15 = ref; - tag = Annotated_tag(param_14, param_15, v_308, v_308BufferSize).tag; + tag = _1222.scene[drawtag_start + element_ix]; } switch (tag) { - case 1u: - case 3u: - case 2u: - case 4u: + case 68u: + case 72u: + case 276u: case 5u: + case 37u: { - uint drawmonoid_base = (_1283.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); - uint path_ix = v_308.memory[drawmonoid_base]; - param_16.offset = _1283.conf.tile_alloc.offset; - PathRef param_17 = PathRef{ _1283.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_16, param_17, v_308, v_308BufferSize); + uint drawmonoid_base = drawmonoid_start + (4u * element_ix); + uint path_ix = v_242.memory[drawmonoid_base]; + param_14.offset = _854.conf.tile_alloc.offset; + PathRef param_15 = PathRef{ _854.conf.tile_alloc.offset + (path_ix * 12u) }; + Path path = Path_read(param_14, param_15, v_242, v_242BufferSize); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -1030,13 +793,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M tile_count = uint(x1 - x0) * uint(y1 - y0); uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); sh_tile_base[th_ix] = base; - uint param_18 = path.tiles.offset; - uint param_19 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_20 = mem_ok; - Alloc path_alloc = new_alloc(param_18, param_19, param_20); - uint param_21 = th_ix; - Alloc param_22 = path_alloc; - write_tile_alloc(param_21, param_22); + uint param_16 = path.tiles.offset; + uint param_17 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; + bool param_18 = mem_ok; + Alloc path_alloc = new_alloc(param_16, param_17, param_18); + uint param_19 = th_ix; + Alloc param_20 = path_alloc; + write_tile_alloc(param_19, param_20); break; } default: @@ -1069,67 +832,55 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M el_ix = probe_1; } } - AnnotatedRef ref_1 = AnnotatedRef{ _1283.conf.anno_alloc.offset + (sh_elements[el_ix] * 40u) }; - param_23.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_24 = ref_1; - AnnotatedTag anno_tag = Annotated_tag(param_23, param_24, v_308, v_308BufferSize); - uint tag_1 = anno_tag.tag; + uint tag_1 = _1222.scene[drawtag_start + sh_elements[el_ix]]; if (el_ix > 0u) { - _1887 = sh_tile_count[el_ix - 1u]; + _1453 = sh_tile_count[el_ix - 1u]; } else { - _1887 = 0u; + _1453 = 0u; } - uint seq_ix = ix_1 - _1887; + uint seq_ix = ix_1 - _1453; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); bool include_tile = false; if (mem_ok) { - uint param_25 = el_ix; - bool param_26 = mem_ok; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_308, v_308BufferSize); - TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_27, param_28, v_308, v_308BufferSize); - bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); - bool _1951 = tile.tile.offset != 0u; - bool _1960; - if (!_1951) + uint param_21 = el_ix; + bool param_22 = mem_ok; + Alloc param_23 = read_tile_alloc(param_21, param_22, v_242, v_242BufferSize); + TileRef param_24 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + Tile tile = Tile_read(param_23, param_24, v_242, v_242BufferSize); + bool is_clip = (tag_1 & 1u) != 0u; + bool is_blend = false; + bool _1516 = tile.tile.offset != 0u; + bool _1525; + if (!_1516) { - _1960 = (tile.backdrop == 0) == is_clip; + _1525 = (tile.backdrop == 0) == is_clip; } else { - _1960 = _1951; + _1525 = _1516; } - bool _1972; - if (!_1960) + bool _1532; + if (!_1525) { - bool _1971; - if (is_clip) - { - _1971 = (anno_tag.flags & 2u) != 0u; - } - else - { - _1971 = is_clip; - } - _1972 = _1971; + _1532 = is_clip && is_blend; } else { - _1972 = _1960; + _1532 = _1525; } - include_tile = _1972; + include_tile = _1532; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1992 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1552 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -1153,198 +904,172 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - ref = AnnotatedRef{ _1283.conf.anno_alloc.offset + (element_ix_1 * 40u) }; - param_29.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_30 = ref; - AnnotatedTag tag_2 = Annotated_tag(param_29, param_30, v_308, v_308BufferSize); + uint drawtag = _1222.scene[drawtag_start + element_ix_1]; if (clip_zero_depth == 0u) { - switch (tag_2.tag) + uint param_25 = element_ref_ix; + bool param_26 = mem_ok; + Alloc param_27 = read_tile_alloc(param_25, param_26, v_242, v_242BufferSize); + TileRef param_28 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + Tile tile_1 = Tile_read(param_27, param_28, v_242, v_242BufferSize); + uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); + uint scene_offset = v_242.memory[drawmonoid_base_1 + 2u]; + uint info_offset = v_242.memory[drawmonoid_base_1 + 3u]; + uint dd = drawdata_start + (scene_offset >> uint(2)); + uint di = drawinfo_start + (info_offset >> uint(2)); + switch (drawtag) { - case 1u: + case 68u: { - uint param_31 = element_ref_ix; - bool param_32 = mem_ok; - Alloc param_33 = read_tile_alloc(param_31, param_32, v_308, v_308BufferSize); - TileRef param_34 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_33, param_34, v_308, v_308BufferSize); - param_35.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_36 = ref; - fill = Annotated_Color_read(param_35, param_36, v_308, v_308BufferSize); - Alloc param_37 = cmd_alloc; - CmdRef param_38 = cmd_ref; - uint param_39 = cmd_limit; - bool _2105 = alloc_cmd(param_37, param_38, param_39, v_308, v_308BufferSize); - cmd_alloc = param_37; - cmd_ref = param_38; - cmd_limit = param_39; - if (!_2105) + linewidth = as_type(v_242.memory[di]); + Alloc param_29 = cmd_alloc; + CmdRef param_30 = cmd_ref; + uint param_31 = cmd_limit; + bool _1676 = alloc_cmd(param_29, param_30, param_31, v_242, v_242BufferSize); + cmd_alloc = param_29; + cmd_ref = param_30; + cmd_limit = param_31; + if (!_1676) { break; } - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - uint param_42 = tag_2.flags; - Tile param_43 = tile_1; - float param_44 = fill.linewidth; - write_fill(param_40, param_41, param_42, param_43, param_44, v_308, v_308BufferSize); - cmd_ref = param_41; - Alloc param_45 = cmd_alloc; - CmdRef param_46 = cmd_ref; - CmdColor param_47 = CmdColor{ fill.rgba_color }; - Cmd_Color_write(param_45, param_46, param_47, v_308, v_308BufferSize); + Alloc param_32 = cmd_alloc; + CmdRef param_33 = cmd_ref; + Tile param_34 = tile_1; + float param_35 = linewidth; + write_fill(param_32, param_33, param_34, param_35, v_242, v_242BufferSize); + cmd_ref = param_33; + uint rgba = _1222.scene[dd]; + Alloc param_36 = cmd_alloc; + CmdRef param_37 = cmd_ref; + CmdColor param_38 = CmdColor{ rgba }; + Cmd_Color_write(param_36, param_37, param_38, v_242, v_242BufferSize); cmd_ref.offset += 8u; break; } - case 2u: + case 276u: { - uint param_48 = element_ref_ix; - bool param_49 = mem_ok; - Alloc param_50 = read_tile_alloc(param_48, param_49, v_308, v_308BufferSize); - TileRef param_51 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_50, param_51, v_308, v_308BufferSize); - param_52.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_53 = ref; - AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53, v_308, v_308BufferSize); - Alloc param_54 = cmd_alloc; - CmdRef param_55 = cmd_ref; - uint param_56 = cmd_limit; - bool _2177 = alloc_cmd(param_54, param_55, param_56, v_308, v_308BufferSize); - cmd_alloc = param_54; - cmd_ref = param_55; - cmd_limit = param_56; - if (!_2177) + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + uint param_41 = cmd_limit; + bool _1717 = alloc_cmd(param_39, param_40, param_41, v_242, v_242BufferSize); + cmd_alloc = param_39; + cmd_ref = param_40; + cmd_limit = param_41; + if (!_1717) { break; } - Alloc param_57 = cmd_alloc; - CmdRef param_58 = cmd_ref; - uint param_59 = tag_2.flags; - Tile param_60 = tile_1; - float param_61 = fill.linewidth; - write_fill(param_57, param_58, param_59, param_60, param_61, v_308, v_308BufferSize); - cmd_ref = param_58; - cmd_lin.index = lin.index; - cmd_lin.line_x = lin.line_x; - cmd_lin.line_y = lin.line_y; - cmd_lin.line_c = lin.line_c; - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - CmdLinGrad param_64 = cmd_lin; - Cmd_LinGrad_write(param_62, param_63, param_64, v_308, v_308BufferSize); + linewidth = as_type(v_242.memory[di]); + Alloc param_42 = cmd_alloc; + CmdRef param_43 = cmd_ref; + Tile param_44 = tile_1; + float param_45 = linewidth; + write_fill(param_42, param_43, param_44, param_45, v_242, v_242BufferSize); + cmd_ref = param_43; + cmd_lin.index = _1222.scene[dd]; + cmd_lin.line_x = as_type(v_242.memory[di + 1u]); + cmd_lin.line_y = as_type(v_242.memory[di + 2u]); + cmd_lin.line_c = as_type(v_242.memory[di + 3u]); + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdLinGrad param_48 = cmd_lin; + Cmd_LinGrad_write(param_46, param_47, param_48, v_242, v_242BufferSize); cmd_ref.offset += 20u; break; } - case 3u: + case 72u: { - uint param_65 = element_ref_ix; - bool param_66 = mem_ok; - Alloc param_67 = read_tile_alloc(param_65, param_66, v_308, v_308BufferSize); - TileRef param_68 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_67, param_68, v_308, v_308BufferSize); - param_69.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_70 = ref; - AnnoImage fill_img = Annotated_Image_read(param_69, param_70, v_308, v_308BufferSize); - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - uint param_73 = cmd_limit; - bool _2261 = alloc_cmd(param_71, param_72, param_73, v_308, v_308BufferSize); - cmd_alloc = param_71; - cmd_ref = param_72; - cmd_limit = param_73; - if (!_2261) + linewidth = as_type(v_242.memory[di]); + Alloc param_49 = cmd_alloc; + CmdRef param_50 = cmd_ref; + uint param_51 = cmd_limit; + bool _1785 = alloc_cmd(param_49, param_50, param_51, v_242, v_242BufferSize); + cmd_alloc = param_49; + cmd_ref = param_50; + cmd_limit = param_51; + if (!_1785) { break; } - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - uint param_76 = tag_2.flags; - Tile param_77 = tile_1; - float param_78 = fill_img.linewidth; - write_fill(param_74, param_75, param_76, param_77, param_78, v_308, v_308BufferSize); - cmd_ref = param_75; - Alloc param_79 = cmd_alloc; - CmdRef param_80 = cmd_ref; - CmdImage param_81 = CmdImage{ fill_img.index, fill_img.offset }; - Cmd_Image_write(param_79, param_80, param_81, v_308, v_308BufferSize); + Alloc param_52 = cmd_alloc; + CmdRef param_53 = cmd_ref; + Tile param_54 = tile_1; + float param_55 = linewidth; + write_fill(param_52, param_53, param_54, param_55, v_242, v_242BufferSize); + cmd_ref = param_53; + uint index = _1222.scene[dd]; + uint raw1 = _1222.scene[dd + 1u]; + int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + Alloc param_56 = cmd_alloc; + CmdRef param_57 = cmd_ref; + CmdImage param_58 = CmdImage{ index, offset_1 }; + Cmd_Image_write(param_56, param_57, param_58, v_242, v_242BufferSize); cmd_ref.offset += 12u; break; } - case 4u: + case 5u: { - uint param_82 = element_ref_ix; - bool param_83 = mem_ok; - Alloc param_84 = read_tile_alloc(param_82, param_83, v_308, v_308BufferSize); - TileRef param_85 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_84, param_85, v_308, v_308BufferSize); - bool _2322 = tile_1.tile.offset == 0u; - bool _2328; - if (_2322) + bool _1838 = tile_1.tile.offset == 0u; + bool _1844; + if (_1838) { - _2328 = tile_1.backdrop == 0; + _1844 = tile_1.backdrop == 0; } else { - _2328 = _2322; + _1844 = _1838; } - if (_2328) + if (_1844) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_86 = cmd_alloc; - CmdRef param_87 = cmd_ref; - uint param_88 = cmd_limit; - bool _2340 = alloc_cmd(param_86, param_87, param_88, v_308, v_308BufferSize); - cmd_alloc = param_86; - cmd_ref = param_87; - cmd_limit = param_88; - if (!_2340) + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _1856 = alloc_cmd(param_59, param_60, param_61, v_242, v_242BufferSize); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_1856) { break; } - Alloc param_89 = cmd_alloc; - CmdRef param_90 = cmd_ref; - Cmd_BeginClip_write(param_89, param_90, v_308, v_308BufferSize); + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Cmd_BeginClip_write(param_62, param_63, v_242, v_242BufferSize); cmd_ref.offset += 4u; } clip_depth++; break; } - case 5u: + case 37u: { - uint param_91 = element_ref_ix; - bool param_92 = mem_ok; - Alloc param_93 = read_tile_alloc(param_91, param_92, v_308, v_308BufferSize); - TileRef param_94 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_93, param_94, v_308, v_308BufferSize); - param_95.offset = _1283.conf.anno_alloc.offset; - AnnotatedRef param_96 = ref; - AnnoEndClip end_clip = Annotated_EndClip_read(param_95, param_96, v_308, v_308BufferSize); clip_depth--; - Alloc param_97 = cmd_alloc; - CmdRef param_98 = cmd_ref; - uint param_99 = cmd_limit; - bool _2398 = alloc_cmd(param_97, param_98, param_99, v_308, v_308BufferSize); - cmd_alloc = param_97; - cmd_ref = param_98; - cmd_limit = param_99; - if (!_2398) + Alloc param_64 = cmd_alloc; + CmdRef param_65 = cmd_ref; + uint param_66 = cmd_limit; + bool _1884 = alloc_cmd(param_64, param_65, param_66, v_242, v_242BufferSize); + cmd_alloc = param_64; + cmd_ref = param_65; + cmd_limit = param_66; + if (!_1884) { break; } - Alloc param_100 = cmd_alloc; - CmdRef param_101 = cmd_ref; - uint param_102 = 0u; - Tile param_103 = tile_1; - float param_104 = 0.0; - write_fill(param_100, param_101, param_102, param_103, param_104, v_308, v_308BufferSize); - cmd_ref = param_101; - Alloc param_105 = cmd_alloc; - CmdRef param_106 = cmd_ref; - CmdEndClip param_107 = CmdEndClip{ end_clip.blend }; - Cmd_EndClip_write(param_105, param_106, param_107, v_308, v_308BufferSize); + Alloc param_67 = cmd_alloc; + CmdRef param_68 = cmd_ref; + Tile param_69 = tile_1; + float param_70 = -1.0; + write_fill(param_67, param_68, param_69, param_70, v_242, v_242BufferSize); + cmd_ref = param_68; + uint blend = _1222.scene[dd]; + Alloc param_71 = cmd_alloc; + CmdRef param_72 = cmd_ref; + CmdEndClip param_73 = CmdEndClip{ blend }; + Cmd_EndClip_write(param_71, param_72, param_73, v_242, v_242BufferSize); cmd_ref.offset += 8u; break; } @@ -1352,14 +1077,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } else { - switch (tag_2.tag) + switch (drawtag) { - case 4u: + case 5u: { clip_depth++; break; } - case 5u: + case 37u: { if (clip_depth == clip_zero_depth) { @@ -1378,21 +1103,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _2467 = (bin_tile_x + tile_x) < _1283.conf.width_in_tiles; - bool _2476; - if (_2467) + bool _1954 = (bin_tile_x + tile_x) < _854.conf.width_in_tiles; + bool _1963; + if (_1954) { - _2476 = (bin_tile_y + tile_y) < _1283.conf.height_in_tiles; + _1963 = (bin_tile_y + tile_y) < _854.conf.height_in_tiles; } else { - _2476 = _2467; + _1963 = _1954; } - if (_2476) + if (_1963) { - Alloc param_108 = cmd_alloc; - CmdRef param_109 = cmd_ref; - Cmd_End_write(param_108, param_109, v_308, v_308BufferSize); + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + Cmd_End_write(param_74, param_75, v_242, v_242BufferSize); } } diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index 1fef2d7..bc27230 100644 Binary files a/piet-gpu/shader/gen/coarse.spv and b/piet-gpu/shader/gen/coarse.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index d1567c9..6ce3b8c 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index 1f2f78b..f812f52 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -1,133 +1,12 @@ -struct Alloc -{ - uint offset; -}; - -struct ElementRef -{ - uint offset; -}; - -struct FillColorRef -{ - uint offset; -}; - -struct FillColor -{ - uint rgba_color; -}; - -struct FillLinGradientRef -{ - uint offset; -}; - -struct FillLinGradient -{ - uint index; - float2 p0; - float2 p1; -}; - -struct FillImageRef -{ - uint offset; -}; - -struct FillImage -{ - uint index; - int2 offset; -}; - -struct ClipRef -{ - uint offset; -}; - -struct Clip -{ - float4 bbox; - uint blend; -}; - -struct ElementTag -{ - uint tag; - uint flags; -}; - struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; -struct AnnoImageRef -{ - uint offset; -}; - -struct AnnoImage -{ - float4 bbox; - float linewidth; - uint index; - int2 offset; -}; - -struct AnnoColorRef -{ - uint offset; -}; - -struct AnnoColor -{ - float4 bbox; - float linewidth; - uint rgba_color; -}; - -struct AnnoLinGradientRef -{ - uint offset; -}; - -struct AnnoLinGradient -{ - float4 bbox; - float linewidth; - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct AnnoBeginClipRef -{ - uint offset; -}; - -struct AnnoBeginClip -{ - float4 bbox; - float linewidth; - uint blend; -}; - -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef +struct Alloc { uint offset; }; @@ -144,12 +23,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -157,18 +38,18 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -static const DrawMonoid _413 = { 0u, 0u }; -static const DrawMonoid _437 = { 1u, 0u }; -static const DrawMonoid _439 = { 1u, 1u }; +static const DrawMonoid _23 = { 0u, 0u, 0u, 0u }; -RWByteAddressBuffer _199 : register(u0, space0); -ByteAddressBuffer _223 : register(t2, space0); -ByteAddressBuffer _1020 : register(t3, space0); -ByteAddressBuffer _1054 : register(t1, space0); +ByteAddressBuffer _92 : register(t1, space0); +ByteAddressBuffer _102 : register(t2, space0); +ByteAddressBuffer _202 : register(t3, space0); +RWByteAddressBuffer _284 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -182,389 +63,44 @@ struct SPIRV_Cross_Input groupshared DrawMonoid sh_scratch[256]; -ElementTag Element_tag(ElementRef ref) -{ - uint tag_and_flags = _223.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _378 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _378; -} - DrawMonoid map_tag(uint tag_word) { - switch (tag_word) - { - case 4u: - case 5u: - case 6u: - { - return _437; - } - case 9u: - case 10u: - { - return _439; - } - default: - { - return _413; - } - } + uint has_path = uint(tag_word != 0u); + DrawMonoid _75 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return _75; } -ElementRef Element_index(ElementRef ref, uint index) -{ - ElementRef _212 = { ref.offset + (index * 36u) }; - return _212; -} - -DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } -DrawMonoid tag_monoid_identity() +DrawMonoid draw_monoid_identity() { - return _413; -} - -FillColor FillColor_read(FillColorRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _223.Load((ix + 0u) * 4 + 0); - FillColor s; - s.rgba_color = raw0; - return s; -} - -FillColor Element_FillColor_read(ElementRef ref) -{ - FillColorRef _384 = { ref.offset + 4u }; - FillColorRef param = _384; - return FillColor_read(param); -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _199.Store(offset * 4 + 8, val); -} - -void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.bbox.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.bbox.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.bbox.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.bbox.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.linewidth); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.rgba_color; - write_mem(param_15, param_16, param_17); -} - -void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2); - AnnoColorRef _818 = { ref.offset + 4u }; - Alloc param_3 = a; - AnnoColorRef param_4 = _818; - AnnoColor param_5 = s; - AnnoColor_write(param_3, param_4, param_5); -} - -FillLinGradient FillLinGradient_read(FillLinGradientRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _223.Load((ix + 0u) * 4 + 0); - uint raw1 = _223.Load((ix + 1u) * 4 + 0); - uint raw2 = _223.Load((ix + 2u) * 4 + 0); - uint raw3 = _223.Load((ix + 3u) * 4 + 0); - uint raw4 = _223.Load((ix + 4u) * 4 + 0); - FillLinGradient s; - s.index = raw0; - s.p0 = float2(asfloat(raw1), asfloat(raw2)); - s.p1 = float2(asfloat(raw3), asfloat(raw4)); - return s; -} - -FillLinGradient Element_FillLinGradient_read(ElementRef ref) -{ - FillLinGradientRef _392 = { ref.offset + 4u }; - FillLinGradientRef param = _392; - return FillLinGradient_read(param); -} - -void AnnoLinGradient_write(Alloc a, AnnoLinGradientRef ref, AnnoLinGradient s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.bbox.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.bbox.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.bbox.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.bbox.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.linewidth); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.index; - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = asuint(s.line_x); - write_mem(param_18, param_19, param_20); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = asuint(s.line_y); - write_mem(param_21, param_22, param_23); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = asuint(s.line_c); - write_mem(param_24, param_25, param_26); -} - -void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinGradient s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 2u; - write_mem(param, param_1, param_2); - AnnoLinGradientRef _839 = { ref.offset + 4u }; - Alloc param_3 = a; - AnnoLinGradientRef param_4 = _839; - AnnoLinGradient param_5 = s; - AnnoLinGradient_write(param_3, param_4, param_5); -} - -FillImage FillImage_read(FillImageRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _223.Load((ix + 0u) * 4 + 0); - uint raw1 = _223.Load((ix + 1u) * 4 + 0); - FillImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -FillImage Element_FillImage_read(ElementRef ref) -{ - FillImageRef _400 = { ref.offset + 4u }; - FillImageRef param = _400; - return FillImage_read(param); -} - -void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.bbox.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.bbox.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.bbox.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.bbox.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.linewidth); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.index; - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_18, param_19, param_20); -} - -void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 3u; - write_mem(param, param_1, param_2); - AnnoImageRef _860 = { ref.offset + 4u }; - Alloc param_3 = a; - AnnoImageRef param_4 = _860; - AnnoImage param_5 = s; - AnnoImage_write(param_3, param_4, param_5); -} - -Clip Clip_read(ClipRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _223.Load((ix + 0u) * 4 + 0); - uint raw1 = _223.Load((ix + 1u) * 4 + 0); - uint raw2 = _223.Load((ix + 2u) * 4 + 0); - uint raw3 = _223.Load((ix + 3u) * 4 + 0); - Clip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.blend = _223.Load((ix + 4u) * 4 + 0); - return s; -} - -Clip Element_BeginClip_read(ElementRef ref) -{ - ClipRef _408 = { ref.offset + 4u }; - ClipRef param = _408; - return Clip_read(param); -} - -void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.bbox.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.bbox.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.bbox.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.bbox.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.linewidth); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.blend; - write_mem(param_15, param_16, param_17); -} - -void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 4u; - write_mem(param, param_1, param_2); - AnnoBeginClipRef _881 = { ref.offset + 4u }; - Alloc param_3 = a; - AnnoBeginClipRef param_4 = _881; - AnnoBeginClip param_5 = s; - AnnoBeginClip_write(param_3, param_4, param_5); -} - -void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.bbox.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.bbox.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.bbox.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.bbox.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = s.blend; - write_mem(param_12, param_13, param_14); -} - -void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 5u; - write_mem(param, param_1, param_2); - AnnoEndClipRef _902 = { ref.offset + 4u }; - Alloc param_3 = a; - AnnoEndClipRef param_4 = _902; - AnnoEndClip param_5 = s; - AnnoEndClip_write(param_3, param_4, param_5); + return _23; } void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _920 = { ix * 36u }; - ElementRef ref = _920; - ElementRef param = ref; - uint tag_word = Element_tag(param).tag; - uint param_1 = tag_word; - DrawMonoid agg = map_tag(param_1); + uint drawtag_base = _92.Load(100) >> uint(2); + uint tag_word = _102.Load((drawtag_base + ix) * 4 + 0); + uint param = tag_word; + DrawMonoid agg = map_tag(param); DrawMonoid local[8]; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - ElementRef param_2 = ref; - uint param_3 = i; - ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4).tag; - uint param_5 = tag_word; - DrawMonoid param_6 = agg; - DrawMonoid param_7 = map_tag(param_5); - agg = combine_tag_monoid(param_6, param_7); + tag_word = _102.Load(((drawtag_base + ix) + i) * 4 + 0); + uint param_1 = tag_word; + DrawMonoid param_2 = agg; + DrawMonoid param_3 = map_tag(param_1); + agg = combine_draw_monoid(param_2, param_3); local[i] = agg; } sh_scratch[gl_LocalInvocationID.x] = agg; @@ -574,194 +110,121 @@ void comp_main() if (gl_LocalInvocationID.x >= (1u << i_1)) { DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_8 = other; - DrawMonoid param_9 = agg; - agg = combine_tag_monoid(param_8, param_9); + DrawMonoid param_4 = other; + DrawMonoid param_5 = agg; + agg = combine_draw_monoid(param_4, param_5); } GroupMemoryBarrierWithGroupSync(); sh_scratch[gl_LocalInvocationID.x] = agg; } GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = tag_monoid_identity(); + DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _1026; - _1026.path_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 0); - _1026.clip_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 4); - row.path_ix = _1026.path_ix; - row.clip_ix = _1026.clip_ix; + DrawMonoid _208; + _208.path_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 0); + _208.clip_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 4); + _208.scene_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 8); + _208.info_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 12); + row.path_ix = _208.path_ix; + row.clip_ix = _208.clip_ix; + row.scene_offset = _208.scene_offset; + row.info_offset = _208.info_offset; } if (gl_LocalInvocationID.x > 0u) { - DrawMonoid param_10 = row; - DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_tag_monoid(param_10, param_11); + DrawMonoid param_6 = row; + DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_draw_monoid(param_6, param_7); } + uint drawdata_base = _92.Load(104) >> uint(2); + uint drawinfo_base = _92.Load(68) >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_1054.Load(44) >> uint(2)) + (out_ix * 2u); - uint clip_out_base = _1054.Load(48) >> uint(2); - AnnotatedRef _1075 = { _1054.Load(32) + (out_ix * 40u) }; - AnnotatedRef out_ref = _1075; + uint out_base = (_92.Load(44) >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _92.Load(48) >> uint(2); float4 mat; float2 translate; - AnnoColor anno_fill; - Alloc param_18; - AnnoLinGradient anno_lin; - Alloc param_23; - AnnoImage anno_img; - Alloc param_28; - AnnoBeginClip anno_begin_clip; - Alloc param_33; - AnnoEndClip anno_end_clip; - Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; if (i_2 > 0u) { - DrawMonoid param_12 = m; - DrawMonoid param_13 = local[i_2 - 1u]; - m = combine_tag_monoid(param_12, param_13); + DrawMonoid param_8 = m; + DrawMonoid param_9 = local[i_2 - 1u]; + m = combine_draw_monoid(param_8, param_9); } - _199.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); - _199.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); - ElementRef param_14 = ref; - uint param_15 = i_2; - ElementRef this_ref = Element_index(param_14, param_15); - ElementRef param_16 = this_ref; - tag_word = Element_tag(param_16).tag; - if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) + _284.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); + _284.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); + _284.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); + _284.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); + uint dd = drawdata_base + (m.scene_offset >> uint(2)); + uint di = drawinfo_base + (m.info_offset >> uint(2)); + tag_word = _102.Load(((drawtag_base + ix) + i_2) * 4 + 0); + if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_1054.Load(40) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_199.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_199.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_199.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_199.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_92.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_284.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_284.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_284.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_284.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_199.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_284.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); - if ((linewidth >= 0.0f) || (tag_word == 5u)) + if ((linewidth >= 0.0f) || (tag_word == 276u)) { - uint trans_ix = _199.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_1054.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_199.Load(t * 4 + 8), _199.Load((t + 1u) * 4 + 8), _199.Load((t + 2u) * 4 + 8), _199.Load((t + 3u) * 4 + 8))); - if (tag_word == 5u) + uint trans_ix = _284.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_92.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_284.Load(t * 4 + 8), _284.Load((t + 1u) * 4 + 8), _284.Load((t + 2u) * 4 + 8), _284.Load((t + 3u) * 4 + 8))); + if (tag_word == 276u) { - translate = asfloat(uint2(_199.Load((t + 4u) * 4 + 8), _199.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_284.Load((t + 4u) * 4 + 8), _284.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) { linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); } - linewidth = max(linewidth, 0.0f); switch (tag_word) { - case 4u: + case 68u: + case 72u: { - ElementRef param_17 = this_ref; - FillColor fill = Element_FillColor_read(param_17); - anno_fill.bbox = bbox; - anno_fill.linewidth = linewidth; - anno_fill.rgba_color = fill.rgba_color; - Alloc _1288; - _1288.offset = _1054.Load(32); - param_18.offset = _1288.offset; - AnnotatedRef param_19 = out_ref; - uint param_20 = fill_mode; - AnnoColor param_21 = anno_fill; - Annotated_Color_write(param_18, param_19, param_20, param_21); + _284.Store(di * 4 + 8, asuint(linewidth)); break; } - case 5u: + case 276u: { - ElementRef param_22 = this_ref; - FillLinGradient lin = Element_FillLinGradient_read(param_22); - anno_lin.bbox = bbox; - anno_lin.linewidth = linewidth; - anno_lin.index = lin.index; - float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; - float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate; + _284.Store(di * 4 + 8, asuint(linewidth)); + uint index = _102.Load(dd * 4 + 0); + float2 p0 = asfloat(uint2(_102.Load((dd + 1u) * 4 + 0), _102.Load((dd + 2u) * 4 + 0))); + float2 p1 = asfloat(uint2(_102.Load((dd + 3u) * 4 + 0), _102.Load((dd + 4u) * 4 + 0))); + p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; + p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); float line_x = dxy.x * scale; float line_y = dxy.y * scale; - anno_lin.line_x = line_x; - anno_lin.line_y = line_y; - anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - Alloc _1384; - _1384.offset = _1054.Load(32); - param_23.offset = _1384.offset; - AnnotatedRef param_24 = out_ref; - uint param_25 = fill_mode; - AnnoLinGradient param_26 = anno_lin; - Annotated_LinGradient_write(param_23, param_24, param_25, param_26); + float line_c = -((p0.x * line_x) + (p0.y * line_y)); + _284.Store((di + 1u) * 4 + 8, asuint(line_x)); + _284.Store((di + 2u) * 4 + 8, asuint(line_y)); + _284.Store((di + 3u) * 4 + 8, asuint(line_c)); break; } - case 6u: + case 5u: { - ElementRef param_27 = this_ref; - FillImage fill_img = Element_FillImage_read(param_27); - anno_img.bbox = bbox; - anno_img.linewidth = linewidth; - anno_img.index = fill_img.index; - anno_img.offset = fill_img.offset; - Alloc _1412; - _1412.offset = _1054.Load(32); - param_28.offset = _1412.offset; - AnnotatedRef param_29 = out_ref; - uint param_30 = fill_mode; - AnnoImage param_31 = anno_img; - Annotated_Image_write(param_28, param_29, param_30, param_31); - break; - } - case 9u: - { - ElementRef param_32 = this_ref; - Clip begin_clip = Element_BeginClip_read(param_32); - anno_begin_clip.bbox = bbox; - anno_begin_clip.linewidth = 0.0f; - anno_begin_clip.blend = begin_clip.blend; - uint flags = uint(begin_clip.blend != 3u) << uint(1); - Alloc _1442; - _1442.offset = _1054.Load(32); - param_33.offset = _1442.offset; - AnnotatedRef param_34 = out_ref; - uint param_35 = flags; - AnnoBeginClip param_36 = anno_begin_clip; - Annotated_BeginClip_write(param_33, param_34, param_35, param_36); break; } } } - else - { - if (tag_word == 10u) - { - ElementRef param_37 = this_ref; - Clip end_clip = Element_BeginClip_read(param_37); - anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - anno_end_clip.blend = end_clip.blend; - uint flags_1 = uint(end_clip.blend != 3u) << uint(1); - Alloc _1480; - _1480.offset = _1054.Load(32); - param_38.offset = _1480.offset; - AnnotatedRef param_39 = out_ref; - uint param_40 = flags_1; - AnnoEndClip param_41 = anno_end_clip; - Annotated_EndClip_write(param_38, param_39, param_40, param_41); - } - } - if ((tag_word == 9u) || (tag_word == 10u)) + if ((tag_word == 5u) || (tag_word == 37u)) { uint path_ix = ~(out_ix + i_2); - if (tag_word == 9u) + if (tag_word == 5u) { path_ix = m.path_ix; } - _199.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); + _284.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } - out_ref.offset += 40u; } } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index 5b9ecc6..a8516ae 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -44,145 +44,53 @@ struct spvUnsafeArray } }; +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; + uint scene_offset; + uint info_offset; +}; + struct Alloc { uint offset; }; -struct ElementRef +struct Config { - uint offset; + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc path_bbox_alloc; + Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; + uint n_trans; + uint n_path; + uint n_clip; + uint trans_offset; + uint linewidth_offset; + uint pathtag_offset; + uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; -struct FillColorRef +struct ConfigBuf { - uint offset; -}; - -struct FillColor -{ - uint rgba_color; -}; - -struct FillLinGradientRef -{ - uint offset; -}; - -struct FillLinGradient -{ - uint index; - float2 p0; - float2 p1; -}; - -struct FillImageRef -{ - uint offset; -}; - -struct FillImage -{ - uint index; - int2 offset; -}; - -struct ClipRef -{ - uint offset; -}; - -struct Clip -{ - float4 bbox; - uint blend; -}; - -struct ElementTag -{ - uint tag; - uint flags; -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; -}; - -struct AnnoImageRef -{ - uint offset; -}; - -struct AnnoImage -{ - float4 bbox; - float linewidth; - uint index; - int2 offset; -}; - -struct AnnoColorRef -{ - uint offset; -}; - -struct AnnoColor -{ - float4 bbox; - float linewidth; - uint rgba_color; -}; - -struct AnnoLinGradientRef -{ - uint offset; -}; - -struct AnnoLinGradient -{ - float4 bbox; - float linewidth; - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct AnnoBeginClipRef -{ - uint offset; -}; - -struct AnnoBeginClip -{ - float4 bbox; - float linewidth; - uint blend; -}; - -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef -{ - uint offset; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint memory[1]; + Config conf; }; struct SceneBuf @@ -194,6 +102,8 @@ struct DrawMonoid_1 { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; struct ParentBuf @@ -201,442 +111,56 @@ struct ParentBuf DrawMonoid_1 parent[1]; }; -struct Alloc_1 +struct Memory { - uint offset; -}; - -struct Config -{ - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; -}; - -struct ConfigBuf -{ - Config conf; + uint mem_offset; + uint mem_error; + uint memory[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); -static inline __attribute__((always_inline)) -ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_223) -{ - uint tag_and_flags = v_223.scene[ref.offset >> uint(2)]; - return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { - switch (tag_word) - { - case 4u: - case 5u: - case 6u: - { - return DrawMonoid{ 1u, 0u }; - } - case 9u: - case 10u: - { - return DrawMonoid{ 1u, 1u }; - } - default: - { - return DrawMonoid{ 0u, 0u }; - } - } + uint has_path = uint(tag_word != 0u); + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; } static inline __attribute__((always_inline)) -ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) -{ - return ElementRef{ ref.offset + (index * 36u) }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } static inline __attribute__((always_inline)) -DrawMonoid tag_monoid_identity() +DrawMonoid draw_monoid_identity() { - return DrawMonoid{ 0u, 0u }; + return DrawMonoid{ 0u, 0u, 0u, 0u }; } -static inline __attribute__((always_inline)) -FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_223) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_223.scene[ix + 0u]; - FillColor s; - s.rgba_color = raw0; - return s; -} - -static inline __attribute__((always_inline)) -FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_223) -{ - FillColorRef param = FillColorRef{ ref.offset + 4u }; - return FillColor_read(param, v_223); -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_199) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_199.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_199) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_199); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_199); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_199); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_199); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.rgba_color; - write_mem(param_15, param_16, param_17, v_199); -} - -static inline __attribute__((always_inline)) -void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_199) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u }; - AnnoColor param_5 = s; - AnnoColor_write(param_3, param_4, param_5, v_199); -} - -static inline __attribute__((always_inline)) -FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_223) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_223.scene[ix + 0u]; - uint raw1 = v_223.scene[ix + 1u]; - uint raw2 = v_223.scene[ix + 2u]; - uint raw3 = v_223.scene[ix + 3u]; - uint raw4 = v_223.scene[ix + 4u]; - FillLinGradient s; - s.index = raw0; - s.p0 = float2(as_type(raw1), as_type(raw2)); - s.p1 = float2(as_type(raw3), as_type(raw4)); - return s; -} - -static inline __attribute__((always_inline)) -FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_223) -{ - FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u }; - return FillLinGradient_read(param, v_223); -} - -static inline __attribute__((always_inline)) -void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_199) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_199); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_199); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_199); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_199); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_199); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = as_type(s.line_x); - write_mem(param_18, param_19, param_20, v_199); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = as_type(s.line_y); - write_mem(param_21, param_22, param_23, v_199); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = as_type(s.line_c); - write_mem(param_24, param_25, param_26, v_199); -} - -static inline __attribute__((always_inline)) -void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_199) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 2u; - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u }; - AnnoLinGradient param_5 = s; - AnnoLinGradient_write(param_3, param_4, param_5, v_199); -} - -static inline __attribute__((always_inline)) -FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_223) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_223.scene[ix + 0u]; - uint raw1 = v_223.scene[ix + 1u]; - FillImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_223) -{ - FillImageRef param = FillImageRef{ ref.offset + 4u }; - return FillImage_read(param, v_223); -} - -static inline __attribute__((always_inline)) -void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_199) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_199); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_199); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_199); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_199); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_199); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_18, param_19, param_20, v_199); -} - -static inline __attribute__((always_inline)) -void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_199) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 3u; - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u }; - AnnoImage param_5 = s; - AnnoImage_write(param_3, param_4, param_5, v_199); -} - -static inline __attribute__((always_inline)) -Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_223) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_223.scene[ix + 0u]; - uint raw1 = v_223.scene[ix + 1u]; - uint raw2 = v_223.scene[ix + 2u]; - uint raw3 = v_223.scene[ix + 3u]; - Clip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.blend = v_223.scene[ix + 4u]; - return s; -} - -static inline __attribute__((always_inline)) -Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_223) -{ - ClipRef param = ClipRef{ ref.offset + 4u }; - return Clip_read(param, v_223); -} - -static inline __attribute__((always_inline)) -void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_199) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_199); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_199); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_199); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_199); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.blend; - write_mem(param_15, param_16, param_17, v_199); -} - -static inline __attribute__((always_inline)) -void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_199) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 4u; - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u }; - AnnoBeginClip param_5 = s; - AnnoBeginClip_write(param_3, param_4, param_5, v_199); -} - -static inline __attribute__((always_inline)) -void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_199) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_199); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_199); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_199); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = s.blend; - write_mem(param_12, param_13, param_14, v_199); -} - -static inline __attribute__((always_inline)) -void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoEndClip& s, device Memory& v_199) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 5u; - write_mem(param, param_1, param_2, v_199); - Alloc param_3 = a; - AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u }; - AnnoEndClip param_5 = s; - AnnoEndClip_write(param_3, param_4, param_5, v_199); -} - -kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1054 [[buffer(1)]], const device SceneBuf& v_223 [[buffer(2)]], const device ParentBuf& _1020 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 [[buffer(1)]], const device SceneBuf& _102 [[buffer(2)]], const device ParentBuf& _202 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef ref = ElementRef{ ix * 36u }; - ElementRef param = ref; - uint tag_word = Element_tag(param, v_223).tag; - uint param_1 = tag_word; - DrawMonoid agg = map_tag(param_1); + uint drawtag_base = _92.conf.drawtag_offset >> uint(2); + uint tag_word = _102.scene[drawtag_base + ix]; + uint param = tag_word; + DrawMonoid agg = map_tag(param); spvUnsafeArray local; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - ElementRef param_2 = ref; - uint param_3 = i; - ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4, v_223).tag; - uint param_5 = tag_word; - DrawMonoid param_6 = agg; - DrawMonoid param_7 = map_tag(param_5); - agg = combine_tag_monoid(param_6, param_7); + tag_word = _102.scene[(drawtag_base + ix) + i]; + uint param_1 = tag_word; + DrawMonoid param_2 = agg; + DrawMonoid param_3 = map_tag(param_1); + agg = combine_draw_monoid(param_2, param_3); local[i] = agg; } sh_scratch[gl_LocalInvocationID.x] = agg; @@ -646,181 +170,117 @@ kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1 if (gl_LocalInvocationID.x >= (1u << i_1)) { DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_8 = other; - DrawMonoid param_9 = agg; - agg = combine_tag_monoid(param_8, param_9); + DrawMonoid param_4 = other; + DrawMonoid param_5 = agg; + agg = combine_draw_monoid(param_4, param_5); } threadgroup_barrier(mem_flags::mem_threadgroup); sh_scratch[gl_LocalInvocationID.x] = agg; } threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = tag_monoid_identity(); + DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _1023 = gl_WorkGroupID.x - 1u; - row.path_ix = _1020.parent[_1023].path_ix; - row.clip_ix = _1020.parent[_1023].clip_ix; + uint _205 = gl_WorkGroupID.x - 1u; + row.path_ix = _202.parent[_205].path_ix; + row.clip_ix = _202.parent[_205].clip_ix; + row.scene_offset = _202.parent[_205].scene_offset; + row.info_offset = _202.parent[_205].info_offset; } if (gl_LocalInvocationID.x > 0u) { - DrawMonoid param_10 = row; - DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_tag_monoid(param_10, param_11); + DrawMonoid param_6 = row; + DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_draw_monoid(param_6, param_7); } + uint drawdata_base = _92.conf.drawdata_offset >> uint(2); + uint drawinfo_base = _92.conf.drawinfo_alloc.offset >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_1054.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); - uint clip_out_base = _1054.conf.clip_alloc.offset >> uint(2); - AnnotatedRef out_ref = AnnotatedRef{ _1054.conf.anno_alloc.offset + (out_ix * 40u) }; + uint out_base = (_92.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _92.conf.clip_alloc.offset >> uint(2); float4 mat; float2 translate; - AnnoColor anno_fill; - Alloc param_18; - AnnoLinGradient anno_lin; - Alloc param_23; - AnnoImage anno_img; - Alloc param_28; - AnnoBeginClip anno_begin_clip; - Alloc param_33; - AnnoEndClip anno_end_clip; - Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; if (i_2 > 0u) { - DrawMonoid param_12 = m; - DrawMonoid param_13 = local[i_2 - 1u]; - m = combine_tag_monoid(param_12, param_13); + DrawMonoid param_8 = m; + DrawMonoid param_9 = local[i_2 - 1u]; + m = combine_draw_monoid(param_8, param_9); } - v_199.memory[out_base + (i_2 * 2u)] = m.path_ix; - v_199.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; - ElementRef param_14 = ref; - uint param_15 = i_2; - ElementRef this_ref = Element_index(param_14, param_15); - ElementRef param_16 = this_ref; - tag_word = Element_tag(param_16, v_223).tag; - if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) + _284.memory[out_base + (i_2 * 4u)] = m.path_ix; + _284.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; + _284.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; + _284.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; + uint dd = drawdata_base + (m.scene_offset >> uint(2)); + uint di = drawinfo_base + (m.info_offset >> uint(2)); + tag_word = _102.scene[(drawtag_base + ix) + i_2]; + if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_1054.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(v_199.memory[bbox_offset]) - 32768.0; - float bbox_t = float(v_199.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(v_199.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(v_199.memory[bbox_offset + 3u]) - 32768.0; + uint bbox_offset = (_92.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_284.memory[bbox_offset]) - 32768.0; + float bbox_t = float(_284.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(_284.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(_284.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(v_199.memory[bbox_offset + 4u]); + float linewidth = as_type(_284.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); - if ((linewidth >= 0.0) || (tag_word == 5u)) + if ((linewidth >= 0.0) || (tag_word == 276u)) { - uint trans_ix = v_199.memory[bbox_offset + 5u]; - uint t = (_1054.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(v_199.memory[t], v_199.memory[t + 1u], v_199.memory[t + 2u], v_199.memory[t + 3u])); - if (tag_word == 5u) + uint trans_ix = _284.memory[bbox_offset + 5u]; + uint t = (_92.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(_284.memory[t], _284.memory[t + 1u], _284.memory[t + 2u], _284.memory[t + 3u])); + if (tag_word == 276u) { - translate = as_type(uint2(v_199.memory[t + 4u], v_199.memory[t + 5u])); + translate = as_type(uint2(_284.memory[t + 4u], _284.memory[t + 5u])); } } if (linewidth >= 0.0) { linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); } - linewidth = fast::max(linewidth, 0.0); switch (tag_word) { - case 4u: + case 68u: + case 72u: { - ElementRef param_17 = this_ref; - FillColor fill = Element_FillColor_read(param_17, v_223); - anno_fill.bbox = bbox; - anno_fill.linewidth = linewidth; - anno_fill.rgba_color = fill.rgba_color; - param_18.offset = _1054.conf.anno_alloc.offset; - AnnotatedRef param_19 = out_ref; - uint param_20 = fill_mode; - AnnoColor param_21 = anno_fill; - Annotated_Color_write(param_18, param_19, param_20, param_21, v_199); + _284.memory[di] = as_type(linewidth); break; } - case 5u: + case 276u: { - ElementRef param_22 = this_ref; - FillLinGradient lin = Element_FillLinGradient_read(param_22, v_223); - anno_lin.bbox = bbox; - anno_lin.linewidth = linewidth; - anno_lin.index = lin.index; - float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; - float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate; + _284.memory[di] = as_type(linewidth); + uint index = _102.scene[dd]; + float2 p0 = as_type(uint2(_102.scene[dd + 1u], _102.scene[dd + 2u])); + float2 p1 = as_type(uint2(_102.scene[dd + 3u], _102.scene[dd + 4u])); + p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; + p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); float line_x = dxy.x * scale; float line_y = dxy.y * scale; - anno_lin.line_x = line_x; - anno_lin.line_y = line_y; - anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - param_23.offset = _1054.conf.anno_alloc.offset; - AnnotatedRef param_24 = out_ref; - uint param_25 = fill_mode; - AnnoLinGradient param_26 = anno_lin; - Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_199); + float line_c = -((p0.x * line_x) + (p0.y * line_y)); + _284.memory[di + 1u] = as_type(line_x); + _284.memory[di + 2u] = as_type(line_y); + _284.memory[di + 3u] = as_type(line_c); break; } - case 6u: + case 5u: { - ElementRef param_27 = this_ref; - FillImage fill_img = Element_FillImage_read(param_27, v_223); - anno_img.bbox = bbox; - anno_img.linewidth = linewidth; - anno_img.index = fill_img.index; - anno_img.offset = fill_img.offset; - param_28.offset = _1054.conf.anno_alloc.offset; - AnnotatedRef param_29 = out_ref; - uint param_30 = fill_mode; - AnnoImage param_31 = anno_img; - Annotated_Image_write(param_28, param_29, param_30, param_31, v_199); - break; - } - case 9u: - { - ElementRef param_32 = this_ref; - Clip begin_clip = Element_BeginClip_read(param_32, v_223); - anno_begin_clip.bbox = bbox; - anno_begin_clip.linewidth = 0.0; - anno_begin_clip.blend = begin_clip.blend; - uint flags = uint(begin_clip.blend != 3u) << uint(1); - param_33.offset = _1054.conf.anno_alloc.offset; - AnnotatedRef param_34 = out_ref; - uint param_35 = flags; - AnnoBeginClip param_36 = anno_begin_clip; - Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_199); break; } } } - else - { - if (tag_word == 10u) - { - ElementRef param_37 = this_ref; - Clip end_clip = Element_BeginClip_read(param_37, v_223); - anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - anno_end_clip.blend = end_clip.blend; - uint flags_1 = uint(end_clip.blend != 3u) << uint(1); - param_38.offset = _1054.conf.anno_alloc.offset; - AnnotatedRef param_39 = out_ref; - uint param_40 = flags_1; - AnnoEndClip param_41 = anno_end_clip; - Annotated_EndClip_write(param_38, param_39, param_40, param_41, v_199); - } - } - if ((tag_word == 9u) || (tag_word == 10u)) + if ((tag_word == 5u) || (tag_word == 37u)) { uint path_ix = ~(out_ix + i_2); - if (tag_word == 9u) + if (tag_word == 5u) { path_ix = m.path_ix; } - v_199.memory[clip_out_base + m.clip_ix] = path_ix; + _284.memory[clip_out_base + m.clip_ix] = path_ix; } - out_ref.offset += 40u; } } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index bdbdb0c..d18b287 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.spv and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil index 9b1b0fd..bfafc5f 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.dxil and b/piet-gpu/shader/gen/draw_reduce.dxil differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl index 0553c9b..7220b7e 100644 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -1,22 +1,11 @@ -struct ElementRef -{ - uint offset; -}; - -struct ElementTag -{ - uint tag; - uint flags; -}; - struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - struct Alloc { uint offset; @@ -34,12 +23,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -47,16 +38,16 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; -static const DrawMonoid _87 = { 1u, 0u }; -static const DrawMonoid _89 = { 1u, 1u }; -static const DrawMonoid _91 = { 0u, 0u }; +static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -ByteAddressBuffer _46 : register(t2, space0); -RWByteAddressBuffer _199 : register(u3, space0); -RWByteAddressBuffer _213 : register(u0, space0); -ByteAddressBuffer _219 : register(t1, space0); +ByteAddressBuffer _86 : register(t1, space0); +ByteAddressBuffer _96 : register(t2, space0); +RWByteAddressBuffer _187 : register(u3, space0); +RWByteAddressBuffer _205 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -70,68 +61,37 @@ struct SPIRV_Cross_Input groupshared DrawMonoid sh_scratch[256]; -ElementTag Element_tag(ElementRef ref) -{ - uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _60; -} - DrawMonoid map_tag(uint tag_word) { - switch (tag_word) - { - case 4u: - case 5u: - case 6u: - { - return _87; - } - case 9u: - case 10u: - { - return _89; - } - default: - { - return _91; - } - } + uint has_path = uint(tag_word != 0u); + DrawMonoid _69 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return _69; } -ElementRef Element_index(ElementRef ref, uint index) -{ - ElementRef _39 = { ref.offset + (index * 36u) }; - return _39; -} - -DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _107 = { ix * 36u }; - ElementRef ref = _107; - ElementRef param = ref; - uint tag_word = Element_tag(param).tag; - uint param_1 = tag_word; - DrawMonoid agg = map_tag(param_1); + uint drawtag_base = _86.Load(100) >> uint(2); + uint tag_word = _96.Load((drawtag_base + ix) * 4 + 0); + uint param = tag_word; + DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - ElementRef param_2 = ref; - uint param_3 = i; - ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4).tag; - uint param_5 = tag_word; - DrawMonoid param_6 = agg; - DrawMonoid param_7 = map_tag(param_5); - agg = combine_tag_monoid(param_6, param_7); + uint tag_word_1 = _96.Load(((drawtag_base + ix) + i) * 4 + 0); + uint param_1 = tag_word_1; + DrawMonoid param_2 = agg; + DrawMonoid param_3 = map_tag(param_1); + agg = combine_draw_monoid(param_2, param_3); } sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i_1 = 0u; i_1 < 8u; i_1++) @@ -140,17 +100,19 @@ void comp_main() if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) { DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_8 = agg; - DrawMonoid param_9 = other; - agg = combine_tag_monoid(param_8, param_9); + DrawMonoid param_4 = agg; + DrawMonoid param_5 = other; + agg = combine_draw_monoid(param_4, param_5); } GroupMemoryBarrierWithGroupSync(); sh_scratch[gl_LocalInvocationID.x] = agg; } if (gl_LocalInvocationID.x == 0u) { - _199.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix); - _199.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix); + _187.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); + _187.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); + _187.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); + _187.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); } } diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl index 064c515..8e409a8 100644 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -5,48 +5,14 @@ using namespace metal; -struct ElementRef -{ - uint offset; -}; - -struct ElementTag -{ - uint tag; - uint flags; -}; - struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; -struct SceneBuf -{ - uint scene[1]; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; -}; - -struct OutBuf -{ - DrawMonoid_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - struct Alloc { uint offset; @@ -64,12 +30,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -77,6 +45,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -84,70 +54,66 @@ struct ConfigBuf Config conf; }; -static inline __attribute__((always_inline)) -ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46) +struct SceneBuf { - uint tag_and_flags = v_46.scene[ref.offset >> uint(2)]; - return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} + uint scene[1]; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; + uint scene_offset; + uint info_offset; +}; + +struct OutBuf +{ + DrawMonoid_1 outbuf[1]; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { - switch (tag_word) - { - case 4u: - case 5u: - case 6u: - { - return DrawMonoid{ 1u, 0u }; - } - case 9u: - case 10u: - { - return DrawMonoid{ 1u, 1u }; - } - default: - { - return DrawMonoid{ 0u, 0u }; - } - } + uint has_path = uint(tag_word != 0u); + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; } static inline __attribute__((always_inline)) -ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) -{ - return ElementRef{ ref.offset + (index * 36u) }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } -kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneBuf& _96 [[buffer(2)]], device OutBuf& _187 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef ref = ElementRef{ ix * 36u }; - ElementRef param = ref; - uint tag_word = Element_tag(param, v_46).tag; - uint param_1 = tag_word; - DrawMonoid agg = map_tag(param_1); + uint drawtag_base = _86.conf.drawtag_offset >> uint(2); + uint tag_word = _96.scene[drawtag_base + ix]; + uint param = tag_word; + DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - ElementRef param_2 = ref; - uint param_3 = i; - ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4, v_46).tag; - uint param_5 = tag_word; - DrawMonoid param_6 = agg; - DrawMonoid param_7 = map_tag(param_5); - agg = combine_tag_monoid(param_6, param_7); + uint tag_word_1 = _96.scene[(drawtag_base + ix) + i]; + uint param_1 = tag_word_1; + DrawMonoid param_2 = agg; + DrawMonoid param_3 = map_tag(param_1); + agg = combine_draw_monoid(param_2, param_3); } sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i_1 = 0u; i_1 < 8u; i_1++) @@ -156,17 +122,19 @@ kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) { DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_8 = agg; - DrawMonoid param_9 = other; - agg = combine_tag_monoid(param_8, param_9); + DrawMonoid param_4 = agg; + DrawMonoid param_5 = other; + agg = combine_draw_monoid(param_4, param_5); } threadgroup_barrier(mem_flags::mem_threadgroup); sh_scratch[gl_LocalInvocationID.x] = agg; } if (gl_LocalInvocationID.x == 0u) { - _199.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _199.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + _187.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _187.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + _187.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; + _187.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; } } diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv index a45627d..4daf43a 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.spv and b/piet-gpu/shader/gen/draw_reduce.spv differ diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil index a84fd4a..873fa29 100644 Binary files a/piet-gpu/shader/gen/draw_root.dxil and b/piet-gpu/shader/gen/draw_root.dxil differ diff --git a/piet-gpu/shader/gen/draw_root.hlsl b/piet-gpu/shader/gen/draw_root.hlsl index 56b513f..b4cb7e4 100644 --- a/piet-gpu/shader/gen/draw_root.hlsl +++ b/piet-gpu/shader/gen/draw_root.hlsl @@ -2,13 +2,15 @@ struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -static const DrawMonoid _18 = { 0u, 0u }; +static const DrawMonoid _18 = { 0u, 0u, 0u, 0u }; -RWByteAddressBuffer _57 : register(u0, space0); +RWByteAddressBuffer _71 : register(u0, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -20,15 +22,17 @@ struct SPIRV_Cross_Input groupshared DrawMonoid sh_scratch[256]; -DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } -DrawMonoid tag_monoid_identity() +DrawMonoid draw_monoid_identity() { return _18; } @@ -36,22 +40,30 @@ DrawMonoid tag_monoid_identity() void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - DrawMonoid _61; - _61.path_ix = _57.Load(ix * 8 + 0); - _61.clip_ix = _57.Load(ix * 8 + 4); + DrawMonoid _75; + _75.path_ix = _71.Load(ix * 16 + 0); + _75.clip_ix = _71.Load(ix * 16 + 4); + _75.scene_offset = _71.Load(ix * 16 + 8); + _75.info_offset = _71.Load(ix * 16 + 12); DrawMonoid local[8]; - local[0].path_ix = _61.path_ix; - local[0].clip_ix = _61.clip_ix; + local[0].path_ix = _75.path_ix; + local[0].clip_ix = _75.clip_ix; + local[0].scene_offset = _75.scene_offset; + local[0].info_offset = _75.info_offset; DrawMonoid param_1; for (uint i = 1u; i < 8u; i++) { DrawMonoid param = local[i - 1u]; - DrawMonoid _88; - _88.path_ix = _57.Load((ix + i) * 8 + 0); - _88.clip_ix = _57.Load((ix + i) * 8 + 4); - param_1.path_ix = _88.path_ix; - param_1.clip_ix = _88.clip_ix; - local[i] = combine_tag_monoid(param, param_1); + DrawMonoid _106; + _106.path_ix = _71.Load((ix + i) * 16 + 0); + _106.clip_ix = _71.Load((ix + i) * 16 + 4); + _106.scene_offset = _71.Load((ix + i) * 16 + 8); + _106.info_offset = _71.Load((ix + i) * 16 + 12); + param_1.path_ix = _106.path_ix; + param_1.clip_ix = _106.clip_ix; + param_1.scene_offset = _106.scene_offset; + param_1.info_offset = _106.info_offset; + local[i] = combine_draw_monoid(param, param_1); } DrawMonoid agg = local[7]; sh_scratch[gl_LocalInvocationID.x] = agg; @@ -63,13 +75,13 @@ void comp_main() DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid param_2 = other; DrawMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); + agg = combine_draw_monoid(param_2, param_3); } GroupMemoryBarrierWithGroupSync(); sh_scratch[gl_LocalInvocationID.x] = agg; } GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = tag_monoid_identity(); + DrawMonoid row = draw_monoid_identity(); if (gl_LocalInvocationID.x > 0u) { row = sh_scratch[gl_LocalInvocationID.x - 1u]; @@ -78,10 +90,12 @@ void comp_main() { DrawMonoid param_4 = row; DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_tag_monoid(param_4, param_5); - uint _177 = ix + i_2; - _57.Store(_177 * 8 + 0, m.path_ix); - _57.Store(_177 * 8 + 4, m.clip_ix); + DrawMonoid m = combine_draw_monoid(param_4, param_5); + uint _199 = ix + i_2; + _71.Store(_199 * 16 + 0, m.path_ix); + _71.Store(_199 * 16 + 4, m.clip_ix); + _71.Store(_199 * 16 + 8, m.scene_offset); + _71.Store(_199 * 16 + 12, m.info_offset); } } diff --git a/piet-gpu/shader/gen/draw_root.msl b/piet-gpu/shader/gen/draw_root.msl index 0d22e4b..9ee8cfe 100644 --- a/piet-gpu/shader/gen/draw_root.msl +++ b/piet-gpu/shader/gen/draw_root.msl @@ -48,12 +48,16 @@ struct DrawMonoid { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; struct DrawMonoid_1 { uint path_ix; uint clip_ix; + uint scene_offset; + uint info_offset; }; struct DataBuf @@ -64,35 +68,41 @@ struct DataBuf constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) -DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; return c; } static inline __attribute__((always_inline)) -DrawMonoid tag_monoid_identity() +DrawMonoid draw_monoid_identity() { - return DrawMonoid{ 0u, 0u }; + return DrawMonoid{ 0u, 0u, 0u, 0u }; } -kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; spvUnsafeArray local; - local[0].path_ix = _57.data[ix].path_ix; - local[0].clip_ix = _57.data[ix].clip_ix; + local[0].path_ix = _71.data[ix].path_ix; + local[0].clip_ix = _71.data[ix].clip_ix; + local[0].scene_offset = _71.data[ix].scene_offset; + local[0].info_offset = _71.data[ix].info_offset; DrawMonoid param_1; for (uint i = 1u; i < 8u; i++) { - uint _82 = ix + i; + uint _100 = ix + i; DrawMonoid param = local[i - 1u]; - param_1.path_ix = _57.data[_82].path_ix; - param_1.clip_ix = _57.data[_82].clip_ix; - local[i] = combine_tag_monoid(param, param_1); + param_1.path_ix = _71.data[_100].path_ix; + param_1.clip_ix = _71.data[_100].clip_ix; + param_1.scene_offset = _71.data[_100].scene_offset; + param_1.info_offset = _71.data[_100].info_offset; + local[i] = combine_draw_monoid(param, param_1); } DrawMonoid agg = local[7]; sh_scratch[gl_LocalInvocationID.x] = agg; @@ -104,13 +114,13 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid param_2 = other; DrawMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); + agg = combine_draw_monoid(param_2, param_3); } threadgroup_barrier(mem_flags::mem_threadgroup); sh_scratch[gl_LocalInvocationID.x] = agg; } threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = tag_monoid_identity(); + DrawMonoid row = draw_monoid_identity(); if (gl_LocalInvocationID.x > 0u) { row = sh_scratch[gl_LocalInvocationID.x - 1u]; @@ -119,10 +129,12 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID { DrawMonoid param_4 = row; DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_tag_monoid(param_4, param_5); - uint _177 = ix + i_2; - _57.data[_177].path_ix = m.path_ix; - _57.data[_177].clip_ix = m.clip_ix; + DrawMonoid m = combine_draw_monoid(param_4, param_5); + uint _199 = ix + i_2; + _71.data[_199].path_ix = m.path_ix; + _71.data[_199].clip_ix = m.clip_ix; + _71.data[_199].scene_offset = m.scene_offset; + _71.data[_199].info_offset = m.info_offset; } } diff --git a/piet-gpu/shader/gen/draw_root.spv b/piet-gpu/shader/gen/draw_root.spv index 1c11414..e6a53e5 100644 Binary files a/piet-gpu/shader/gen/draw_root.spv and b/piet-gpu/shader/gen/draw_root.spv differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index c0c27c9..bc0b8e8 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 21bd083..f17b240 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -125,12 +125,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -138,6 +140,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index 9a8fbd0..c1f41af 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -183,12 +183,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -196,6 +198,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 4d205ce..91272da 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 18c4b7e..7e39c73 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 7dc2e01..de95771 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -125,12 +125,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -138,6 +140,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 38506dd..5128e99 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -183,12 +183,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -196,6 +198,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 305facd..791b76c 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil index 9fd593c..b6c9398 100644 Binary files a/piet-gpu/shader/gen/path_coarse.dxil and b/piet-gpu/shader/gen/path_coarse.dxil differ diff --git a/piet-gpu/shader/gen/path_coarse.hlsl b/piet-gpu/shader/gen/path_coarse.hlsl index 59cd7a6..93ee8f0 100644 --- a/piet-gpu/shader/gen/path_coarse.hlsl +++ b/piet-gpu/shader/gen/path_coarse.hlsl @@ -84,12 +84,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -97,6 +99,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u); diff --git a/piet-gpu/shader/gen/path_coarse.msl b/piet-gpu/shader/gen/path_coarse.msl index f3cead8..26aa33a 100644 --- a/piet-gpu/shader/gen/path_coarse.msl +++ b/piet-gpu/shader/gen/path_coarse.msl @@ -144,12 +144,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -157,6 +159,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/path_coarse.spv b/piet-gpu/shader/gen/path_coarse.spv index 7c452bf..5e6beda 100644 Binary files a/piet-gpu/shader/gen/path_coarse.spv and b/piet-gpu/shader/gen/path_coarse.spv differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 3895131..7ce4684 100644 Binary files a/piet-gpu/shader/gen/pathseg.dxil and b/piet-gpu/shader/gen/pathseg.dxil differ diff --git a/piet-gpu/shader/gen/pathseg.hlsl b/piet-gpu/shader/gen/pathseg.hlsl index 0501f6f..578417f 100644 --- a/piet-gpu/shader/gen/pathseg.hlsl +++ b/piet-gpu/shader/gen/pathseg.hlsl @@ -62,12 +62,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -75,6 +77,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); @@ -361,7 +365,7 @@ uint round_up(float x) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0); + uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint param = tag_word; TagMonoid local_tm = reduce_tag(param); sh_tag[gl_LocalInvocationID.x] = local_tm; @@ -400,8 +404,8 @@ void comp_main() TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; tm = combine_tag_monoid(param_3, param_4); } - uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix; + uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset; + uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; uint trans_ix = tm.trans_ix; TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) }; diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl index 0e97d68..9f6328e 100644 --- a/piet-gpu/shader/gen/pathseg.msl +++ b/piet-gpu/shader/gen/pathseg.msl @@ -127,12 +127,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -140,6 +142,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -635,7 +639,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } threadgroup_barrier(mem_flags::mem_threadgroup); uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.conf.bbox_alloc.offset >> uint(2)) + (path_ix * 6u); + uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u); Monoid row = monoid_identity(); if (gl_LocalInvocationID.x > 0u) { diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv index a1f223c..4e2e9d5 100644 Binary files a/piet-gpu/shader/gen/pathseg.spv and b/piet-gpu/shader/gen/pathseg.spv differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 89fb562..ff544b8 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and b/piet-gpu/shader/gen/pathtag_reduce.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl index 754e6e9..5f7d125 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ b/piet-gpu/shader/gen/pathtag_reduce.hlsl @@ -24,12 +24,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -37,6 +39,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); @@ -88,7 +92,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.Load(84) >> uint(2)) + ix; + uint scene_ix = (_139.Load(92) >> uint(2)) + ix; uint tag_word = _151.Load(scene_ix * 4 + 0); uint param = tag_word; TagMonoid agg = reduce_tag(param); diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl index 83a8208..91e0cca 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ b/piet-gpu/shader/gen/pathtag_reduce.msl @@ -31,12 +31,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -44,6 +46,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv index feaad0a..f1d8679 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.spv and b/piet-gpu/shader/gen/pathtag_reduce.spv differ diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil index 77f12e6..48584bd 100644 Binary files a/piet-gpu/shader/gen/pathtag_root.dxil and b/piet-gpu/shader/gen/pathtag_root.dxil differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index fdc60a1..d4d0b49 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and b/piet-gpu/shader/gen/tile_alloc.dxil differ diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl index 1c9d04b..9e370ed 100644 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ b/piet-gpu/shader/gen/tile_alloc.hlsl @@ -9,28 +9,6 @@ struct MallocResult bool failed; }; -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -59,12 +37,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -72,12 +52,15 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _92 : register(u0, space0); -ByteAddressBuffer _314 : register(t1, space0); +RWByteAddressBuffer _70 : register(u0, space0); +ByteAddressBuffer _181 : register(t1, space0); +ByteAddressBuffer _257 : register(t2, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -90,62 +73,15 @@ struct SPIRV_Cross_Input groupshared uint sh_tile_count[256]; groupshared MallocResult sh_tile_alloc; -bool touch_mem(Alloc alloc, uint offset) +float4 load_draw_bbox(uint draw_ix) { - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _92.Load(offset * 4 + 8); - return v; -} - -AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _246 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _246; -} - -AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - AnnoEndClip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.blend = raw4; - return s; -} - -AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) -{ - AnnoEndClipRef _252 = { ref.offset + 4u }; - Alloc param = a; - AnnoEndClipRef param_1 = _252; - return AnnoEndClip_read(param, param_1); + uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix); + float x0 = asfloat(_70.Load(base * 4 + 8)); + float y0 = asfloat(_70.Load((base + 1u) * 4 + 8)); + float x1 = asfloat(_70.Load((base + 2u) * 4 + 8)); + float y1 = asfloat(_70.Load((base + 3u) * 4 + 8)); + float4 bbox = float4(x0, y0, x1, y1); + return bbox; } Alloc new_alloc(uint offset, uint size, bool mem_ok) @@ -157,22 +93,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) MallocResult malloc(uint size) { - uint _98; - _92.InterlockedAdd(0, size, _98); - uint offset = _98; - uint _105; - _92.GetDimensions(_105); - _105 = (_105 - 8) / 4; + uint _76; + _70.InterlockedAdd(0, size, _76); + uint offset = _76; + uint _83; + _70.GetDimensions(_83); + _83 = (_83 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_105) * 4); + r.failed = (offset + size) > uint(int(_83) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _127; - _92.InterlockedMax(4, 1u, _127); + uint _105; + _70.InterlockedMax(4, 1u, _105); return r; } return r; @@ -180,8 +116,13 @@ MallocResult malloc(uint size) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _169 = { a.offset + offset }; - return _169; + Alloc _131 = { a.offset + offset }; + return _131; +} + +bool touch_mem(Alloc alloc, uint offset) +{ + return true; } void write_mem(Alloc alloc, uint offset, uint val) @@ -192,7 +133,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _92.Store(offset * 4 + 8, val); + _70.Store(offset * 4 + 8, val); } void Path_write(Alloc a, PathRef ref, Path s) @@ -216,53 +157,35 @@ void comp_main() { uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; - PathRef _321 = { _314.Load(16) + (element_ix * 12u) }; - PathRef path_ref = _321; - AnnotatedRef _330 = { _314.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _330; - uint tag = 0u; - if (element_ix < _314.Load(0)) + PathRef _241 = { _181.Load(16) + (element_ix * 12u) }; + PathRef path_ref = _241; + uint drawtag_base = _181.Load(100) >> uint(2); + uint drawtag = 0u; + if (element_ix < _181.Load(0)) { - Alloc _341; - _341.offset = _314.Load(32); - Alloc param; - param.offset = _341.offset; - AnnotatedRef param_1 = ref; - tag = Annotated_tag(param, param_1).tag; + drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0); } int x0 = 0; int y0 = 0; int x1 = 0; int y1 = 0; - switch (tag) + if ((drawtag != 0u) && (drawtag != 37u)) { - case 1u: - case 2u: - case 3u: - case 4u: - case 5u: - { - Alloc _359; - _359.offset = _314.Load(32); - Alloc param_2; - param_2.offset = _359.offset; - AnnotatedRef param_3 = ref; - AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3); - x0 = int(floor(clip.bbox.x * 0.0625f)); - y0 = int(floor(clip.bbox.y * 0.0625f)); - x1 = int(ceil(clip.bbox.z * 0.0625f)); - y1 = int(ceil(clip.bbox.w * 0.0625f)); - break; - } + uint param = element_ix; + float4 bbox = load_draw_bbox(param); + x0 = int(floor(bbox.x * 0.0625f)); + y0 = int(floor(bbox.y * 0.0625f)); + x1 = int(ceil(bbox.z * 0.0625f)); + y1 = int(ceil(bbox.w * 0.0625f)); } - x0 = clamp(x0, 0, int(_314.Load(8))); - y0 = clamp(y0, 0, int(_314.Load(12))); - x1 = clamp(x1, 0, int(_314.Load(8))); - y1 = clamp(y1, 0, int(_314.Load(12))); + x0 = clamp(x0, 0, int(_181.Load(8))); + y0 = clamp(y0, 0, int(_181.Load(12))); + x1 = clamp(x1, 0, int(_181.Load(8))); + y1 = clamp(y1, 0, int(_181.Load(12))); Path path; path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); uint tile_count = uint((x1 - x0) * (y1 - y0)); - if (tag == 5u) + if (drawtag == 37u) { tile_count = 0u; } @@ -280,59 +203,59 @@ void comp_main() } if (th_ix == 255u) { - uint param_4 = total_tile_count * 8u; - MallocResult _485 = malloc(param_4); - sh_tile_alloc = _485; + uint param_1 = total_tile_count * 8u; + MallocResult _396 = malloc(param_1); + sh_tile_alloc = _396; } GroupMemoryBarrierWithGroupSync(); MallocResult alloc_start = sh_tile_alloc; - bool _496; + bool _407; if (!alloc_start.failed) { - _496 = _92.Load(4) != 0u; + _407 = _70.Load(4) != 0u; } else { - _496 = alloc_start.failed; + _407 = alloc_start.failed; } - if (_496) + if (_407) { return; } - if (element_ix < _314.Load(0)) + if (element_ix < _181.Load(0)) { - uint _509; + uint _420; if (th_ix > 0u) { - _509 = sh_tile_count[th_ix - 1u]; + _420 = sh_tile_count[th_ix - 1u]; } else { - _509 = 0u; + _420 = 0u; } - uint tile_subix = _509; - Alloc param_5 = alloc_start.alloc; - uint param_6 = 8u * tile_subix; - uint param_7 = 8u * tile_count; - Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); - TileRef _531 = { tiles_alloc.offset }; - path.tiles = _531; - Alloc _536; - _536.offset = _314.Load(16); - Alloc param_8; - param_8.offset = _536.offset; - PathRef param_9 = path_ref; - Path param_10 = path; - Path_write(param_8, param_9, param_10); + uint tile_subix = _420; + Alloc param_2 = alloc_start.alloc; + uint param_3 = 8u * tile_subix; + uint param_4 = 8u * tile_count; + Alloc tiles_alloc = slice_mem(param_2, param_3, param_4); + TileRef _442 = { tiles_alloc.offset }; + path.tiles = _442; + Alloc _448; + _448.offset = _181.Load(16); + Alloc param_5; + param_5.offset = _448.offset; + PathRef param_6 = path_ref; + Path param_7 = path; + Path_write(param_5, param_6, param_7); } uint total_count = sh_tile_count[255] * 2u; uint start_ix = alloc_start.alloc.offset >> uint(2); for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) { - Alloc param_11 = alloc_start.alloc; - uint param_12 = start_ix + i_1; - uint param_13 = 0u; - write_mem(param_11, param_12, param_13); + Alloc param_8 = alloc_start.alloc; + uint param_9 = start_ix + i_1; + uint param_10 = 0u; + write_mem(param_8, param_9, param_10); } } diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl index c03e830..e6f486b 100644 --- a/piet-gpu/shader/gen/tile_alloc.msl +++ b/piet-gpu/shader/gen/tile_alloc.msl @@ -18,28 +18,6 @@ struct MallocResult bool failed; }; -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; - uint blend; -}; - -struct AnnotatedRef -{ - uint offset; -}; - -struct AnnotatedTag -{ - uint tag; - uint flags; -}; - struct PathRef { uint offset; @@ -80,12 +58,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -93,6 +73,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf @@ -100,67 +82,23 @@ struct ConfigBuf Config conf; }; +struct SceneBuf +{ + uint scene[1]; +}; + constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) +float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181) { - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_92, constant uint& v_92BufferSize) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_92.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_92, v_92BufferSize); - return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_92, constant uint& v_92BufferSize) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_92, v_92BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_92, v_92BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_92, v_92BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_92, v_92BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_92, v_92BufferSize); - AnnoEndClip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.blend = raw4; - return s; -} - -static inline __attribute__((always_inline)) -AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize) -{ - Alloc param = a; - AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u }; - return AnnoEndClip_read(param, param_1, v_92, v_92BufferSize); + uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); + float x0 = as_type(v_70.memory[base]); + float y0 = as_type(v_70.memory[base + 1u]); + float x1 = as_type(v_70.memory[base + 2u]); + float y1 = as_type(v_70.memory[base + 3u]); + float4 bbox = float4(x0, y0, x1, y1); + return bbox; } static inline __attribute__((always_inline)) @@ -172,19 +110,19 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_92, constant uint& v_92BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize) { - uint _98 = atomic_fetch_add_explicit((device atomic_uint*)&v_92.mem_offset, size, memory_order_relaxed); - uint offset = _98; + uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed); + uint offset = _76; MallocResult r; - r.failed = (offset + size) > uint(int((v_92BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _127 = atomic_fetch_max_explicit((device atomic_uint*)&v_92.mem_error, 1u, memory_order_relaxed); + uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed); return r; } return r; @@ -197,7 +135,13 @@ Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const u } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_92, constant uint& v_92BufferSize) +bool touch_mem(thread const Alloc& alloc, thread const uint& offset) +{ + return true; +} + +static inline __attribute__((always_inline)) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -205,75 +149,62 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_92.memory[offset] = val; + v_70.memory[offset] = val; } static inline __attribute__((always_inline)) -void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_92, constant uint& v_92BufferSize) +void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); - write_mem(param, param_1, param_2, v_92, v_92BufferSize); + write_mem(param, param_1, param_2, v_70, v_70BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); - write_mem(param_3, param_4, param_5, v_92, v_92BufferSize); + write_mem(param_3, param_4, param_5, v_70, v_70BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = s.tiles.offset; - write_mem(param_6, param_7, param_8, v_92, v_92BufferSize); + write_mem(param_6, param_7, param_8, v_70, v_70BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_92 [[buffer(0)]], const device ConfigBuf& _314 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { threadgroup uint sh_tile_count[256]; threadgroup MallocResult sh_tile_alloc; - constant uint& v_92BufferSize = spvBufferSizeConstants[0]; + constant uint& v_70BufferSize = spvBufferSizeConstants[0]; uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; - PathRef path_ref = PathRef{ _314.conf.tile_alloc.offset + (element_ix * 12u) }; - AnnotatedRef ref = AnnotatedRef{ _314.conf.anno_alloc.offset + (element_ix * 40u) }; - uint tag = 0u; - if (element_ix < _314.conf.n_elements) + PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) }; + uint drawtag_base = v_181.conf.drawtag_offset >> uint(2); + uint drawtag = 0u; + if (element_ix < v_181.conf.n_elements) { - Alloc param; - param.offset = _314.conf.anno_alloc.offset; - AnnotatedRef param_1 = ref; - tag = Annotated_tag(param, param_1, v_92, v_92BufferSize).tag; + drawtag = _257.scene[drawtag_base + element_ix]; } int x0 = 0; int y0 = 0; int x1 = 0; int y1 = 0; - switch (tag) + if ((drawtag != 0u) && (drawtag != 37u)) { - case 1u: - case 2u: - case 3u: - case 4u: - case 5u: - { - Alloc param_2; - param_2.offset = _314.conf.anno_alloc.offset; - AnnotatedRef param_3 = ref; - AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_92, v_92BufferSize); - x0 = int(floor(clip.bbox.x * 0.0625)); - y0 = int(floor(clip.bbox.y * 0.0625)); - x1 = int(ceil(clip.bbox.z * 0.0625)); - y1 = int(ceil(clip.bbox.w * 0.0625)); - break; - } + uint param = element_ix; + float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181); + x0 = int(floor(bbox.x * 0.0625)); + y0 = int(floor(bbox.y * 0.0625)); + x1 = int(ceil(bbox.z * 0.0625)); + y1 = int(ceil(bbox.w * 0.0625)); } - x0 = clamp(x0, 0, int(_314.conf.width_in_tiles)); - y0 = clamp(y0, 0, int(_314.conf.height_in_tiles)); - x1 = clamp(x1, 0, int(_314.conf.width_in_tiles)); - y1 = clamp(y1, 0, int(_314.conf.height_in_tiles)); + x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles)); + y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles)); + x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles)); + y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles)); Path path; path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); uint tile_count = uint((x1 - x0) * (y1 - y0)); - if (tag == 5u) + if (drawtag == 37u) { tile_count = 0u; } @@ -291,56 +222,56 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (th_ix == 255u) { - uint param_4 = total_tile_count * 8u; - MallocResult _485 = malloc(param_4, v_92, v_92BufferSize); - sh_tile_alloc = _485; + uint param_1 = total_tile_count * 8u; + MallocResult _396 = malloc(param_1, v_70, v_70BufferSize); + sh_tile_alloc = _396; } threadgroup_barrier(mem_flags::mem_threadgroup); MallocResult alloc_start = sh_tile_alloc; - bool _496; + bool _407; if (!alloc_start.failed) { - _496 = v_92.mem_error != 0u; + _407 = v_70.mem_error != 0u; } else { - _496 = alloc_start.failed; + _407 = alloc_start.failed; } - if (_496) + if (_407) { return; } - if (element_ix < _314.conf.n_elements) + if (element_ix < v_181.conf.n_elements) { - uint _509; + uint _420; if (th_ix > 0u) { - _509 = sh_tile_count[th_ix - 1u]; + _420 = sh_tile_count[th_ix - 1u]; } else { - _509 = 0u; + _420 = 0u; } - uint tile_subix = _509; - Alloc param_5 = alloc_start.alloc; - uint param_6 = 8u * tile_subix; - uint param_7 = 8u * tile_count; - Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); + uint tile_subix = _420; + Alloc param_2 = alloc_start.alloc; + uint param_3 = 8u * tile_subix; + uint param_4 = 8u * tile_count; + Alloc tiles_alloc = slice_mem(param_2, param_3, param_4); path.tiles = TileRef{ tiles_alloc.offset }; - Alloc param_8; - param_8.offset = _314.conf.tile_alloc.offset; - PathRef param_9 = path_ref; - Path param_10 = path; - Path_write(param_8, param_9, param_10, v_92, v_92BufferSize); + Alloc param_5; + param_5.offset = v_181.conf.tile_alloc.offset; + PathRef param_6 = path_ref; + Path param_7 = path; + Path_write(param_5, param_6, param_7, v_70, v_70BufferSize); } uint total_count = sh_tile_count[255] * 2u; uint start_ix = alloc_start.alloc.offset >> uint(2); for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) { - Alloc param_11 = alloc_start.alloc; - uint param_12 = start_ix + i_1; - uint param_13 = 0u; - write_mem(param_11, param_12, param_13, v_92, v_92BufferSize); + Alloc param_8 = alloc_start.alloc; + uint param_9 = start_ix + i_1; + uint param_10 = 0u; + write_mem(param_8, param_9, param_10, v_70, v_70BufferSize); } } diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv index cf2f01c..3ae1073 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.spv and b/piet-gpu/shader/gen/tile_alloc.spv differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil index 8a3200b..0c1e376 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and b/piet-gpu/shader/gen/transform_leaf.dxil differ diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl index 3528d6c..8a3b3d5 100644 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ b/piet-gpu/shader/gen/transform_leaf.hlsl @@ -37,12 +37,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -50,6 +52,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); @@ -155,7 +159,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _285 = { _278.Load(76) + (ix * 24u) }; + TransformRef _285 = { _278.Load(84) + (ix * 24u) }; TransformRef ref = _285; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl index 6a99fae..fe45438 100644 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ b/piet-gpu/shader/gen/transform_leaf.msl @@ -100,12 +100,14 @@ struct Config Alloc_1 pathseg_alloc; Alloc_1 anno_alloc; Alloc_1 trans_alloc; - Alloc_1 bbox_alloc; + Alloc_1 path_bbox_alloc; Alloc_1 drawmonoid_alloc; Alloc_1 clip_alloc; Alloc_1 clip_bic_alloc; Alloc_1 clip_stack_alloc; Alloc_1 clip_bbox_alloc; + Alloc_1 draw_bbox_alloc; + Alloc_1 drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -113,6 +115,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv index b9a0a83..b739099 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.spv and b/piet-gpu/shader/gen/transform_leaf.spv differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil index 6c94a3a..fc3a311 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and b/piet-gpu/shader/gen/transform_reduce.dxil differ diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl index cce1a22..bd14f79 100644 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ b/piet-gpu/shader/gen/transform_reduce.hlsl @@ -26,12 +26,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -39,6 +41,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); @@ -92,7 +96,7 @@ Transform combine_monoid(Transform a, Transform b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(76) + (ix * 24u) }; + TransformRef _168 = { _161.Load(84) + (ix * 24u) }; TransformRef ref = _168; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl index 3695563..62da531 100644 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ b/piet-gpu/shader/gen/transform_reduce.msl @@ -38,12 +38,14 @@ struct Config Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; - Alloc bbox_alloc; + Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; + Alloc draw_bbox_alloc; + Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; @@ -51,6 +53,8 @@ struct Config uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; + uint drawtag_offset; + uint drawdata_offset; }; struct ConfigBuf diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv index e74cb8d..6aa6b94 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.spv and b/piet-gpu/shader/gen/transform_reduce.spv differ diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil index 5b4f059..a33ff7f 100644 Binary files a/piet-gpu/shader/gen/transform_root.dxil and b/piet-gpu/shader/gen/transform_root.dxil differ diff --git a/piet-gpu/shader/pathseg.comp b/piet-gpu/shader/pathseg.comp index a2ea86e..ce4ab84 100644 --- a/piet-gpu/shader/pathseg.comp +++ b/piet-gpu/shader/pathseg.comp @@ -248,7 +248,7 @@ void main() { barrier(); uint path_ix = save_path_ix; - uint bbox_out_ix = (conf.bbox_alloc.offset >> 2) + path_ix * 6; + uint bbox_out_ix = (conf.path_bbox_alloc.offset >> 2) + path_ix * 6; // Write bboxes to paths; do atomic min/max if partial Monoid row = monoid_identity(); if (gl_LocalInvocationID.x > 0) { diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 0dccecb..ec17188 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -42,7 +42,7 @@ struct Config { // new element pipeline stuff follows // Bounding boxes of paths, stored as int (so atomics work) - Alloc bbox_alloc; + Alloc path_bbox_alloc; // Monoid for draw objects Alloc drawmonoid_alloc; @@ -54,6 +54,10 @@ struct Config { Alloc clip_stack_alloc; // Clip processing results (path_ix + bbox) Alloc clip_bbox_alloc; + // Bounding box per draw object + Alloc draw_bbox_alloc; + // Info computed in draw stage, per draw object + Alloc drawinfo_alloc; // Number of transforms in scene // This is probably not needed. @@ -63,6 +67,10 @@ struct Config { uint n_path; // Total number of BeginClip and EndClip draw objects. uint n_clip; + + // Note: one of these offsets *could* be hardcoded to zero (as was the + // original element stream), but for now retain flexibility. + // Offset (in bytes) of transform stream in scene buffer uint trans_offset; // Offset (in bytes) of linewidth stream in scene @@ -71,6 +79,10 @@ struct Config { uint pathtag_offset; // Offset (in bytes) of path segment stream in scene uint pathseg_offset; + // Offset (in bytes) of draw object tag stream in scene; see drawtag.h + uint drawtag_offset; + // Offset (in bytes) of draw payload stream in scene + uint drawdata_offset; }; #endif diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index 024f499..0fec2ce 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -17,7 +17,11 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { Config conf; }; -#include "annotated.h" +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "drawtag.h" #include "tile.h" // scale factors useful for converting coordinates to tiles @@ -27,31 +31,39 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { shared uint sh_tile_count[TILE_ALLOC_WG]; shared MallocResult sh_tile_alloc; +vec4 load_draw_bbox(uint draw_ix) { + uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix; + float x0 = uintBitsToFloat(memory[base]); + float y0 = uintBitsToFloat(memory[base + 1]); + float x1 = uintBitsToFloat(memory[base + 2]); + float y1 = uintBitsToFloat(memory[base + 3]); + vec4 bbox = vec4(x0, y0, x1, y1); + return bbox; +} + void main() { uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; + // At the moment, element_ix == path_ix. The clip-intersected bounding boxes + // for elements (draw objects) are computed in the binning stage, but at some + // point we'll probably want to break that correspondence. Tiles should be + // allocated for paths, not draw objs. EndClip doesn't need an allocation. PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); - AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); + uint drawtag_base = conf.drawtag_offset >> 2; - uint tag = Annotated_Nop; + uint drawtag = Drawtag_Nop; if (element_ix < conf.n_elements) { - tag = Annotated_tag(conf.anno_alloc, ref).tag; + drawtag = scene[drawtag_base + element_ix]; } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; - switch (tag) { - case Annotated_Color: - case Annotated_LinGradient: - case Annotated_Image: - case Annotated_BeginClip: - case Annotated_EndClip: - // Note: we take advantage of the fact that fills, strokes, and - // clips have compatible layout. - AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref); - x0 = int(floor(clip.bbox.x * SX)); - y0 = int(floor(clip.bbox.y * SY)); - x1 = int(ceil(clip.bbox.z * SX)); - y1 = int(ceil(clip.bbox.w * SY)); - break; + // Allocate an empty path for EndClip; at some point we'll change + // this to be per path rather than per draw object. + if (drawtag != Drawtag_Nop && drawtag != Drawtag_EndClip) { + vec4 bbox = load_draw_bbox(element_ix); + x0 = int(floor(bbox.x * SX)); + y0 = int(floor(bbox.y * SY)); + x1 = int(ceil(bbox.z * SX)); + y1 = int(ceil(bbox.w * SY)); } x0 = clamp(x0, 0, int(conf.width_in_tiles)); y0 = clamp(y0, 0, int(conf.height_in_tiles)); @@ -61,11 +73,6 @@ void main() { Path path; path.bbox = uvec4(x0, y0, x1, y1); uint tile_count = (x1 - x0) * (y1 - y0); - if (tag == Annotated_EndClip) { - // Don't actually allocate tiles for an end clip, but we do want - // the path structure (especially bbox) allocated for it. - tile_count = 0; - } sh_tile_count[th_ix] = tile_count; uint total_tile_count = tile_count; diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index c24615e..c25c260 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -30,7 +30,8 @@ pub struct Encoder { tag_stream: Vec, pathseg_stream: Vec, linewidth_stream: Vec, - drawobj_stream: Vec, + drawtag_stream: Vec, + drawdata_stream: Vec, n_path: u32, n_pathseg: u32, n_clip: u32, @@ -43,53 +44,54 @@ pub struct Encoder { pub struct GlyphEncoder { tag_stream: Vec, pathseg_stream: Vec, - drawobj_stream: Vec, + drawtag_stream: Vec, + drawdata_stream: Vec, n_path: u32, n_pathseg: u32, } -// Currently same as Element, but may change - should become packed. -const DRAWOBJ_SIZE: usize = 36; const TRANSFORM_SIZE: usize = 24; const LINEWIDTH_SIZE: usize = 4; const PATHSEG_SIZE: usize = 52; -const BBOX_SIZE: usize = 24; -const DRAWMONOID_SIZE: usize = 8; +const PATH_BBOX_SIZE: usize = 24; +const DRAWMONOID_SIZE: usize = 16; +const DRAW_BBOX_SIZE: usize = 16; +const DRAWTAG_SIZE: usize = 4; const ANNOTATED_SIZE: usize = 40; -// These are bytemuck versions of elements currently defined in the -// Element struct in piet-gpu-types; that's pretty much going away. - -const ELEMENT_FILLCOLOR: u32 = 4; -const ELEMENT_FILLLINGRADIENT: u32 = 5; -const ELEMENT_BEGINCLIP: u32 = 9; -const ELEMENT_ENDCLIP: u32 = 10; +// Tags for draw objects. See shader/drawtag.h for the authoritative source. +const DRAWTAG_FILLCOLOR: u32 = 0x44; +const DRAWTAG_FILLLINGRADIENT: u32 = 0x114; +const DRAWTAG_BEGINCLIP: u32 = 0x05; +const DRAWTAG_ENDCLIP: u32 = 0x25; #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] pub struct FillColor { - tag: u32, rgba_color: u32, - padding: [u32; 7], } #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] pub struct FillLinGradient { - tag: u32, index: u32, p0: [f32; 2], p1: [f32; 2], - padding: [u32; 3], +} + +#[allow(unused)] +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct FillImage { + index: u32, + // [i16; 2] + offset: u32, } #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] pub struct Clip { - tag: u32, - bbox: [f32; 4], blend: u32, - padding: [u32; 3], } impl Encoder { @@ -99,7 +101,8 @@ impl Encoder { tag_stream: Vec::new(), pathseg_stream: Vec::new(), linewidth_stream: vec![-1.0], - drawobj_stream: Vec::new(), + drawtag_stream: Vec::new(), + drawdata_stream: Vec::new(), n_path: 0, n_pathseg: 0, n_clip: 0, @@ -130,51 +133,42 @@ impl Encoder { /// /// This should be encoded after a path. pub fn fill_color(&mut self, rgba_color: u32) { + self.drawtag_stream.push(DRAWTAG_FILLCOLOR); let element = FillColor { - tag: ELEMENT_FILLCOLOR, rgba_color, - ..Default::default() }; - self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } /// Encode a fill linear gradient draw object. /// /// This should be encoded after a path. pub fn fill_lin_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2]) { + self.drawtag_stream.push(DRAWTAG_FILLLINGRADIENT); let element = FillLinGradient { - tag: ELEMENT_FILLLINGRADIENT, index, p0, p1, - ..Default::default() }; - self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } - /// Start a clip and return a save point to be filled in later. - pub fn begin_clip(&mut self, blend: Option) -> usize { - let saved = self.drawobj_stream.len(); + /// Start a clip. + pub fn begin_clip(&mut self, blend: Option) { + self.drawtag_stream.push(DRAWTAG_BEGINCLIP); let element = Clip { - tag: ELEMENT_BEGINCLIP, blend: blend.unwrap_or(Blend::default()).pack(), - ..Default::default() }; - self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); self.n_clip += 1; - saved } - pub fn end_clip(&mut self, bbox: [f32; 4], blend: Option, save_point: usize) { + pub fn end_clip(&mut self, blend: Option) { + self.drawtag_stream.push(DRAWTAG_ENDCLIP); let element = Clip { - tag: ELEMENT_ENDCLIP, - bbox, blend: blend.unwrap_or(Blend::default()).pack(), - ..Default::default() }; - self.drawobj_stream[save_point + 4..save_point + 20] - .clone_from_slice(bytemuck::bytes_of(&bbox)); - self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); // This is a dummy path, and will go away with the new clip impl. self.tag_stream.push(0x10); self.n_path += 1; @@ -187,9 +181,11 @@ impl Encoder { /// beginning of free memory. pub fn stage_config(&self) -> (Config, usize) { // Layout of scene buffer + let drawtag_offset = 0; let n_drawobj = self.n_drawobj(); let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize); - let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE; + let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE; + let trans_offset = drawdata_offset + self.drawdata_stream.len(); let n_trans = self.transform_stream.len(); let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize); let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE; @@ -205,9 +201,9 @@ impl Encoder { alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE; let pathseg_alloc = alloc; alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE; - let bbox_alloc = alloc; + let path_bbox_alloc = alloc; let n_path = self.n_path as usize; - alloc += bbox_alloc + n_path * BBOX_SIZE; + alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE; let drawmonoid_alloc = alloc; alloc += n_drawobj_padded * DRAWMONOID_SIZE; let anno_alloc = alloc; @@ -226,6 +222,12 @@ impl Encoder { let clip_bbox_alloc = alloc; const CLIP_BBOX_SIZE: usize = 16; alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE; + let draw_bbox_alloc = alloc; + alloc += n_drawobj * DRAW_BBOX_SIZE; + let drawinfo_alloc = alloc; + // TODO: not optimized; it can be accumulated during encoding or summed from drawtags + const MAX_DRAWINFO_SIZE: usize = 16; + alloc += n_drawobj * MAX_DRAWINFO_SIZE; let config = Config { n_elements: n_drawobj as u32, @@ -233,12 +235,14 @@ impl Encoder { pathseg_alloc: pathseg_alloc as u32, anno_alloc: anno_alloc as u32, trans_alloc: trans_alloc as u32, - bbox_alloc: bbox_alloc as u32, + path_bbox_alloc: path_bbox_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, clip_alloc: clip_alloc as u32, clip_bic_alloc: clip_bic_alloc as u32, clip_stack_alloc: clip_stack_alloc as u32, clip_bbox_alloc: clip_bbox_alloc as u32, + draw_bbox_alloc: draw_bbox_alloc as u32, + drawinfo_alloc: drawinfo_alloc as u32, n_trans: n_trans as u32, n_path: self.n_path, n_clip: self.n_clip, @@ -246,15 +250,18 @@ impl Encoder { linewidth_offset: linewidth_offset as u32, pathtag_offset: pathtag_offset as u32, pathseg_offset: pathseg_offset as u32, + drawtag_offset: drawtag_offset as u32, + drawdata_offset: drawdata_offset as u32, ..Default::default() }; (config, alloc) } pub fn write_scene(&self, buf: &mut BufWrite) { - buf.extend_slice(&self.drawobj_stream); - let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE; - buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWOBJ_SIZE); + buf.extend_slice(&self.drawtag_stream); + let n_drawobj = self.drawtag_stream.len(); + buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE); + buf.extend_slice(&self.drawdata_stream); buf.extend_slice(&self.transform_stream); let n_trans = self.transform_stream.len(); buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE); @@ -265,9 +272,9 @@ impl Encoder { buf.extend_slice(&self.pathseg_stream); } - /// The number of elements in the draw object stream. + /// The number of draw objects in the draw object stream. pub(crate) fn n_drawobj(&self) -> usize { - self.drawobj_stream.len() / DRAWOBJ_SIZE + self.drawtag_stream.len() } /// The number of paths. @@ -296,7 +303,8 @@ impl Encoder { pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { self.tag_stream.extend(&glyph.tag_stream); self.pathseg_stream.extend(&glyph.pathseg_stream); - self.drawobj_stream.extend(&glyph.drawobj_stream); + self.drawtag_stream.extend(&glyph.drawtag_stream); + self.drawdata_stream.extend(&glyph.drawdata_stream); self.n_path += glyph.n_path; self.n_pathseg += glyph.n_pathseg; } @@ -325,15 +333,14 @@ impl GlyphEncoder { /// /// This should be encoded after a path. pub(crate) fn fill_color(&mut self, rgba_color: u32) { + self.drawtag_stream.push(DRAWTAG_FILLCOLOR); let element = FillColor { - tag: ELEMENT_FILLCOLOR, rgba_color, - ..Default::default() }; - self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } pub(crate) fn is_color(&self) -> bool { - !self.drawobj_stream.is_empty() + !self.drawtag_stream.is_empty() } } diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index bd26d45..5c5c78a 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -92,7 +92,7 @@ pub struct Renderer { clip_binding: ClipBinding, tile_pipeline: Pipeline, - tile_ds: DescriptorSet, + tile_ds: Vec, path_pipeline: Pipeline, path_ds: DescriptorSet, @@ -105,7 +105,7 @@ pub struct Renderer { bin_ds: DescriptorSet, coarse_pipeline: Pipeline, - coarse_ds: DescriptorSet, + coarse_ds: Vec, k4_pipeline: Pipeline, k4_ds: DescriptorSet, @@ -176,10 +176,8 @@ impl Renderer { }; let image_dev = session.create_image2d(width as u32, height as u32, image_format)?; - // Note: this must be updated when the config struct size changes. const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::() as u64; let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap(); - // TODO: separate staging buffer (if needed) let config_bufs = (0..n_bufs) .map(|_| { session @@ -197,7 +195,7 @@ impl Renderer { let element_stage = ElementStage::new(session, &element_code); let element_bindings = scene_bufs .iter() - .map(|scene_buf| { + .map(|scene_buf| element_stage.bind( session, &element_code, @@ -205,7 +203,7 @@ impl Renderer { scene_buf, &memory_buf_dev, ) - }) + ) .collect(); let clip_code = ClipCode::new(session); @@ -214,8 +212,15 @@ impl Renderer { let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc"); let tile_pipeline = session .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?; - let tile_ds = session - .create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?; + let tile_ds = scene_bufs + .iter() + .map(|scene_buf| { + session.create_simple_descriptor_set( + &tile_pipeline, + &[&memory_buf_dev, &config_buf, scene_buf], + ) + }) + .collect::, _>>()?; let path_alloc_code = include_shader!(session, "../shader/gen/path_coarse"); let path_pipeline = session @@ -243,10 +248,23 @@ impl Renderer { session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?; let coarse_code = include_shader!(session, "../shader/gen/coarse"); - let coarse_pipeline = session - .create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::BufReadOnly])?; - let coarse_ds = session - .create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?; + let coarse_pipeline = session.create_compute_pipeline( + coarse_code, + &[ + BindType::Buffer, + BindType::BufReadOnly, + BindType::BufReadOnly, + ], + )?; + let coarse_ds = scene_bufs + .iter() + .map(|scene_buf| { + session.create_simple_descriptor_set( + &coarse_pipeline, + &[&memory_buf_dev, &config_buf, scene_buf], + ) + }) + .collect::, _>>()?; let bg_image = Self::make_test_bg_image(&session); @@ -430,7 +448,7 @@ impl Renderer { cmd_buf.begin_debug_label("Tile allocation"); cmd_buf.dispatch( &self.tile_pipeline, - &self.tile_ds, + &self.tile_ds[buf_ix], (((self.n_paths + 255) / 256) as u32, 1, 1), (256, 1, 1), ); @@ -462,7 +480,7 @@ impl Renderer { cmd_buf.begin_debug_label("Coarse raster"); cmd_buf.dispatch( &self.coarse_pipeline, - &self.coarse_ds, + &self.coarse_ds[buf_ix], ( (self.width as u32 + 255) / 256, (self.height as u32 + 255) / 256, diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 1fe1ce9..e9a24fa 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -64,9 +64,6 @@ struct State { } struct ClipElement { - /// Byte offset of BeginClip element in element vec, for bbox fixup. - save_point: usize, - bbox: Option, blend: Option, } @@ -199,8 +196,6 @@ impl RenderContext for PietGpuRenderContext { fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush, width: f64) { self.encode_linewidth(width.abs() as f32); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); - // Note: the bbox contribution of stroke becomes more complicated with miter joins. - self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5)); let path = shape.path_elements(TOLERANCE); self.encode_path(path, false); self.encode_brush(&brush); @@ -217,9 +212,6 @@ impl RenderContext for PietGpuRenderContext { fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush) { let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); - // Note: we might get a good speedup from using an approximate bounding box. - // Perhaps that should be added to kurbo. - self.accumulate_bbox(|| shape.bounding_box()); let path = shape.path_elements(TOLERANCE); self.encode_linewidth(-1.0); self.encode_path(path, true); @@ -232,13 +224,11 @@ impl RenderContext for PietGpuRenderContext { self.encode_linewidth(-1.0); let path = shape.path_elements(TOLERANCE); self.encode_path(path, true); - let save_point = self.new_encoder.begin_clip(None); + self.new_encoder.begin_clip(None); if self.clip_stack.len() >= MAX_BLEND_STACK { panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); } self.clip_stack.push(ClipElement { - bbox: None, - save_point, blend: None, }); if let Some(tos) = self.state_stack.last_mut() { @@ -340,16 +330,13 @@ impl PietGpuRenderContext { self.encode_linewidth(-1.0); let path = shape.path_elements(TOLERANCE); self.encode_path(path, true); - let save_point = self.new_encoder.begin_clip(Some(blend)); + self.new_encoder.begin_clip(Some(blend)); if self.clip_stack.len() >= MAX_BLEND_STACK { panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); } self.clip_stack.push(ClipElement { - bbox: None, - save_point, blend: Some(blend), }); - self.accumulate_bbox(|| shape.bounding_box()); if let Some(tos) = self.state_stack.last_mut() { tos.n_clip += 1; } @@ -406,37 +393,7 @@ impl PietGpuRenderContext { fn pop_clip(&mut self) { let tos = self.clip_stack.pop().unwrap(); - let bbox = tos.bbox.unwrap_or_default(); - let bbox_f32_4 = rect_to_f32_4(bbox); - self.new_encoder.end_clip(bbox_f32_4, tos.blend, tos.save_point); - if let Some(bbox) = tos.bbox { - self.union_bbox(bbox); - } - } - - /// Accumulate a bbox. - /// - /// The bbox is given lazily as a closure, relative to the current transform. - /// It's lazy because we don't need to compute it unless we're inside a clip. - fn accumulate_bbox(&mut self, f: impl FnOnce() -> Rect) { - if !self.clip_stack.is_empty() { - let bbox = f(); - let bbox = self.cur_transform.transform_rect_bbox(bbox); - self.union_bbox(bbox); - } - } - - /// Accumulate an absolute bbox. - /// - /// The bbox is given already transformed into surface coordinates. - fn union_bbox(&mut self, bbox: Rect) { - if let Some(tos) = self.clip_stack.last_mut() { - tos.bbox = if let Some(old_bbox) = tos.bbox { - Some(old_bbox.union(bbox)) - } else { - Some(bbox) - }; - } + self.new_encoder.end_clip(tos.blend); } pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index e155c50..52b8bf1 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -47,12 +47,14 @@ pub struct Config { pub pathseg_alloc: u32, pub anno_alloc: u32, pub trans_alloc: u32, - pub bbox_alloc: u32, + pub path_bbox_alloc: u32, pub drawmonoid_alloc: u32, pub clip_alloc: u32, pub clip_bic_alloc: u32, pub clip_stack_alloc: u32, pub clip_bbox_alloc: u32, + pub draw_bbox_alloc: u32, + pub drawinfo_alloc: u32, pub n_trans: u32, pub n_path: u32, pub n_clip: u32, @@ -60,6 +62,8 @@ pub struct Config { pub linewidth_offset: u32, pub pathtag_offset: u32, pub pathseg_offset: u32, + pub drawtag_offset: u32, + pub drawdata_offset: u32, } // The "element" stage combines a number of stages for parts of the pipeline. diff --git a/piet-gpu/src/stages/draw.rs b/piet-gpu/src/stages/draw.rs index 5328a84..21312a4 100644 --- a/piet-gpu/src/stages/draw.rs +++ b/piet-gpu/src/stages/draw.rs @@ -28,6 +28,8 @@ use piet_gpu_hal::{ pub struct DrawMonoid { pub path_ix: u32, pub clip_ix: u32, + pub scene_offset: u32, + pub info_offset: u32, } const DRAW_WG: u64 = 256; @@ -93,7 +95,7 @@ impl DrawStage { pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage { // We're limited to DRAW_PART_SIZE^2 // Also note: size here allows padding - let root_buf_size = DRAW_PART_SIZE * 8; + let root_buf_size = DRAW_PART_SIZE * 16; let root_buf = session .create_buffer(root_buf_size, BufferUsage::STORAGE) .unwrap(); diff --git a/tests/src/clip.rs b/tests/src/clip.rs index cfd8a35..4a38949 100644 --- a/tests/src/clip.rs +++ b/tests/src/clip.rs @@ -163,8 +163,7 @@ impl ClipData { let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip; stages::Config { clip_alloc: clip_alloc as u32, - // TODO: this wants to be renamed to path_bbox_alloc - bbox_alloc: path_bbox_alloc as u32, + path_bbox_alloc: path_bbox_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, clip_bic_alloc: clip_bic_alloc as u32, clip_stack_alloc: clip_stack_alloc as u32, @@ -194,7 +193,7 @@ impl ClipData { let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16); let clip_result = bytemuck::cast_slice::(&buf[clip_range]); let draw_start = 8 + n_clip * 4 + n_path * 24; - let draw_range = draw_start..(draw_start + n_clip * 8); + let draw_range = draw_start..(draw_start + n_clip * 16); let draw_result = bytemuck::cast_slice::(&buf[draw_range]); let mut bbox_stack = Vec::new(); let mut parent_stack = Vec::new(); diff --git a/tests/src/draw.rs b/tests/src/draw.rs index 7b264d4..692f943 100644 --- a/tests/src/draw.rs +++ b/tests/src/draw.rs @@ -17,20 +17,23 @@ //! Tests for the piet-gpu draw object stage. use piet_gpu_hal::{BufWrite, BufferUsage}; -use rand::Rng; +use rand::{Rng, seq::SliceRandom}; use crate::{Config, Runner, TestResult}; use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage}; -const ELEMENT_SIZE: usize = 36; +const DRAWTAG_SIZE: usize = 4; const ANNOTATED_SIZE: usize = 40; -const ELEMENT_FILLCOLOR: u32 = 4; -const ELEMENT_FILLLINGRADIENT: u32 = 5; -const ELEMENT_FILLIMAGE: u32 = 6; -const ELEMENT_BEGINCLIP: u32 = 9; -const ELEMENT_ENDCLIP: u32 = 10; +// Tags for draw objects. See shader/drawtag.h for the authoritative source. +const DRAWTAG_FILLCOLOR: u32 = 4; +const DRAWTAG_FILLLINGRADIENT: u32 = 20; +const DRAWTAG_FILLIMAGE: u32 = 8; +const DRAWTAG_BEGINCLIP: u32 = 5; +const DRAWTAG_ENDCLIP: u32 = 37; + +const TAGS: &[u32] = &[DRAWTAG_FILLCOLOR, DRAWTAG_FILLLINGRADIENT, DRAWTAG_FILLIMAGE, DRAWTAG_BEGINCLIP, DRAWTAG_ENDCLIP]; struct DrawTestData { tags: Vec, @@ -47,7 +50,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { .session .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) .unwrap(); - let scene_size = n_tag * ELEMENT_SIZE as u64; + let scene_size = n_tag * DRAWTAG_SIZE as u64; let scene_buf = runner .session .create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE) @@ -92,7 +95,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { impl DrawTestData { fn new(n: u64) -> DrawTestData { let mut rng = rand::thread_rng(); - let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect(); + let tags = (0..n).map(|_| *TAGS.choose(&mut rng).unwrap()).collect(); DrawTestData { tags } } @@ -101,13 +104,14 @@ impl DrawTestData { // Layout of memory let drawmonoid_alloc = 0; - let anno_alloc = drawmonoid_alloc + 8 * n_tags; + let anno_alloc = drawmonoid_alloc + 16 * n_tags; let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags; let stage_config = stages::Config { n_elements: n_tags as u32, anno_alloc: anno_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, clip_alloc: clip_alloc as u32, + drawtag_offset: 0, ..Default::default() }; stage_config @@ -116,37 +120,35 @@ impl DrawTestData { fn memory_size(&self) -> u64 { // Note: this overallocates the clip buf a bit - only needed for the // total number of begin_clip and end_clip tags. - (8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64 + (8 + self.tags.len() * (16 + 4 + ANNOTATED_SIZE)) as u64 } fn fill_scene(&self, buf: &mut BufWrite) { - let mut element = [0u32; ELEMENT_SIZE / 4]; - for tag in &self.tags { - element[0] = *tag; - buf.push(element); - } + buf.extend_slice(&self.tags); } fn verify(&self, buf: &[u8]) -> Option { - let size = self.tags.len() * 8; + let size = self.tags.len() * 16; let actual = bytemuck::cast_slice::(&buf[8..8 + size]); let mut expected = DrawMonoid::default(); for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { // Verify exclusive prefix sum. let (path_ix, clip_ix) = Self::reduce_tag(*tag); if *actual != expected { + println!("{:?} {:?}", actual, expected); return Some(format!("draw mismatch at {}", i)); } expected.path_ix += path_ix; expected.clip_ix += clip_ix; + expected.scene_offset += tag & 28; } None } fn reduce_tag(tag: u32) -> (u32, u32) { match tag { - ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0), - ELEMENT_BEGINCLIP | ELEMENT_ENDCLIP => (1, 1), + DRAWTAG_FILLCOLOR | DRAWTAG_FILLLINGRADIENT | DRAWTAG_FILLIMAGE => (1, 0), + DRAWTAG_BEGINCLIP | DRAWTAG_ENDCLIP => (1, 1), // TODO: ENDCLIP will become (0, 1) _ => (0, 0), } diff --git a/tests/src/path.rs b/tests/src/path.rs index 6f1f61a..bf72c68 100644 --- a/tests/src/path.rs +++ b/tests/src/path.rs @@ -207,11 +207,11 @@ impl PathData { // Layout of memory let trans_alloc = 0; let pathseg_alloc = trans_alloc + n_trans * 24; - let bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE; + let path_bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE; let stage_config = stages::Config { pathseg_alloc, trans_alloc, - bbox_alloc, + path_bbox_alloc, n_trans, n_path: self.n_path, pathtag_offset,