From 3b67a4e7c16cde6650f72c4662aa55deef907951 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 17 Feb 2022 16:25:41 -0800 Subject: [PATCH] New clip implementation This PR reworks the clip implementation. The highlight is that clip bounding box accounting is now done on GPU rather than CPU. The clip mask is also rasterized on EndClip rather than BeginClip, which decreases memory traffic needed for the clip stack. This is a pretty good working state, but not all cleanup has been applied. An important next step is to remove the CPU clip accounting (it is computed and encoded, but that result is not used). Another step is to remove the Annotated structure entirely. Fixes #88. Also relevant to #119 --- piet-gpu/shader/binning.comp | 70 ++- piet-gpu/shader/build.ninja | 18 +- piet-gpu/shader/clip_leaf.comp | 287 +++++++++++ piet-gpu/shader/clip_reduce.comp | 148 ++++++ piet-gpu/shader/coarse.comp | 45 +- piet-gpu/shader/draw_leaf.comp | 38 +- piet-gpu/shader/gen/backdrop.hlsl | 5 + piet-gpu/shader/gen/backdrop.msl | 5 + piet-gpu/shader/gen/backdrop.spv | Bin 12588 -> 12840 bytes piet-gpu/shader/gen/backdrop_lg.hlsl | 5 + piet-gpu/shader/gen/backdrop_lg.msl | 5 + piet-gpu/shader/gen/backdrop_lg.spv | Bin 12620 -> 12872 bytes piet-gpu/shader/gen/bbox_clear.dxil | Bin 3160 -> 3156 bytes piet-gpu/shader/gen/bbox_clear.hlsl | 7 +- piet-gpu/shader/gen/bbox_clear.msl | 5 + piet-gpu/shader/gen/bbox_clear.spv | Bin 2748 -> 3000 bytes piet-gpu/shader/gen/binning.dxil | Bin 5800 -> 6388 bytes piet-gpu/shader/gen/binning.hlsl | 237 +++++---- piet-gpu/shader/gen/binning.msl | 214 ++++---- piet-gpu/shader/gen/binning.spv | Bin 16400 -> 19728 bytes piet-gpu/shader/gen/clip_leaf.dxil | Bin 0 -> 7232 bytes piet-gpu/shader/gen/clip_leaf.hlsl | 367 ++++++++++++++ piet-gpu/shader/gen/clip_leaf.msl | 366 ++++++++++++++ piet-gpu/shader/gen/clip_leaf.spv | Bin 0 -> 19028 bytes piet-gpu/shader/gen/clip_reduce.dxil | Bin 0 -> 4624 bytes piet-gpu/shader/gen/clip_reduce.hlsl | 177 +++++++ piet-gpu/shader/gen/clip_reduce.msl | 173 +++++++ piet-gpu/shader/gen/clip_reduce.spv | Bin 0 -> 9484 bytes piet-gpu/shader/gen/coarse.dxil | Bin 10984 -> 10696 bytes piet-gpu/shader/gen/coarse.hlsl | 506 ++++++++----------- piet-gpu/shader/gen/coarse.msl | 574 ++++++++++------------ piet-gpu/shader/gen/coarse.spv | Bin 64040 -> 61516 bytes piet-gpu/shader/gen/draw_leaf.dxil | Bin 6880 -> 6860 bytes piet-gpu/shader/gen/draw_leaf.hlsl | 257 +++++----- piet-gpu/shader/gen/draw_leaf.msl | 310 ++++++------ piet-gpu/shader/gen/draw_leaf.spv | Bin 38560 -> 37720 bytes piet-gpu/shader/gen/draw_reduce.hlsl | 5 + piet-gpu/shader/gen/draw_reduce.msl | 5 + piet-gpu/shader/gen/draw_reduce.spv | Bin 6856 -> 7108 bytes piet-gpu/shader/gen/kernel4.dxil | Bin 10004 -> 9872 bytes piet-gpu/shader/gen/kernel4.hlsl | 13 +- piet-gpu/shader/gen/kernel4.msl | 9 +- piet-gpu/shader/gen/kernel4.spv | Bin 39124 -> 38960 bytes piet-gpu/shader/gen/kernel4_gray.dxil | Bin 9932 -> 9808 bytes piet-gpu/shader/gen/kernel4_gray.hlsl | 13 +- piet-gpu/shader/gen/kernel4_gray.msl | 9 +- piet-gpu/shader/gen/kernel4_gray.spv | Bin 38880 -> 38716 bytes piet-gpu/shader/gen/path_coarse.hlsl | 5 + piet-gpu/shader/gen/path_coarse.msl | 5 + piet-gpu/shader/gen/path_coarse.spv | Bin 39324 -> 39576 bytes piet-gpu/shader/gen/pathseg.dxil | Bin 9592 -> 9592 bytes piet-gpu/shader/gen/pathseg.hlsl | 127 ++--- piet-gpu/shader/gen/pathseg.msl | 83 ++-- piet-gpu/shader/gen/pathseg.spv | Bin 34732 -> 35000 bytes piet-gpu/shader/gen/pathtag_reduce.dxil | Bin 4644 -> 4644 bytes piet-gpu/shader/gen/pathtag_reduce.hlsl | 27 +- piet-gpu/shader/gen/pathtag_reduce.msl | 21 +- piet-gpu/shader/gen/pathtag_reduce.spv | Bin 7820 -> 8088 bytes piet-gpu/shader/gen/tile_alloc.hlsl | 5 + piet-gpu/shader/gen/tile_alloc.msl | 5 + piet-gpu/shader/gen/tile_alloc.spv | Bin 15160 -> 15412 bytes piet-gpu/shader/gen/transform_leaf.dxil | Bin 5664 -> 5664 bytes piet-gpu/shader/gen/transform_leaf.hlsl | 7 +- piet-gpu/shader/gen/transform_leaf.msl | 5 + piet-gpu/shader/gen/transform_leaf.spv | Bin 12508 -> 12760 bytes piet-gpu/shader/gen/transform_reduce.dxil | Bin 4696 -> 4700 bytes piet-gpu/shader/gen/transform_reduce.hlsl | 7 +- piet-gpu/shader/gen/transform_reduce.msl | 5 + piet-gpu/shader/gen/transform_reduce.spv | Bin 7860 -> 8112 bytes piet-gpu/shader/kernel4.comp | 4 +- piet-gpu/shader/setup.h | 14 +- piet-gpu/src/encoder.rs | 30 +- piet-gpu/src/lib.rs | 47 +- piet-gpu/src/render_ctx.rs | 4 + piet-gpu/src/stages.rs | 7 + piet-gpu/src/stages/clip.rs | 94 ++++ tests/src/clip.rs | 237 +++++++++ tests/src/draw.rs | 13 +- tests/src/main.rs | 2 + tests/src/runner.rs | 34 +- 80 files changed, 3318 insertions(+), 1343 deletions(-) create mode 100644 piet-gpu/shader/clip_leaf.comp create mode 100644 piet-gpu/shader/clip_reduce.comp create mode 100644 piet-gpu/shader/gen/clip_leaf.dxil create mode 100644 piet-gpu/shader/gen/clip_leaf.hlsl create mode 100644 piet-gpu/shader/gen/clip_leaf.msl create mode 100644 piet-gpu/shader/gen/clip_leaf.spv create mode 100644 piet-gpu/shader/gen/clip_reduce.dxil create mode 100644 piet-gpu/shader/gen/clip_reduce.hlsl create mode 100644 piet-gpu/shader/gen/clip_reduce.msl create mode 100644 piet-gpu/shader/gen/clip_reduce.spv create mode 100644 piet-gpu/src/stages/clip.rs create mode 100644 tests/src/clip.rs diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index a3a8ffd..2304ea2 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -20,6 +20,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { #include "annotated.h" #include "bins.h" +#include "drawtag.h" // scale factors useful for converting coordinates to bins #define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX)) @@ -35,6 +36,47 @@ shared uint count[N_SLICE][N_TILE]; shared Alloc sh_chunk_alloc[N_TILE]; shared bool sh_alloc_failed; +DrawMonoid load_draw_monoid(uint element_ix) { + uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; + uint path_ix = memory[base]; + uint clip_ix = memory[base + 1]; + return DrawMonoid(path_ix, clip_ix); +} + +// Load bounding box computed by clip processing +vec4 load_clip_bbox(uint clip_ix) { + uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * clip_ix; + float x0 = uintBitsToFloat(memory[base]); + float y0 = uintBitsToFloat(memory[base + 1]); + float x1 = uintBitsToFloat(memory[base + 2]); + float y1 = uintBitsToFloat(memory[base + 3]); + vec4 bbox = vec4(x0, y0, x1, y1); + return bbox; +} + +vec4 bbox_intersect(vec4 a, vec4 b) { + return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); +} + +// Load path's bbox from bbox (as written by pathseg). +vec4 load_path_bbox(uint path_ix) { + uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + float bbox_l = float(memory[base]) - 32768.0; + float bbox_t = float(memory[base + 1]) - 32768.0; + float bbox_r = float(memory[base + 2]) - 32768.0; + float bbox_b = float(memory[base + 3]) - 32768.0; + vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +void store_path_bbox(AnnotatedRef ref, vec4 bbox) { + uint ix = ref.offset >> 2; + memory[ix + 1] = floatBitsToUint(bbox.x); + memory[ix + 2] = floatBitsToUint(bbox.y); + memory[ix + 3] = floatBitsToUint(bbox.z); + memory[ix + 4] = floatBitsToUint(bbox.w); +} + void main() { uint my_n_elements = conf.n_elements; uint my_partition = gl_WorkGroupID.x; @@ -61,13 +103,27 @@ void main() { case Annotated_Image: case Annotated_BeginClip: case Annotated_EndClip: - // Note: we take advantage of the fact that these drawing elements - // have the bbox at the same place in their layout. - AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref); - x0 = int(floor(clip.bbox.x * SX)); - y0 = int(floor(clip.bbox.y * SY)); - x1 = int(ceil(clip.bbox.z * SX)); - y1 = int(ceil(clip.bbox.w * SY)); + DrawMonoid draw_monoid = load_draw_monoid(element_ix); + uint path_ix = draw_monoid.path_ix; + vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); + uint clip_ix = draw_monoid.clip_ix; + if (clip_ix > 0) { + clip_bbox = load_clip_bbox(clip_ix - 1); + } + // For clip elements, clip_bbox is the bbox of the clip path, intersected + // with enclosing clips. + // For other elements, it is the bbox of the enclosing clips. + + vec4 path_bbox = load_path_bbox(path_ix); + vec4 bbox = bbox_intersect(path_bbox, clip_bbox); + // Avoid negative-size bbox (is this necessary)? + bbox.zw = max(bbox.xy, bbox.zw); + // Store clip-intersected bbox for tile_alloc. + store_path_bbox(ref, bbox); + x0 = int(floor(bbox.x * SX)); + y0 = int(floor(bbox.y * SY)); + x1 = int(ceil(bbox.z * SX)); + y1 = int(ceil(bbox.w * SY)); break; } diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 7b80f6f..8b9998f 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -22,7 +22,7 @@ rule dxil rule msl command = $spirv_cross --msl $in --output $out $msl_flags -build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h +build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h build gen/binning.hlsl: hlsl gen/binning.spv build gen/binning.dxil: dxil gen/binning.hlsl build gen/binning.msl: msl gen/binning.spv @@ -119,6 +119,16 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.msl: msl gen/draw_leaf.spv -build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv -build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl -build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl +build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h +build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv +build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl +build gen/clip_reduce.msl: msl gen/clip_reduce.spv + +build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h +build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv +build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl +build gen/clip_leaf.msl: msl gen/clip_leaf.spv + +build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv +build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl +build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl diff --git a/piet-gpu/shader/clip_leaf.comp b/piet-gpu/shader/clip_leaf.comp new file mode 100644 index 0000000..5f7e79b --- /dev/null +++ b/piet-gpu/shader/clip_leaf.comp @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The second dispatch of clip stack processing. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define LG_WG_SIZE (7 + LG_WG_FACTOR) +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE WG_SIZE + +layout(local_size_x = WG_SIZE) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +#include "annotated.h" + +// Some of this is cut'n'paste duplication with the reduce pass, and +// arguably should be moved to a common .h file. +// The bicyclic monoid + +struct ClipEl { + // index of parent node + uint parent_ix; + // bounding box + vec4 bbox; +}; + +struct Bic { + uint a; + uint b; +}; + +Bic bic_combine(Bic x, Bic y) { + uint m = min(x.b, y.a); + return Bic(x.a + y.a - m, x.b + y.b - m); +} + +// Load path's bbox from bbox (as written by pathseg). +vec4 load_path_bbox(uint path_ix) { + uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + float bbox_l = float(memory[base]) - 32768.0; + float bbox_t = float(memory[base + 1]) - 32768.0; + float bbox_r = float(memory[base + 2]) - 32768.0; + float bbox_b = float(memory[base + 3]) - 32768.0; + vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +vec4 bbox_intersect(vec4 a, vec4 b) { + return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); +} + +shared Bic sh_bic[WG_SIZE * 2 - 2]; +shared uint sh_stack[PARTITION_SIZE]; +shared vec4 sh_stack_bbox[PARTITION_SIZE]; +shared uint sh_link[PARTITION_SIZE]; +shared vec4 sh_bbox[PARTITION_SIZE]; + +// This is adapted directly from the stack monoid impl. +// Return value is reference within partition if >= 0, +// otherwise reference to stack. +uint search_link(inout Bic bic) { + uint ix = gl_LocalInvocationID.x; + uint j = 0; + while (j < LG_WG_SIZE) { + uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j)); + if (((ix >> j) & 1) != 0) { + Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic); + if (test.b > 0) { + break; + } + bic = test; + ix -= 1u << j; + } + j++; + } + if (ix > 0) { + while (j > 0) { + j--; + uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j)); + Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic); + if (test.b == 0) { + bic = test; + ix -= 1u << j; + } + } + } + // ix is the smallest value such that reduce(ix..th).b == 0 + if (ix > 0) { + return ix - 1; + } else { + return ~0u - bic.a; + } +} + +Bic load_bic(uint ix) { + uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix; + return Bic(memory[base], memory[base + 1]); +} + +ClipEl load_clip_el(uint ix) { + uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix; + uint parent_ix = memory[base]; + float x0 = uintBitsToFloat(memory[base + 1]); + float y0 = uintBitsToFloat(memory[base + 2]); + float x1 = uintBitsToFloat(memory[base + 3]); + float y1 = uintBitsToFloat(memory[base + 4]); + vec4 bbox = vec4(x0, y0, x1, y1); + return ClipEl(parent_ix, bbox); +} + +uint load_path_ix(uint ix) { + // This is one approach to a partial final block. Another would be + // to do a memset to the padding in the command queue. + if (ix < conf.n_clip) { + return memory[(conf.clip_alloc.offset >> 2) + ix]; + } else { + // EndClip tags don't implicate further loads. + return 0x80000000; + } +} + +void store_clip_bbox(uint ix, vec4 bbox) { + uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * ix; + memory[base] = floatBitsToUint(bbox.x); + memory[base + 1] = floatBitsToUint(bbox.y); + memory[base + 2] = floatBitsToUint(bbox.z); + memory[base + 3] = floatBitsToUint(bbox.w); +} + +void main() { + // materialize stack up to the start of this partition. This + // is based on the pure stack monoid, but with two additions. + + // First, (this only matters if the stack goes deeper than the + // partition size, which might be unlikely in practice), the + // topmost stack element from each partition is picked, then an + // exclusive scan of those. Also note that if this is skipped, + // a scan is not needed in the reduce stage. + + // Second, after the stream compaction, do a scan of the retrieved + // bbox values. + uint th = gl_LocalInvocationID.x; + Bic bic = Bic(0, 0); + if (th < gl_WorkGroupID.x) { + bic = load_bic(th); + } + sh_bic[th] = bic; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (th + (1u << i) < WG_SIZE) { + Bic other = sh_bic[th + (1u << i)]; + bic = bic_combine(bic, other); + } + barrier(); + sh_bic[th] = bic; + } + barrier(); + uint stack_size = sh_bic[0].b; + + // TODO: do bbox scan here (to unlock greater stack depth) + + // binary search in stack + uint sp = PARTITION_SIZE - 1 - th; + uint ix = 0; + for (uint i = 0; i < LG_WG_SIZE; i++) { + uint probe = ix + (uint(PARTITION_SIZE / 2) >> i); + if (sp < sh_bic[probe].b) { + ix = probe; + } + } + // ix is largest value such that sp < sh_bic[ix].b (if any) + uint b = sh_bic[ix].b; + vec4 bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if (sp < b) { + // maybe store the index here for future use? + ClipEl el = load_clip_el(ix * PARTITION_SIZE + b - sp - 1); + sh_stack[th] = el.parent_ix; + bbox = el.bbox; + // other element values here? + } + + // forward scan of bbox values of prefix stack + for (uint i = 0; i < LG_WG_SIZE; i++) { + sh_stack_bbox[th] = bbox; + barrier(); + if (th >= (1u << i)) { + bbox = bbox_intersect(sh_stack_bbox[th - (1u << i)], bbox); + } + barrier(); + } + sh_stack_bbox[th] = bbox; + + // Read input and compute bicyclic semigroup binary tree + uint inp = load_path_ix(gl_GlobalInvocationID.x); + bool is_push = int(inp) >= 0; + bic = Bic(1 - uint(is_push), uint(is_push)); + sh_bic[th] = bic; + if (is_push) { + bbox = load_path_bbox(inp); + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + uint inbase = 0; + for (uint i = 0; i < LG_WG_SIZE - 1; i++) { + uint outbase = 2 * WG_SIZE - (1u << (LG_WG_SIZE - i)); + barrier(); + if (th < (1u << (LG_WG_SIZE - 1 - i))) { + sh_bic[outbase + th] = bic_combine(sh_bic[inbase + th * 2], sh_bic[inbase + th * 2 + 1]); + } + inbase = outbase; + } + barrier(); + // Search for predecessor node + bic = Bic(0, 0); + uint link = search_link(bic); + // we use N_SEQ > 1 convention here: + // link >= 0 is index within partition + // link < 0 is reference to stack + + // We want grandparent bbox for pop nodes, so follow those links. + sh_link[th] = link; + barrier(); + uint grandparent; + if (int(link) >= 0) { + grandparent = sh_link[link]; + } else { + grandparent = link - 1; + } + + // Resolve parent + uint parent; + if (int(link) >= 0) { + parent = gl_WorkGroupID.x * PARTITION_SIZE + link; + } else if (int(link + stack_size) >= 0) { + parent = sh_stack[PARTITION_SIZE + link]; + } else { + parent = ~0u; + } + + // bbox scan along parent links + for (uint i = 0; i < LG_WG_SIZE; i++) { + // sh_link was already stored for first iteration + if (i != 0) { + sh_link[th] = link; + } + sh_bbox[th] = bbox; + barrier(); + if (int(link) >= 0) { + bbox = bbox_intersect(sh_bbox[link], bbox); + link = sh_link[link]; + } + barrier(); + } + if (int(link + stack_size) >= 0) { + bbox = bbox_intersect(sh_stack_bbox[PARTITION_SIZE + link], bbox); + } + // At this point, bbox is the reduction of bounding boxes along the tree. + sh_bbox[th] = bbox; + barrier(); + + uint path_ix = inp; + if (!is_push && gl_GlobalInvocationID.x < conf.n_clip) { + // Is this load expensive? If so, it's loaded earlier for in-partition + // and is in the ClipEl for cross-partition. + // If not, can probably get rid of it in the stack intermediate buf. + path_ix = load_path_ix(parent); + uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp; + // Fix up drawmonoid so path_ix at EndClip matches BeginClip + memory[drawmonoid_out_base] = path_ix; + + if (int(grandparent) >= 0) { + bbox = sh_bbox[grandparent]; + } else if (int(grandparent + stack_size) >= 0) { + bbox = sh_stack_bbox[PARTITION_SIZE + grandparent]; + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + } + store_clip_bbox(gl_GlobalInvocationID.x, bbox); +} diff --git a/piet-gpu/shader/clip_reduce.comp b/piet-gpu/shader/clip_reduce.comp new file mode 100644 index 0000000..c62b239 --- /dev/null +++ b/piet-gpu/shader/clip_reduce.comp @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The reduce pass for clip stack processing. + +// The primary input is a sequence of path ids representing paths to +// push, with a special value of ~0 to represent pop. + +// For each path, the bounding box is found in the anno stream +// (anno_alloc), though this may change. + +// Output is a stack monoid reduction for the partition. The Bic +// is stored in the BicBuf, and the stack slice in StackBuf. + +// Note: for this shader, only pushes are represented in the stack +// monoid reduction output, so we don't have to worry about the +// interpretation of pops. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define LG_WG_SIZE (7 + LG_WG_FACTOR) +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE WG_SIZE + +layout(local_size_x = WG_SIZE) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +#include "annotated.h" + +// The intermediate state for clip processing. +struct ClipEl { + // index of parent node + uint parent_ix; + // bounding box + vec4 bbox; +}; + +// The bicyclic monoid +struct Bic { + uint a; + uint b; +}; + +Bic bic_combine(Bic x, Bic y) { + uint m = min(x.b, y.a); + return Bic(x.a + y.a - m, x.b + y.b - m); +} + +shared Bic sh_bic[WG_SIZE]; +shared uint sh_parent[WG_SIZE]; +shared uint sh_path_ix[WG_SIZE]; +shared vec4 sh_bbox[WG_SIZE]; + +// Load path's bbox from bbox (as written by pathseg). +vec4 load_path_bbox(uint path_ix) { + uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; + float bbox_l = float(memory[base]) - 32768.0; + float bbox_t = float(memory[base + 1]) - 32768.0; + float bbox_r = float(memory[base + 2]) - 32768.0; + float bbox_b = float(memory[base + 3]) - 32768.0; + vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +vec4 bbox_intersect(vec4 a, vec4 b) { + return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); +} + +void store_bic(uint ix, Bic bic) { + uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix; + memory[base] = bic.a; + memory[base + 1] = bic.b; +} + +void store_clip_el(uint ix, ClipEl el) { + uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix; + memory[base] = el.parent_ix; + memory[base + 1] = floatBitsToUint(el.bbox.x); + memory[base + 2] = floatBitsToUint(el.bbox.y); + memory[base + 3] = floatBitsToUint(el.bbox.z); + memory[base + 4] = floatBitsToUint(el.bbox.w); +} + +void main() { + uint th = gl_LocalInvocationID.x; + uint inp = memory[(conf.clip_alloc.offset >> 2) + gl_GlobalInvocationID.x]; + bool is_push = int(inp) >= 0; + // reverse scan of bicyclic semigroup + Bic bic = Bic(1 - uint(is_push), uint(is_push)); + sh_bic[gl_LocalInvocationID.x] = bic; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (th + (1u << i) < WG_SIZE) { + Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; + bic = bic_combine(bic, other); + } + barrier(); + sh_bic[th] = bic; + } + if (th == 0) { + store_bic(gl_WorkGroupID.x, bic); + } + barrier(); + uint size = sh_bic[0].b; + bic = Bic(0, 0); + if (th + 1 < WG_SIZE) { + bic = sh_bic[th + 1]; + } + if (is_push && bic.a == 0) { + uint local_ix = size - bic.b - 1; + sh_parent[local_ix] = th; + sh_path_ix[local_ix] = inp; + } + barrier(); + // Do forward scan of bounding box intersection + vec4 bbox; + uint path_ix; + if (th < size) { + path_ix = sh_path_ix[th]; + bbox = load_path_bbox(path_ix); + } + // Not necessary if depth is bounded by wg size +#if 0 + for (uint i = 0; i < LG_WG_SIZE; i++) { + // We gate so we never access uninit data, but it might + // be more efficient to avoid the conditionals. + if (th < size) { + sh_bbox[th] = bbox; + } + barrier(); + if (th < size && th >= (1u << i)) { + bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox); + } + barrier(); + } +#endif + if (th < size) { + uint parent_ix = sh_parent[th] + gl_WorkGroupID.x * PARTITION_SIZE; + ClipEl el = ClipEl(parent_ix, bbox); + store_clip_el(gl_GlobalInvocationID.x, el); + } +} diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index bf5f949..98ab270 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -136,9 +136,6 @@ void main() { // currently in a clip for which the entire tile has an alpha of zero, and // the value is the depth after the "begin clip" of that element. uint clip_zero_depth = 0; - // State for the "clip one" optimization. If bit `i` is set, then that means - // that the clip pushed at depth `i` has an alpha of all one. - uint clip_one_mask = 0; // I'm sure we can figure out how to do this with at least one fewer register... // Items up to rd_ix have been read from sh_elements @@ -227,9 +224,8 @@ void main() { case Annotated_LinGradient: case Annotated_BeginClip: case Annotated_EndClip: - // We have one "path" for each element, even if the element isn't - // actually a path (currently EndClip, but images etc in the future). - uint path_ix = element_ix; + uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; + uint path_ix = memory[drawmonoid_base]; Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -283,15 +279,15 @@ void main() { uint x = sh_tile_x0[el_ix] + seq_ix % width; uint y = sh_tile_y0[el_ix] + seq_ix / width; bool include_tile = false; - if (tag == Annotated_BeginClip || tag == Annotated_EndClip) { - include_tile = true; - } else if (mem_ok) { + if (mem_ok) { Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); - // Include the path in the tile if - // - the tile contains at least a segment (tile offset non-zero) - // - the tile is completely covered (backdrop non-zero) - include_tile = tile.tile.offset != 0 || tile.backdrop != 0; + bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip; + // Always include the tile if it contains a path segment. + // For draws, include the tile if it is solid. + // For clips, include the tile if it is empty - this way, logic + // below will suppress the drawing of inner elements. + include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip; } if (include_tile) { uint el_slice = el_ix / 32; @@ -378,33 +374,26 @@ void main() { (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); if (tile.tile.offset == 0 && tile.backdrop == 0) { clip_zero_depth = clip_depth + 1; - } else if (tile.tile.offset == 0 && clip_depth < 32) { - clip_one_mask |= (1u << clip_depth); } else { - AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref); if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth); Cmd_BeginClip_write(cmd_alloc, cmd_ref); cmd_ref.offset += 4; - if (clip_depth < 32) { - clip_one_mask &= ~(1u << clip_depth); - } } clip_depth++; break; case Annotated_EndClip: + tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), + TileRef(sh_tile_base[element_ref_ix] + + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); clip_depth--; - if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) { - if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { - break; - } - Cmd_Solid_write(cmd_alloc, cmd_ref); - cmd_ref.offset += 4; - Cmd_EndClip_write(cmd_alloc, cmd_ref); - cmd_ref.offset += 4; + if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { + break; } + write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0); + Cmd_EndClip_write(cmd_alloc, cmd_ref); + cmd_ref.offset += 4; break; } } else { diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index c020847..f236b7f 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -72,9 +72,14 @@ void main() { } uint out_ix = gl_GlobalInvocationID.x * N_ROWS; uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2; + uint clip_out_base = conf.clip_alloc.offset >> 2; AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size); for (uint i = 0; i < N_ROWS; i++) { - Monoid m = combine_tag_monoid(row, local[i]); + Monoid m = row; + if (i > 0) { + m = combine_tag_monoid(m, local[i - 1]); + } + // m now holds exclusive scan of draw monoid memory[out_base + i * 2] = m.path_ix; memory[out_base + i * 2 + 1] = m.clip_ix; @@ -83,8 +88,9 @@ void main() { // later stages read scene + bbox etc. ElementRef this_ref = Element_index(ref, i); tag_word = Element_tag(this_ref).tag; - if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) { - uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1); + if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage || + tag_word == Element_BeginClip) { + uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix; float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; float bbox_r = float(memory[bbox_offset + 2]) - 32768.0; @@ -142,21 +148,27 @@ void main() { anno_img.offset = fill_img.offset; Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img); break; + case Element_BeginClip: + AnnoBeginClip anno_begin_clip; + anno_begin_clip.bbox = bbox; + anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke + Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip); + break; } - } else if (tag_word == Element_BeginClip) { - Clip begin_clip = Element_BeginClip_read(this_ref); - AnnoBeginClip anno_begin_clip; - // This is the absolute bbox, it's been transformed during encoding. - anno_begin_clip.bbox = begin_clip.bbox; - anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke - Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip); } else if (tag_word == Element_EndClip) { - Clip end_clip = Element_EndClip_read(this_ref); AnnoEndClip anno_end_clip; - // This bbox is expected to be the same as the begin one. - anno_end_clip.bbox = end_clip.bbox; + // The actual bbox will be reconstructed from clip stream output. + anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9); Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip); } + // Generate clip stream. + if (tag_word == Element_BeginClip || tag_word == Element_EndClip) { + uint path_ix = ~(out_ix + i); + if (tag_word == Element_BeginClip) { + path_ix = m.path_ix; + } + memory[clip_out_base + m.clip_ix] = path_ix; + } out_ref.offset += Annotated_size; } } diff --git a/piet-gpu/shader/gen/backdrop.hlsl b/piet-gpu/shader/gen/backdrop.hlsl index 65b969d..a2e71a8 100644 --- a/piet-gpu/shader/gen/backdrop.hlsl +++ b/piet-gpu/shader/gen/backdrop.hlsl @@ -44,8 +44,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/backdrop.msl b/piet-gpu/shader/gen/backdrop.msl index 7640ed0..be670fc 100644 --- a/piet-gpu/shader/gen/backdrop.msl +++ b/piet-gpu/shader/gen/backdrop.msl @@ -63,8 +63,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/backdrop.spv b/piet-gpu/shader/gen/backdrop.spv index f3a782470d434b7dc5130e9c37d12365f09e8d2d..f90bf6e577b8d16768e7caa5e093ab6b80ff3021 100644 GIT binary patch delta 352 zcmZ3Jv?66g7JEG#!*&J+1|A?z&dDr@Pt3{5PiA0XV_*l1^TNcFGLsQ<9AG&{nUh5e6v(6?=d&L`)dS28sC~ ziHShP0+7T&mVnfSAc=`V#UeHz{nUh5e6v(6?=d&L`)dS28sC~ ziHShP0+7T&mVnfSAc=`V#UeHz_h6@Ckz1IEt&+lcvU;)=MA+ALRTuTB@OgR8lpv=a}(3G&Cpr;}6a)%>R zgMx%cta6H!X;K1%0tefX;~C0q96$vZDkd;^F6Lx3xMRSQVsKi3<6s9PNU=$xfvCUf zECGg8hae{gQJ2stM;2u^UZ9Q-Ga8Sv+}@fyJJ<;(GUwnm7MWv(cY>W^A`2Lwv&@w& hiDefRJlNsL3^dRt^MY+-IorJFuA5nyn1CJt0RRoaUCICe delta 315 zcmca2aYI7VCBn)1oyHQwLu*{OJbYdyDR|#on2~{jA!4IsJe%d$vJVezG}NbL?O)_h6|o>87}zqpWn-T!3C~W30eykxE4Ebwfb=O097zBD6?@gG$kx3>6?&x zrNfb_K|w;}o^nRCX;K1%0tXw5gvsp=Mv$n9yo$-?3?~L2a~?*?Ge#^<21yMd6}=KZ zN0WQiSR5oLL{1TKV4WByaInLX8K}c1eS;xmd!fwpDR7aD1E!ACOJl!Zfs15buxu=6 hoA=z&)x{v`0?5FAsS5uO3lDy;y{OaJ0Cpk+0|4m%V0i!l diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl index 64b109f..903d84c 100644 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ b/piet-gpu/shader/gen/bbox_clear.hlsl @@ -17,8 +17,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -39,7 +44,7 @@ struct SPIRV_Cross_Input void comp_main() { uint ix = gl_GlobalInvocationID.x; - if (ix < _21.Load(52)) + if (ix < _21.Load(68)) { uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); _45.Store(out_ix * 4 + 8, 65535u); diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl index 9af5b11..e80f15e 100644 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ b/piet-gpu/shader/gen/bbox_clear.msl @@ -22,8 +22,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv index c4595020a0486470934ae2a053384d4b4702108c..e3e88d76ca71de0a37372ee9df7f1c6c39a54ecc 100644 GIT binary patch delta 356 zcmdlZxj zF*zG1i_pstlTS*@uYgLkF|dO53IOHu;!BDWCs#6YYYKvS1&JjYAl(oZAXnwZgWQBL zQF!u9CTT{I$y=G^WkrGVARQ%%>GAn#X~n4}2z6qcKQg&8#Ul(-1}gRdV~ChAkPQ;^ zK@t;ziUlBvfh+;33qcYSgNj9LHe}6ZWR#pdpUrgg0XBijSJ)&bZ(w7Y{Dv)pQE;;> HdpR=zSaCfd delta 96 zcmdlXzDIOJ785%wgAfA)1JC4#Ov0kPKvrISL1IY;0|OfaJ6MKqvMjSSBmZPeW_d<| v$&o;|;O0_hSEkK!Y#EHqA`C*4FR}|yp2My%`3$?nT0W%?oN=>RFTD{AL>#w znS>AoBp7rl+BQ{dY1g&!qbzi5Hv!sGl@?w4TouZK*L9IyZz=WO)!pvBdjR^PV&B`TgFLt`&DP<8wEe%=|}e=*>0k3%Ri<$Xw-Wccub}E`oOj28tmgpv8p2ozk!LpzAMb7Xf7qB# z23#R5jDf_Bma1w26eYMYbqZO@63HJ`a1|*ecwenPl7K#sO%?&Yk#&a$Tz3D-@ZkFvI=wk}%cjmzTpixW31CAM9=NW5q2F(3 z44;pdZir`JarW76-t3$GXzKG@*FPGVow{%H!ZlKk@})O#6(wH}=mg3X3`Z?l*0t z-Hu8zf-(CPnk?Kp_kJAFTzas2ROOqTeeKf+JcL`ntaryxeXffyFa5r6pWt3ArR#2X zhaZj4`0eiR_;i`3JT+AVBSS_pQm5ANjikMD6S=)Zobp@dM1;kiZ#t5H#wN@y&@%Yu z(w#GHqXtKnxZc}I8s3;VKr|;#iyLx2CkEevtNyxY$Cf#YH@5NQsAtVAoPr97XD;!? zTrXt^1>Xoz=+FP8dFZw=v75_6rMeiLLTY1s9&#V2tr^uvtkF67zR>E z(s*=*ZfJ+#Az+1drI5UL5A_K8Jn!wMmLn5q$Eb7vhIwpZ0C*S*8-4AH1;j2SVhm^`Fh#S|D1n&MPN>Vw6T?FBmt z)sNZxgsSxZ|1VQLIk~aWNNmVmrFaUO6B3hoRY=48iQF0)R@x9B&=6&8J$IES*IPjP zW^B`dPyVigN>BvTDRxB_Xrg!7qXi45vDPD9C&D?U^;X9hc=TSEpdC-q2FZBT`r|2vo&Qf-f?-nb^^y`>rng*m0oR!!4r5oG&HkJn6 z2+A`wZ_MzaF5(@1rvf}YVsBRjYRvYwpst_Am!Cv8_+^ij!&)E-4n({M2t?PMg0qqu zUrdJ$C0z-^V~k@nTJ95~hz}H%ak6AOlLZPK)9V|a);d=C>n}TjmQ0*-C)FAoLqwpZ0#)Glr%sp`qT}_T5RApR5(gLBUp~j7UU(p~E z_ANPumfCn3(Zwa66Mn)gp|=_Jz){CGtR}Bmveh8N+AXVHjPKqw%Cw@@6HZXs1Hr}`?kh>Dk%!DTwoBgbDRp7aS2>{hi z?6!t=C1kD?58~|=Wy0-zDgx>o*x9s~{Xpr!pjI{Geaqa{A$X5F%tX(_Q!^?A#dLJF z>1)vKJj^Q{1OzJ_-GgB-lXa$pS!%pIbwUl~h%SKYrY9q~ zi^E}brd3=!)7|vCr-9Ysrxo(kJq>1lMecHLz$Yfa0GF{>Sc;xe3<|nI&9Xn9mfoLF zP~==}y0Q#-^6oQlC^2!X{C7u}3vzJAz3k>g<(!y`rPdE~YY-c2_^YEXpQ6On(S*NB zW&rCU5Z48|%s+D5W#Ex}^>i2L=%4GXVqFVI!L; z2U1YUqdiT55azaF_C{C;)7SDL>6vsL1CHB-iQJYljy9#NHD5Fc3Dqo_-SKQ;2X}~t zUz47E{tm~Rs?BRjPHFz>-F%zAlEc>^v|T-c2$L=zb=-EXBTGP^Ai?{? z?M_(#7a}i3>gPSnA1K)FC+a2odBK?HTSv)h-RD?L=9qMk=t_DDv@DJ`wthvXYkgPI zM>!>CJ69n3PCxK$23rRLVunTj1+?0H0-By-g$z!2U5MeWMTTpwvm(PE(H!VpjbkAO z#g;_|#S^WNVB3ScAi+t}79u#mNRZa{Cy{`~R>LhcI}1ZwC|e}3MZYOByqi4;8AjZV zA%-s(8FsgQCo*in;72(2pA0cjIg}2C+B^}ZvTJh6{bEAJwlo)1^|+gyg>F2FkU}r_ zT_%4#=yK1Q-FYP1>3-4zeUWhob&hW#L=GXgnmg`26U44Ta-YxLaw&2(7M(b?a`lML z!g>>WTcC5765JtVpVNYwihoi8D#MQ2BY2@py1!!~;g)W0`%0^fiGvB375uC47`tvzp{c-W${iq$qD&RAKIUx`0Df27>zDAg+Efhdm3 z!7bt!UQspE4D@`2ZVzi||Fm^v5ME4|B8#wSo>5<}F_)%rrGCYf4trm67XOl!_@%G@ zuoR1CmsPxA+*lg1J_zdN&uNz9e)pS?Ur+V{I(kZo$iZtw?pLE#wg<;Zm2%2$N8l}z zyvLLhtTC^IwSOs#dy2c^Phz^*lH;8qCi95wbzB9G30P&A-!5Z&nC)TapU*VrBKmEE z-k(nt`vfGNk6IG)3?_PITAQ!r`mvJf<0Ts^4l!qUY+&_ciZP5X%9(N~jsT}*sd)2v zzmAs3*UBry43ryz{};U9MQ82N$J^XgzQJY+GTnOC!h z^BH1`Ym6M=F8UHg@ZLG_E)lG46~VXWz>kVxe}M?TCxZ1FV_a1lm#oEZKumcgB%CZp JGvUJX?Qa3dG2{RM delta 3370 zcmd5-eN+=y7JrkO43jXC2?U&g;)DQ=8f-uXtsl)7h!kzK>Y}nbfFdf=_))tmJsCm* zL7<{ROBLHusixMtsd6gXk4*@*6rrMZ+gMo_l(p3Dw$ycd^t8wB8%XW$KfC|-os*My z@B6*qyZ62OyK_I)lM-vrGn(2yHzc1v-&K2bOYh3y&X3k1002+S-q=`i0|CIPy)(}x zH-`bh0x|dsu9@@Z=K}aoL6`&jjV9ks6$!vi#4VK^N1k%OBViEUeG_@YWAvqeSeC)nv*+s%MFjPG3N@R01dxWpuAw|6Rb?XZvAde!M%*qgw_HV zG5<@o=O~k)0MlMeULa4(eI$K8Xf6p*@_ns~G7V-8@HDG|*!@RQlf;CxS0$+KYlqu{ zE`Bai4={le4&C6J9H2+%fbFbA5Pit-HLOELT*%;5d%`f`PDX|)UI#>M$l$c_o+vkh zB_`$^)9r5Hv(cRDPerOt80S)ggms{^8DR==h~5Fu+3rpyXW76(23)}lm~;X&6qJ_P zL3Ugfs!Ar-P=o6TViJs?kZ~RUjSf!Te+g7Z^LH2Q&PbuI)hO9X|6StdwYx zIi?~$3{j4!C@^ zYtZHT#Cg8{$o`%M6jxWJys?u0OCN^x=Cz#fxgz-L%?INIPrs{o|G-P{eBj)hdSr6o z$AiaqxhEGbdgV@5Muq`gkS%lX5i4!F%`Cn}7q&DQ{N~I2tDRkaL&q^gblU8(@1>nT z-qV>XMIF@l@D5h`M?wbjk*l0_G%AoY4Jb*$u@A#fey^J$=nSxLnV% zpe0m-*@ow`6`=yWQPj9rzJ0*?&quCFf9rd>S&;92*{GafLjPONebE1loVU3pk`u_R z5jD5`7Sx>h|JCfA=sqPn6n48}{t48*&}|G~Y>`lpbxH1+!~j8Q!l3W5`jHkU7`nmh z9AJbQ9Bk+9|CD(c`D+JY!ZG*#a7H#X4$)9ovw#a~cdJ`PK9l;3WRnT5)j*TdqOEaw z2LmO`%bOVRS(U>QQ%KXIAT=!tn;J|eR=5w7<0Mu*h#`D3p?9~FPl?u9-H*s5#CJAP z*81D3INSfMxB>=%HduL4*b1T9ff)G1x`Col|4?C0YFwNeU80|VO<&MhsQ)CIF(PRS z#*8dhFj>f!M&psj1p|%qHsgxt^$&4{#jyY9pA8j?HTuj(ld@qMnz-Kw+mx*cV+i{n z;Kv52aS5=q-T{AlFEZZicNs(+LznLG$zEW?Ht7mkzz}$iM!)^n>5IBT__zdqGv4Fp z!Zzv-QegTJqAj#I=MYsg7Z-u^(%D=ld8#+e??vPf{NS1a_qzPij~F_g5m{(1$JPa6 zQhi1Z7QoGn6t&bUeF^w-+Q|kvBD(;boQdXj=)>Qj$T+HEBB`HK0&3~f0foH)P0%|L zk4DDI(J3uPU8x>BsRIF-dlKi@nixojJ$b4fbVm|u92e5M);>OHSh~v^y(AQLy!Iz# z8E)f8qHd~aqV44anT>nEu@#T^qT*dXB=)*di z286w$DB>+DA2cY-nb+kg3Jf~yJ%6-49Hb3K3{D+=sxeRdj8;T{++{D%B1z%+htVom&}G8F!I6Z!@e!GZBYsG{F<(g(=APzsVAdCqN3 z)i7F*Z!9lr@X0HM^SL}@JiU)jogpOeNg7n?vdA&;P_RX^leLF1tL*WjH68r`Cdf2- ztHYAANZSZ#w`3Pl7XqyjO?ZtR1@W)?s)4d>hE&&7#yI#g|9GzzNo*}lXdA#C`pV^1 zCP}fRp+d?VTZ?#68+F&i?~~#hb8?FVR#mfjAy$NEH7GbaeI`nHH4WrxbM#}3HczkX zqq0kMmuy_3Ut5tjdS$gMyd61~nV_h{C&U=p+EBxPZqTxFQ+{2hP+|el`|Ikv-WuL6 zlEz;Y#iRzsD})Nxk>E`fW-eSeQ&W4Gn55zF8j@LgU{z~~z%uJG{d=I6x7kHG6G|Zi zSCt=P@IPj_UVBDhcw~42hOoZxmk?$@X6Uul!Vq>RmO}y$zB)v3{V~DB`f~!oVk68U zu+fnH?-iZ zhck7BRzdc`jSn0P?tfHb1WSgaEXeATXq?Qz>~wdUZh@}QD?s*dIwgMAmASF*VoAqE ztzoL=NR3oI^-?7MeM9|Wp$FL3f?{R!JUH#FOvl*+{mLPrsov0?r5x4Kcj>vepSW{t znox*KZfQ?Dz@!1Oi~YbEeoK{vd0hi@d(}LkqlfHcGO)iR z_*rsnwBpGiu8> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _221 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _221; + AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _181; } -AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) +DrawMonoid load_draw_monoid(uint element_ix) +{ + uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix); + uint path_ix = _94.Load(base * 4 + 8); + uint clip_ix = _94.Load((base + 1u) * 4 + 8); + DrawMonoid _222 = { path_ix, clip_ix }; + return _222; +} + +float4 load_clip_bbox(uint clip_ix) +{ + uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix); + float x0 = asfloat(_94.Load(base * 4 + 8)); + float y0 = asfloat(_94.Load((base + 1u) * 4 + 8)); + float x1 = asfloat(_94.Load((base + 2u) * 4 + 8)); + float y1 = asfloat(_94.Load((base + 3u) * 4 + 8)); + float4 bbox = float4(x0, y0, x1, y1); + return bbox; +} + +float4 load_path_bbox(uint path_ix) +{ + uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix); + float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f; + float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +float4 bbox_intersect(float4 a, float4 b) +{ + return float4(max(a.xy, b.xy), min(a.zw, b.zw)); +} + +void store_path_bbox(AnnotatedRef ref, float4 bbox) { uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - AnnoEndClip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - return s; -} - -AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) -{ - AnnoEndClipRef _228 = { ref.offset + 4u }; - Alloc param = a; - AnnoEndClipRef param_1 = _228; - return AnnoEndClip_read(param, param_1); + _94.Store((ix + 1u) * 4 + 8, asuint(bbox.x)); + _94.Store((ix + 2u) * 4 + 8, asuint(bbox.y)); + _94.Store((ix + 3u) * 4 + 8, asuint(bbox.z)); + _94.Store((ix + 4u) * 4 + 8, asuint(bbox.w)); } Alloc new_alloc(uint offset, uint size, bool mem_ok) @@ -143,22 +161,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) MallocResult malloc(uint size) { - uint _90; - _84.InterlockedAdd(0, size, _90); - uint offset = _90; - uint _97; - _84.GetDimensions(_97); - _97 = (_97 - 8) / 4; + uint _100; + _94.InterlockedAdd(0, size, _100); + uint offset = _100; + uint _107; + _94.GetDimensions(_107); + _107 = (_107 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_97) * 4); + r.failed = (offset + size) > uint(int(_107) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _119; - _84.InterlockedMax(4, 1u, _119); + uint _129; + _94.InterlockedMax(4, 1u, _129); return r; } return r; @@ -172,7 +190,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _84.Store(offset * 4 + 8, val); + _94.Store(offset * 4 + 8, val); } void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) @@ -186,7 +204,7 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) void comp_main() { - uint my_n_elements = _253.Load(0); + uint my_n_elements = _202.Load(0); uint my_partition = gl_WorkGroupID.x; for (uint i = 0u; i < 8u; i++) { @@ -198,15 +216,15 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - AnnotatedRef _308 = { _253.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _308; + AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) }; + AnnotatedRef ref = _415; uint tag = 0u; if (element_ix < my_n_elements) { - Alloc _318; - _318.offset = _253.Load(32); + Alloc _425; + _425.offset = _202.Load(32); Alloc param; - param.offset = _318.offset; + param.offset = _425.offset; AnnotatedRef param_1 = ref; tag = Annotated_tag(param, param_1).tag; } @@ -222,21 +240,38 @@ void comp_main() case 4u: case 5u: { - Alloc _336; - _336.offset = _253.Load(32); - Alloc param_2; - param_2.offset = _336.offset; - AnnotatedRef param_3 = ref; - AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3); - x0 = int(floor(clip.bbox.x * 0.00390625f)); - y0 = int(floor(clip.bbox.y * 0.00390625f)); - x1 = int(ceil(clip.bbox.z * 0.00390625f)); - y1 = int(ceil(clip.bbox.w * 0.00390625f)); + uint param_2 = element_ix; + DrawMonoid draw_monoid = load_draw_monoid(param_2); + uint path_ix = draw_monoid.path_ix; + float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + uint clip_ix = draw_monoid.clip_ix; + if (clip_ix > 0u) + { + uint param_3 = clip_ix - 1u; + clip_bbox = load_clip_bbox(param_3); + } + uint param_4 = path_ix; + float4 path_bbox = load_path_bbox(param_4); + float4 param_5 = path_bbox; + float4 param_6 = clip_bbox; + float4 bbox = bbox_intersect(param_5, param_6); + float4 _473 = bbox; + float4 _475 = bbox; + float2 _477 = max(_473.xy, _475.zw); + bbox.z = _477.x; + bbox.w = _477.y; + AnnotatedRef param_7 = ref; + float4 param_8 = bbox; + store_path_bbox(param_7, param_8); + x0 = int(floor(bbox.x * 0.00390625f)); + y0 = int(floor(bbox.y * 0.00390625f)); + x1 = int(ceil(bbox.z * 0.00390625f)); + y1 = int(ceil(bbox.w * 0.00390625f)); break; } } - uint width_in_bins = ((_253.Load(8) + 16u) - 1u) / 16u; - uint height_in_bins = ((_253.Load(12) + 16u) - 1u) / 16u; + uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u; + uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u; x0 = clamp(x0, 0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins)); y0 = clamp(y0, 0, int(height_in_bins)); @@ -251,8 +286,8 @@ void comp_main() uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); while (y < y1) { - uint _437; - InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437); + uint _581; + InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581); x++; if (x == x1) { @@ -267,15 +302,15 @@ void comp_main() element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); count[i_1][gl_LocalInvocationID.x] = element_count; } - uint param_4 = 0u; - uint param_5 = 0u; - bool param_6 = true; - Alloc chunk_alloc = new_alloc(param_4, param_5, param_6); + uint param_9 = 0u; + uint param_10 = 0u; + bool param_11 = true; + Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); if (element_count != 0u) { - uint param_7 = element_count * 4u; - MallocResult _487 = malloc(param_7); - MallocResult chunk = _487; + uint param_12 = element_count * 4u; + MallocResult _631 = malloc(param_12); + MallocResult chunk = _631; chunk_alloc = chunk.alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; if (chunk.failed) @@ -283,32 +318,32 @@ void comp_main() sh_alloc_failed = true; } } - uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc _516; - _516.offset = _253.Load(20); - Alloc param_8; - param_8.offset = _516.offset; - uint param_9 = out_ix; - uint param_10 = element_count; - write_mem(param_8, param_9, param_10); - Alloc _528; - _528.offset = _253.Load(20); - Alloc param_11; - param_11.offset = _528.offset; - uint param_12 = out_ix + 1u; - uint param_13 = chunk_alloc.offset; - write_mem(param_11, param_12, param_13); + uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); + Alloc _660; + _660.offset = _202.Load(20); + Alloc param_13; + param_13.offset = _660.offset; + uint param_14 = out_ix; + uint param_15 = element_count; + write_mem(param_13, param_14, param_15); + Alloc _672; + _672.offset = _202.Load(20); + Alloc param_16; + param_16.offset = _672.offset; + uint param_17 = out_ix + 1u; + uint param_18 = chunk_alloc.offset; + write_mem(param_16, param_17, param_18); GroupMemoryBarrierWithGroupSync(); - bool _543; + bool _687; if (!sh_alloc_failed) { - _543 = _84.Load(4) != 0u; + _687 = _94.Load(4) != 0u; } else { - _543 = sh_alloc_failed; + _687 = sh_alloc_failed; } - if (_543) + if (_687) { return; } @@ -327,12 +362,12 @@ void comp_main() } Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint out_offset = out_alloc.offset + (idx * 4u); - BinInstanceRef _605 = { out_offset }; - BinInstance _607 = { element_ix }; - Alloc param_14 = out_alloc; - BinInstanceRef param_15 = _605; - BinInstance param_16 = _607; - BinInstance_write(param_14, param_15, param_16); + BinInstanceRef _749 = { out_offset }; + BinInstance _751 = { element_ix }; + Alloc param_19 = out_alloc; + BinInstanceRef param_20 = _749; + BinInstance param_21 = _751; + BinInstance_write(param_19, param_20, param_21); } x++; if (x == x1) diff --git a/piet-gpu/shader/gen/binning.msl b/piet-gpu/shader/gen/binning.msl index 42a11ee..0e3b6c8 100644 --- a/piet-gpu/shader/gen/binning.msl +++ b/piet-gpu/shader/gen/binning.msl @@ -18,16 +18,6 @@ struct MallocResult bool failed; }; -struct AnnoEndClipRef -{ - uint offset; -}; - -struct AnnoEndClip -{ - float4 bbox; -}; - struct AnnotatedRef { uint offset; @@ -49,6 +39,12 @@ struct BinInstance uint element_ix; }; +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + struct Memory { uint mem_offset; @@ -75,8 +71,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -97,7 +98,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_84, constant uint& v_84BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -105,46 +106,66 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_84.memory[offset]; + uint v = v_94.memory[offset]; return v; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize) +AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_84, v_84BufferSize); + uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize); return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_84, constant uint& v_84BufferSize) +DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) { - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_84, v_84BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_84, v_84BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_84, v_84BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_84, v_84BufferSize); - AnnoEndClip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - return s; + uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); + uint path_ix = v_94.memory[base]; + uint clip_ix = v_94.memory[base + 1u]; + return DrawMonoid{ path_ix, clip_ix }; } static inline __attribute__((always_inline)) -AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize) +float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) { - Alloc param = a; - AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u }; - return AnnoEndClip_read(param, param_1, v_84, v_84BufferSize); + uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); + float x0 = as_type(v_94.memory[base]); + float y0 = as_type(v_94.memory[base + 1u]); + float x1 = as_type(v_94.memory[base + 2u]); + float y1 = as_type(v_94.memory[base + 3u]); + float4 bbox = float4(x0, y0, x1, y1); + return bbox; +} + +static inline __attribute__((always_inline)) +float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) +{ + uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); + float bbox_l = float(v_94.memory[base]) - 32768.0; + float bbox_t = float(v_94.memory[base + 1u]) - 32768.0; + float bbox_r = float(v_94.memory[base + 2u]) - 32768.0; + float bbox_b = float(v_94.memory[base + 3u]) - 32768.0; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +static inline __attribute__((always_inline)) +float4 bbox_intersect(thread const float4& a, thread const float4& b) +{ + return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); +} + +static inline __attribute__((always_inline)) +void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize) +{ + uint ix = ref.offset >> uint(2); + v_94.memory[ix + 1u] = as_type(bbox.x); + v_94.memory[ix + 2u] = as_type(bbox.y); + v_94.memory[ix + 3u] = as_type(bbox.z); + v_94.memory[ix + 4u] = as_type(bbox.w); } static inline __attribute__((always_inline)) @@ -156,26 +177,26 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_84, constant uint& v_84BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize) { - uint _90 = atomic_fetch_add_explicit((device atomic_uint*)&v_84.mem_offset, size, memory_order_relaxed); - uint offset = _90; + uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed); + uint offset = _100; MallocResult r; - r.failed = (offset + size) > uint(int((v_84BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _119 = atomic_fetch_max_explicit((device atomic_uint*)&v_84.mem_error, 1u, memory_order_relaxed); + uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_84, constant uint& v_84BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -183,27 +204,27 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_84.memory[offset] = val; + v_94.memory[offset] = val; } static inline __attribute__((always_inline)) -void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_84, constant uint& v_84BufferSize) +void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.element_ix; - write_mem(param, param_1, param_2, v_84, v_84BufferSize); + write_mem(param, param_1, param_2, v_94, v_94BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_84 [[buffer(0)]], const device ConfigBuf& _253 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint bitmaps[8][256]; threadgroup short sh_alloc_failed; threadgroup uint count[8][256]; threadgroup Alloc sh_chunk_alloc[256]; - constant uint& v_84BufferSize = spvBufferSizeConstants[0]; - uint my_n_elements = _253.conf.n_elements; + constant uint& v_94BufferSize = spvBufferSizeConstants[0]; + uint my_n_elements = v_202.conf.n_elements; uint my_partition = gl_WorkGroupID.x; for (uint i = 0u; i < 8u; i++) { @@ -215,14 +236,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } threadgroup_barrier(mem_flags::mem_threadgroup); uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - AnnotatedRef ref = AnnotatedRef{ _253.conf.anno_alloc.offset + (element_ix * 40u) }; + AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) }; uint tag = 0u; if (element_ix < my_n_elements) { Alloc param; - param.offset = _253.conf.anno_alloc.offset; + param.offset = v_202.conf.anno_alloc.offset; AnnotatedRef param_1 = ref; - tag = Annotated_tag(param, param_1, v_84, v_84BufferSize).tag; + tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag; } int x0 = 0; int y0 = 0; @@ -236,19 +257,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 4u: case 5u: { - Alloc param_2; - param_2.offset = _253.conf.anno_alloc.offset; - AnnotatedRef param_3 = ref; - AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_84, v_84BufferSize); - x0 = int(floor(clip.bbox.x * 0.00390625)); - y0 = int(floor(clip.bbox.y * 0.00390625)); - x1 = int(ceil(clip.bbox.z * 0.00390625)); - y1 = int(ceil(clip.bbox.w * 0.00390625)); + uint param_2 = element_ix; + DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202); + uint path_ix = draw_monoid.path_ix; + float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + uint clip_ix = draw_monoid.clip_ix; + if (clip_ix > 0u) + { + uint param_3 = clip_ix - 1u; + clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202); + } + uint param_4 = path_ix; + float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202); + float4 param_5 = path_bbox; + float4 param_6 = clip_bbox; + float4 bbox = bbox_intersect(param_5, param_6); + float4 _473 = bbox; + float4 _475 = bbox; + float2 _477 = fast::max(_473.xy, _475.zw); + bbox.z = _477.x; + bbox.w = _477.y; + AnnotatedRef param_7 = ref; + float4 param_8 = bbox; + store_path_bbox(param_7, param_8, v_94, v_94BufferSize); + x0 = int(floor(bbox.x * 0.00390625)); + y0 = int(floor(bbox.y * 0.00390625)); + x1 = int(ceil(bbox.z * 0.00390625)); + y1 = int(ceil(bbox.w * 0.00390625)); break; } } - uint width_in_bins = ((_253.conf.width_in_tiles + 16u) - 1u) / 16u; - uint height_in_bins = ((_253.conf.height_in_tiles + 16u) - 1u) / 16u; + uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u; + uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u; x0 = clamp(x0, 0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins)); y0 = clamp(y0, 0, int(height_in_bins)); @@ -263,7 +303,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); while (y < y1) { - uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); + uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); x++; if (x == x1) { @@ -278,15 +318,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); count[i_1][gl_LocalInvocationID.x] = element_count; } - uint param_4 = 0u; - uint param_5 = 0u; - bool param_6 = true; - Alloc chunk_alloc = new_alloc(param_4, param_5, param_6); + uint param_9 = 0u; + uint param_10 = 0u; + bool param_11 = true; + Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); if (element_count != 0u) { - uint param_7 = element_count * 4u; - MallocResult _487 = malloc(param_7, v_84, v_84BufferSize); - MallocResult chunk = _487; + uint param_12 = element_count * 4u; + MallocResult _631 = malloc(param_12, v_94, v_94BufferSize); + MallocResult chunk = _631; chunk_alloc = chunk.alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; if (chunk.failed) @@ -294,28 +334,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M sh_alloc_failed = short(true); } } - uint out_ix = (_253.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc param_8; - param_8.offset = _253.conf.bin_alloc.offset; - uint param_9 = out_ix; - uint param_10 = element_count; - write_mem(param_8, param_9, param_10, v_84, v_84BufferSize); - Alloc param_11; - param_11.offset = _253.conf.bin_alloc.offset; - uint param_12 = out_ix + 1u; - uint param_13 = chunk_alloc.offset; - write_mem(param_11, param_12, param_13, v_84, v_84BufferSize); + uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); + Alloc param_13; + param_13.offset = v_202.conf.bin_alloc.offset; + uint param_14 = out_ix; + uint param_15 = element_count; + write_mem(param_13, param_14, param_15, v_94, v_94BufferSize); + Alloc param_16; + param_16.offset = v_202.conf.bin_alloc.offset; + uint param_17 = out_ix + 1u; + uint param_18 = chunk_alloc.offset; + write_mem(param_16, param_17, param_18, v_94, v_94BufferSize); threadgroup_barrier(mem_flags::mem_threadgroup); - bool _543; + bool _687; if (!bool(sh_alloc_failed)) { - _543 = v_84.mem_error != 0u; + _687 = v_94.mem_error != 0u; } else { - _543 = bool(sh_alloc_failed); + _687 = bool(sh_alloc_failed); } - if (_543) + if (_687) { return; } @@ -334,10 +374,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint out_offset = out_alloc.offset + (idx * 4u); - Alloc param_14 = out_alloc; - BinInstanceRef param_15 = BinInstanceRef{ out_offset }; - BinInstance param_16 = BinInstance{ element_ix }; - BinInstance_write(param_14, param_15, param_16, v_84, v_84BufferSize); + Alloc param_19 = out_alloc; + BinInstanceRef param_20 = BinInstanceRef{ out_offset }; + BinInstance param_21 = BinInstance{ element_ix }; + BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize); } x++; if (x == x1) diff --git a/piet-gpu/shader/gen/binning.spv b/piet-gpu/shader/gen/binning.spv index 17043bc0bc5b911798d9a4c360ac6de0b53c913c..eca06929486b44ac39aafe2c917f9fabf656b062 100644 GIT binary patch literal 19728 zcmb80cVM1X*~TA8nzEF=v9!#x_nw79K?;|7O@~LOZ@Tq1o9poI+EJgStFhG>>UPSUuD$`PY5>DY zlx1kIzN{i#e@h$xTa@}XNIRLQOzoU<;DJ-7?$y~dcW&qK$T9uBo&9|Sy`9thhI)HO z`sVlQw^-rVH_+2RZ+hu429L52@+HVIslR`)hq{Aa%Do0>%oy$+sZhq!fBM^dRA-ld z6X$KQ6F=Qs?1ZhoT52KR_Uc{m;l5+(jcwbR2gckxr*m+&yrVJB{TqP}?j4@jPljgR z#`wVvQ=vH*GrIcvd#BreOtk`h&Vr6Kb1aWGnsbS2HTcNjyq;N|WS%%YGBmGeWRppD zev^4yGsJCnSJBGz>%(bb@nCrktV|G;Q!zT?4432b-^iDsxcLubvhU#9W?*{M%zK3?r z)b7qx_ZmTR{pF20@9&y93?9P{-Gq8U+fF?H@0@0mi{3Qmn)YqgMzrs0#A>THrXHfJ z?uE_3d-M(LJ1{)bHPF-B7=e9I_Zsd0m}(1n;Y;g|hNfR{fA5^$fsxL>qwVi1)piT} zb~e`V9s68Fr3Z~(*P|&c&A|}Y@oY_<@oWQbj;F2K7Tlb(G1U&>$wOU7?LRm$*vH;= zTxwo(yGCZ&*=WvpPk-NBwK1IGo#Fk1?2qXL=$zAt;IXr>C*A9UA8LF>(Hkt#`5zy{+0Cd&W0KUyApE{iqxLY7AiE9Ph=J zagE{BPlFGS3=Z{njyk{7CTS#Y+UsEV}dUJ*jtSXmAMZI@sR(EqWt{^KdxaOYGs&*ZG`BJ-2J9 zYfjCNYTLM92%k^!%v?gv3GD0|nBF;K!5M9@E=Mo>bmc<4 zt-1l;JRj}V%?sJut6N(5tu6dccz4(Ef_wU1@W$4)nPrJ^e8x|5$a-D!qznL7#aOsJgh@#gJHwqmK-A4Gh;!Giar)f$eWkgcrM)?x z6|p0bskFU|5k6{_NtA|JbKS+ug1xXv$K9}@9b~( zXVY-rgzVoo;oNNGJBM=PvP=a|m&h~d*w&2H-49!B~``UV^C zpX$DY&g}0zY;b7yl%c_SbN8Lxj5%b?M*Q~b1aNoX$egaZ!^B#w@y<${se?UT{re8g zr!t}~ZQD_uiU07d=BDbbU))@)v(XoRX0=u4qUG6nA=q~i$M_v^c`v(SA>LM94bPhV z0Gu_sM!jB>YtPzFJV)=3O@n)N9dsY4 z;`P8js~qn{FnedXzprP(eSVXsHm7TNHXa?-GyCTNb^oItV@cr5ikI8&e%A zpEYkFw;|_bHd;CN{VjY>3m;C5M!xoH{zCSS>UcPNu1rI}+N)F12j}Izu)X@iLhahC zb6fcNE&PHOeo+g*xP{*Y_xaL1k7KGk;pT5lU_Dn4q4iDoJ#S3)L~YOWNbRXw8@q7H6t+suk`D|9(xzHS^ z+U|w6Y^_Z$G@r@(?NewzlhvjcnscN!t>}4#&fsaFFuFeFRs1&$LEjp=l8nY--pOould_-2>Gg%<=GwXj|oIKzIa92xW?lt zUgP1euSIJ&P$LhA30C#@X^fQ<9Bi3gE`*me% zeWM%OeP&;y>96lv)aH+FpVqH6V^0L@t7cy|sBO(YkEu3b&z7PbAU?T6!0bA1n?9#2_>vQf?5qh9Z!54qQkqrLa4^jF=yAEh>?ww-G3 zeN0VzXN_%VeCuvX^2vKBUi$lRbR6!3^k*hG{`PkcrJb??{c}H@Mv3n^U~?zeRp7VZ z9<^TsUxwnE-9>Hwah!m=Yb;OE{{Jay_up#s-9tR{IL@C@+fOy;O@42!Wi0o>Jv$l8 zgDv(a;NH(-e+ulp82=gSu@v`A>~DcR+wP~Om{4t*E9Z4hG|yW6*M>V5xxO32xu)2@ zzqT3Ad~@KM)g6!A`;VIT!5Sx?^)YC!U-BKNGi43@kEhm0P5yCeHQN~P6V#4X-F$N2 zrPY%EBsf2n{-@M7W6D2QbMxyj_ubm|8`k^k^YHZTEbwAAcdpKpQ^)=__(Zs|F9AER zch&qVxNBiM?Kjliv!1!V6>Z||fd|6Ws5Z=rDO3k&W& z-fyDV^}nLvuAkpTvCI7~D!JcACHMO%-1YVwDcsucqmui56h0aIEd^Ki+bDMH`wOnW z-$teVp@M7oyC`p_npIUnsDDg{HBpl;!OIC z?qkjFrS{ox*@biGz4AzOZM$+RJ%jQ|oJ{ZMb1B~AeSXLPW3{foXH+iseCqc)=Xx0I z`L=$AJoBjMQ@pevRokr-_h_&&t|Iqh_#F#Y^Lg+FG2|l@V;NhVwkLvZn|_}R_SsPO z`;%~W=fh{krzk#8Y^%*NsAUZH*F4tAb1K+8C(=jf?liF4OmOBvt(*h&VpwgUjVyyr_|#( z2dw5jJAM5kxV5ieLQ~K9zYMmmy8ZB;tTu_A=bk$soIQ6QxZHPNfoo5nz6!Q|`t)_M zn$O+jy8vuEZPOUr`sDZqSl#?S1LR`w|K<9B6K?$E`xe;!p8a+aSRZxg{oB-D&bzjY zDQeEU*nQ-Cz@?OOjK=r9;JdZ$pn8rkgR6Nt$DYm0;Xap~7yB&N&lryFO0Z+nb_KP3 z2lD#e@;&e-)V9}_Jl_Xv^U6G^xi`{(Yi(CkC-)Bud`*F`t#RhWC^(Oyn ztu9ykA^+j_)2Q3(y1lM_=4*EztrPdhVAscf-AFCpfqC*B>L*~o=he0QtX0cA-V9bN z_t#J1>iYZam5XnozMYb`cYK?FK&g;EkFZYeM zpHb8tuQ+k;0~_aL;&?ClIasY5Y+j%LYR-Z0hYx`FpjOW~)LhrZw@&zCmDh^4OmL25PIn(J3!HEoYj%VT>C>>O%)lv?hdwB4`4`%v3Pn?4UytEc^Mz-k9_ z0+yowEk(`ZWfkAwK}z4>!__SAFZb3`b(}w-@zZ!mH>SR>@iS;-{qn0**0^7!KsOW_$L#mt;Od{M$MFnU?PdEyluv(exWjH^T z?|a&l>lLszb6rC%Pp((N=5igA>ovG~a=i{#D|5X8=ckcNdvd)A)@H72spX!*>#05C z)_Lc88=PmuIdy)|x{IzY&$UI}1QfsPazAJTt9N5Mm9@5AJ6v5q^S?z5wd5NMPCma0 z9k1;c18YmZ#ldQ2z9r!5-9^47;p)lfH-MUP-A_w_-ABpuF1URbV6+j_s&$FUekdtxsGb}hq~g_pUOgO|CMhpQ#m3gF~2mhJV)9?_oID}vpl8OKU+ z$C`1h3|33*RltdDTgRb~v5liWu~!AVPr_G&m$_Dlm$}w}t0mWaz{zDS+v{U4<7!Xr zap266^SvhAIn2A|c(^|5KEKzZ_VQV!Z30EjeJoDgwZZl|Ipyy8_*%zzU9f%7cO9@i zw)Me@-?baFR_F%^sz602`x|)2xGwcZ0 zN8P!XZ%r|_xx}`04t4@N&cxaoY;5N@vf=?i>{n4I$lfll{3p|JI&t71CyTPud_T=3gY+m!YKc|4zeD}(JQH%dR zVEx1Q1(*Bzy>NZh-Op30y_{2R`%%=KQ*q+#4=(qpJhlVC&Tsm7AXqJZ)TZXzC$_cR zePJ%^a__jVt>0pfcN)5LBtEFdwmTSX9BcR7q15lAcxiusZMU}H&ii3tW8{1017Njd zSTpBCeh9@_#ulgT2f?<@Irj!(02>i>977Gr;CZ-)DlmDaO**JZkzmzs|LDWStnZ zz~)Uq`@m}17uwXy^>eO`J)1h~++W~1;M16+9XLBnP!GVDrq*9S?Y2LXT3h-u2zES| z*Y}gTU^R1i4u-&9uDiC6QPdohIB`b6#>u^69$4)u+M0JhSk3PS|GnlY@CwxG+J~vt z+&BJC>S(Z9zQY{TF>v*aL!N#1ad7L}eF9D0KHB~Su;aJQiPR@ij;H8nOu0VVeJ?y2 zY#&?~V|c&)BwXFO`4qL6bEAF=Ma{Vp8}m%|wd>+q#%3+gJX+`X9G?c~z3Vh^#_zfO z3|#wd^iO^|SnV8e*5R}8+2qVRoB>xm4lQ%_Ie6<_or$KNJ^6XCZPgR&EU>!o+VMLZ zZa=g+-cza7JRg^k^BizF9{YJ0?Z1ewEx&8{5?Ia4evYO7GW-h^=fK!<{W8|`z~<9- zF10+iuYlc;+Rmqzdp4-5ukxSq)W)`*_O$yN*q9mL*THJt_-9Ptfd98KU5KtNWBMjo z%`0R27W@KA#w6FzeqKavzW9C{?0zlxz<1#4`d>^f&$uoDk0+P$jitTyS>ZU{$CqMr zZ(dsS@50sG*Oya!xv$kPqo}#B#rDtN$z1_HoI2;jJ$WUXdhRFR1FLyIanC1?^~ zefd7v+@1sd&0#;TruMQQ`dmd(vmfIB@HQ(}b6h?nuF;2aIX~-w<*{80F5emD_USrm z<5>HQyq@|7ikJ2u)pqN|x)Hoy!G8=c-^+di*GJv{%YR5Qwz+iv8-L#F*AK>MB{82kaUv2UGAF#IYKY{Ic_QB&|HT^OsHTQY?W}Wtb z1~>CIpP5g>kEf{X<8$>dV0Gg?K`qZc?5|+8@*eg#xO#Ft4YrSY&O8HFdy3fU_p@-@ zX*17L)M||gEvt<7_AwT$5fu>Cal^VD+ji`2#vzeH`j z+;9H{-i}!M>!;m$%>J}CzW1epcb)zU|8YK}3nt>rgRo6A~$UCpnj_CCEm z#echZzxZD2<=?ApN8iU%`*(W5{(YXN3=uExF3J=#xE_gu>TV|6wErYiTx!CKEfa4y{V>W~xqh(^ zfn&GNz7q^n)boBaQfnF0e7IvYo^vga?P##$)#kj*#m>2L@{M#X*u7|b{j}Ssm8g%S z*eBP+wsQT`=i|Yxef~I_`m6Y#K<(vuRR07;&3z;`j_*(>Qk-|^=DB*EPXeDv$vRs* zHuvYr6!W@w#l|yk?57lVarU-zx*El6Wr};-ar*b8+2^Z*jkh|r@lT^p`$fUyDPC(( z(%%2CW!jG`@S0#_O`y)XJss@X$Qe4L=Jwrl`B{qboNs;QS<}yfjjzpia{tZ4ah*wV zY_`#sJ@a|6wpVF$Hno@Qp?(%c%|43rE~%EBUj&;o{7YcR8X9 z`THu^`Lm6-%-`3*+MK@&sJ)y&^{-RZ?6cVY;yp>Op1j`#n>TazEpX;cTl_8pYqNhB zQp;nz7_4RvarWc};7urA8&Wc_8`XFs^~MzYv?+D=-6i16t7qWyf~#LybI+*!Qi^fB z7o<<$t@V71Uj|omFX*!b^%WHR;+V`K7aL!H=ij~YJ&OB7A8q>BNA=|UK3Fa9u2+NA zuA;=}2Vi~Dr)$A#*HET07U%0aify$ypYqJ7XM8(~*H)Cw=hijeoO+uYZ%Mr^#kt*{ zI&*si*g4A{zPaG)x3utEYo59O5!@W*T;B*+&$xaJR&%bMD|4!49)1E=%k%Cgu$ucb zYx7gExWzTV>Qq3j(!fmmttS!w0}>&S88W0`El0d-yNF z4^q@Km%jwtR-5Cuk7|kW2v{xW{a4_ecWv=|6s*k{j!PcfuffJh&fkF5x+#hKTX5oP zPut&twHep>k;nFXu(`tj0M^H|;kx{hqK~>~L!Ps-8+Z?j*Y1?8+a$1OU{C6-+vDJ@ z+cV&2Tln)W{FN5|?}ASTw=L3)mv#O#TGsgqaJe_1glo?_{{^g8uJd2v_B-qRH}F#w z_00LxVB2bQE*+Cv=I-xcwXDZKz*!G%@%txOo9~OeQa?vgGp6$*kM9d$W2O%;g4N1( ze+iyE+SB%5U~S3sGFZ(#&bK_iuY$`wuff&a>lx4M;N;Pswr_y7CC{5+HS@TR^7#G- zT;_QjuJ#rsdH5_^kVkvkE`qKtc@_n$naBMh*Vj312RoPHW8jWCyaR6E!pFjmF^yg7 zzFrJ&4BP3e-8}9)ZMM^Iaj=?aLtLJXeXx7&Me%G*rrx{2Q^1~+eW?@gUBu8Qf4}o? zxSGYwD!xlYj1k{u;A$2xtNi`WvJm-1iZS&~oaMk}&gJ21mPPSzqr`Uwv@++4a5YPO zlXE3Bej3klW9pl~^;sEgpS+^2Qftc5R)rYHE81$crkwR!9b!yl*hU}MM7>;-{Th*~ zYIo|~uiguG9i~#}el?DEV{7jHYW)_zX$#+`h3{N&*L}~L=YBOFp8M6B;PQSo0j@pw ztF^#t<^5_Mxc$!kYF+r+6!qLI)&tvCn|s$WskxRNw4Df6%emVCob#46fGPf6d$K+BlKUgz zGWSQ}YHyLp`SrebINaR!C%HS()RX%Nuv&6=X~*CBHm14N^h@q;aGARYuJ#mdl6yMb d-1aBAd(qUBdj?o7xo3it+nDB7(=Xo?{|jIQ)bjuU literal 16400 zcmai)34mQim4+|r*PV?m5JK2OhY*qw5(s+;OPVDM1PEbYJDu*Azv6rxn%tQnn zRK$gesDpwcB8z~Ch=>G4QPe>}6j^342snZQvV(}NnzCQ`Z0g=iGCu&aGRw zx|@a}6NYBl2HA*gShjLVmS3Z?At+fkGHc5FxeJb6Fuk|0WBQ(Z?WV)VSwrrp&nDT> ztdYJ%>uRf1H8fxtL)(=5)ki79^|v+fKU3WcDUo^Zg4Vf5A3b-$VXf^eSGM-{ozq!v z?W|PGtsRw~a(iE8Raw7biC?AK-r3)gIyB;ujzKw}Bxi3Q* z%J{RaT0X0_O~1+gGiLE`$&6XpO4){kd`sCz@ZQR~jK+IQ)PXsdyIQ+f$eZ$c9+?MP zRPOEXBttE4K7Vj-N;GS+w5`%v?(qJ`Y&5)U{Xl9tHb)!CIU?H{-q+pVzPy#plY9Gm z`rG@awK(``{WJ2>*X|pOU*vPFTfqnBJu+*CpVd?8E5C0X3W1OJZbzTS8V4VU)sSru zU)9!$?}%(7xToCK@xEcP8*>7E8gobZK+L9W61=5a?LMs9aY$!n<)ZS^S}oPR27DW{ zsqn-%f7WVVOO|w>ZQqz}mhDQP__aFD|69VS>7e9!acglJvT1;}JZ3{SoxX>``TS>q zjo;VSSMJD1TpP2x*T9&YvOVDIeUEEfrrozi-K!7DH7n28NoU)#UT`CC)jss=?`_5N zpH3o1Q=38~@7`YAALHK}TpR!9+5Us#7M1`1t28(PG0d@FlEZZrH98P{aHTrG+S}Jw zZ7=6la-Qm512t;QX2TO->d};I`jtD&UFB+DYlWxVn2pWm4EAl!*Y^9?C5g%i^2!vC zJ+TaW=7$hJYIZ2NHlK#Ux(ly}$h)ph**I(|tV zzjP39$kxDD(HzGW^tESF%B}+Uw4IfIGaT2|@N^v4)bVR;{`u>o|MhkD8|(N@HUHh| z@_FA{XTPnE-wtOyzEZjWoptuR2Jwb$Exh(@o3aPNylJg%)sEJs>)*vv_VA#4O4%cI z{Fyrb+#ueNJr9q1y*$WP%3i7CuMXl(*_-gLb6Tsd#mi6|YbkpRJ^v)oyLRMW>3Vz{ zen@w9X@#A~btm^qgN5#TthTOf>s#JicB5+IyQ#nH@vKTm-}2T8_}2G9p8oUZ$e+$*WMY1$T3vCHmWeKpUW_8ZS2Kih*-nX z`+C}{y@erk&jx-g2KC!s?hCbGv5*=4Zt9+D5tN$Oh`+S)rCU zo`SYoj!m$6-l^3xwb$yrDR!f|{=ynH7**JKGV5)gCEh`=J>P@-mx3LynpZo|-QM^0 zT>+QhFF!P_CtlIMN?)ZrKg+3mo?F)0dSZ9ain%@A{VV6Q0ebIH^h$njDH{!5Qt9hz zTiJ_kSbqNFo(0|QZJqP0tLTjAowZyO@b6t-t7mI*2y)yzp%32M8?s%{V!z!B?6=YV zVHP;uPg(}?hHMV}Y<3^VIv0FSf#<2S%c##+&v7X`65P&Vqc+A;b`(5~f9xRMlvUyD z*FOul_Wm_x{pbU0zh*GkW#}G8#B0EO3c!uo@LA&knLTWiP;a zj%gakRmxsR@9vN9b156bW|8I{TE~ai@r~>F=5>5@9iIkw@2KsijoDss^XCODYBd|J z(s4H9<9k}zW3N#=q0r(Ta~_M)(s3`T$Upav6t&-ZpWx*ZZ-F~&>XLtds1lb z+iIf|%^IkUO*HpwwQUp4d}_^!<{quKW1_h~t4&Tc_hz+S63wxy@v5&MpZl@eo{8pu zthR5Wxeu#(4vx7wPqo>J=ANr|Xs(sk@2fG_*k|?e@>!}iD)Q!>zqdeKdDicXey6E+0n( z>#Was{@)z(xL4nC+~fS!y=OdpGkP`sj5(&j`iy5SZAq_h=*D&pIPTEAS6}O5{?HxM zq(U?HWU#(!W3Wx3Z>DXDZD)G9Yt;Lu(yJNQH6k~zF|^O1uhprPd5AQwnseNvu+?hO zn6Di*@7LFuGwCCynlb%3CSo20Hl~`k+4Ny+0c%sU9tYD8p&8%%wEGK#LpiM3K z;b5=rh2UMFp0tTd!m3jjL@|!QD&Lv>#t!?=impNwml(KbhvG|EctjdlW6k zb2>Qu9dA3$XE25F`P|Q=h3~~+b4RXQ>i8XS*Ri?orZ<;0mOoH%pK09xDEPhi2I4;s z_ni&cOS|5;0YQ|z<#{WhlK{42e4RI}!C-*Gk9=>`3m<;G619+ZO*(^!lpFH=|ec9^-9JZ%x$AC-?lO7Wv(C z_?PLwRbexxe20RYUw^shIq%=Oc;-98V{8+_!wT;Dm?ozW`waMGxUu&KTPL4`{4ls{ zp!Ob=vJ(nE1d#(=dd+gad zxcNL=2REN*>)`gDtyAuqI=J~gOQ(EI!qq)fhuz-ubjsJ(@p}^P_&htO{+^pt?zuVm zaQr#%4a9s z_@0kbyJzFz+7~DMAh_q^u(!ZH7t32%^L}IgMsp85yl4louH8@Ax^vrsta{hDyoHtT zp4UioA3PZ?{F@40f8PPQ*!8d9Tf8^!HNHRguajp4{YaXZ_6-ZWeZ<`eY>b=9?cTHr zSj|22ZDPoW(~Ml!)^1y{n&023fm&Jv#~b6?4qQL3acJt-74zO6tmZi0U`*q|Ue-WcGfmC0iOuWY z?|yIIu-QMwJ$~P96X7wBNnoGd1;sojgVo#F?)|}rQ*ItZaSGe}zyMetw#dm8N}NYtZshySh?8!HGTej z!;K&L_5r)zW4-MQ*GJvD??>-t-L=i6sabci>&S2A0km|E#(#_Z4lHcP6g55wuI6Qp zeK%*r-S@4FW0vb@4Ci(**g0uyp_k7jujk-H!2TV=`?W=$L&4g-q7G`VjTpbZwmI~X zdv1c~C3t>;qi%=8t*td$K=1Vqf0fAvHj$|-%`2XR(rpx_SVrp>UcExAJuUo zy0)0#F<>>XsN*8|QM8zgT)(K}ap0(<-#NK;^jv#9*q=AOUz>IDTdD5YPM|-Lre^$D zQ=WTHDr^@rEBPsKHP1sEaq#E@mVqPR9ZA0B=-MJ*1+13lI}^^oJfHT+w*suqe6Cr!Yu+{KTC+F5`%*VJ*4o8I ze)sd0=-Ohx=>e;Go;|;Kk9)!DK9`FMtq-oQpZTlwYLV|OaO7JBj(6#7u(rr|4p=SC zcP?B#&G#>G^~l!`Rx_?^>w{q5?Z|T;*fD*uu>UJqZ9`Ux{{IH%U;cMh?{%H)<6MlR zJz}p0`#ghx2%P3RAD-s=FkCHiT>$1^p37L?uTQKK?GgJzuxmBuaS_*Ck;7<++UI{rZ^8xY{H3C%{o7 z>wPKQ8pb#OlVE++-B&+N@8!2g+ox!1u48e;{S4SKM^3qGK78%N_dmdnLEp>hh~A#&{bfyD{1!? z+Lytudu_(Of?mxzeudt8*n7tOD*ZJyFYR9|?DjFwYr)Ph_}9Tct6RwDne#fZKI+z9 zel^Y5<`R3aHMkz^JR{Z(U}L*BwciL<3;%C`eJ|pCa}!t}^@wpZ*k=CkeAl8|Bk?^2_P%?;#<6$J-ADgJnwRz;6?S{aZQbt&8zat{KL)F9 zOtT*HAJB|tY;oNC6R`Kjd-MQ!3@v>Ze+pNR_3$(BXqxwGi&#GgYx_Alea8PKro~!& z5UdvdzW`f@SbM(&tLYatQVaiw!Rc6k1+O3LBWUVz&#%Geh_U|${1DAp`kF^gKkI9) zt&x4icob~j80T-nYOyY~sin`)S{wUu`gqPyB=|}2dDLho@6HJN|Azaw3H|lc?){I^ zYm2e`57_x!U+gDOfz{06d$11d<#X5eJDQqv5=WfhgN+k=#WP^Fb=+&-XTfSS$ukP= zIk12GQP=)7y_)OB-xd4;tmgO79M0)~;p#CDd91TPf$N{$pV8Fg{ujZ{-+Nx7f0_0I zO+RDG_0c|@e18Eu2A_*D+;3ljt6Q7b=)J6s`l~cGYa=%11bUx~&oXTG@^yu;ef<3S zCiq?ao%b8yn7{AkTl&-PWqk6t!D<(S<2k$o&uH-+{t8yxloqvm7hGSfzoDtentTuJ zz3LH*A!q8Ix5IA(G{>RM`MwTT^ZnQ#+yGAJ<2c{Ky@akUenx5pt9dz2f7Wb*`#Uac zU~IX5G1p;W^J(*UU-Gbx0J|Qw`Fk+A?*?5qlK*R_&wqag)*kn52sUQSZzH&x-=LV& zCh&hWr%lnd#hgZg)x2U(o5B4(T+B(XpX2oRapnu(Ex@kVbPbF_Q`g_$*U4jETY{U( zWqf04um4s!PuKBSY_82~3%(Ux&2_yEy_f4+eQTPU>ssvi{F!81uzwdiofh@m4oyAw zlW|}*_Y>EA#Ib*k=iykk2b+5x4PJ+%Zk0H>SPc$Vv2*X-*RroSJShiyNwTJTxmD~J*2=l$XOsKQTozU^VAAn_eC{ z=Yp--4ZIJo<9Tqk82jO1FUPKJK26QBizChvVB^GC7r@mVYxo=qPoKw8aL*n3Y74)k z!P_o6y zx`&+vSC1SggB@e6?^D2P>xdoWJ{9hL+RSqTy;{UL4XozxSpD1i>0q_6p8?*9aciHA zy%nw&b7%uQPGc{omy4Is8%x|y?|re~c7W#)OMm^eTgOe_7!ld^vm`KJoK+1+I^}d*upxFV~v3GihqBHF3o01gHC>JZxQH_n{bL6|NRz z)TZVfBeuPKGQGL%{Tb{jug2fc0_BSzEb2`kVhqn)bgDR~%#WT3eb_+}wX8lk+@*tO{W`e}DeuCWi$9J9~Ed*%Aan0vr6rx-pmw>fJy)LceYYHCq z`vlyWj>CK9G4@Y_&7sXW@~GdZz}C-uv_<_s4c2D;E~EFde(Im0sX1n`>&0`ATs`uB z7Hrso-5{Uc1nuuG0!UnSMIWG3`blb-V(c z*7d4{`%YY)aQ&{SmN=Y`ye% zeKpa4jpq8-N1Hy@Ts`uA9h~O74z6}m;bXoVXpzsF%Z+8d^^bht0Gm%AZTk4k)Fa=` z;56Si;cB-OKIZ!tE%N!yAtz_*wP0V6{hS;qw?+pBU2S?X_r+Lk!MLqW`u=Sipvz`a!xOV=21|0RA41TWQQNO3)-j~+z zcX0LCPo4&=t)uxf*(Cbk)6}D$&w?F`^)aVhY(9Ohr*rrNZC9E;+MK6a%yS5M4$bRe zTFmp10v|+wXn|Yk52HEfx%9Dyo(B&txNGR;gsZ<&$6qaY)Z_)YIntW^39cT`@6TX0 z*Oc=#r&>Jk7r|;#!mwJ*1|Ou zHaToKY}_+N_nLs?Yn|+lf2QbD@*FWEQ6U0r$_>M*^r)~cYmUy z0Js8aUKV%{H>>gRWFj4If?HdFV~E=>UyHVgY@=KkbA!u7 z-UF_SIN1%D?h)4oDF7^iV=y?z*J4bd(@2{DLBJjwfCxy_1la|2{%+h?-CPM@pI0_G zWj<*RhgW4+n#Ac*BJ!Coz}O|!mx}-uM0HYHG)QJ8yBXAgR=%i#j!sobfa@q{gE3hn zsJ1lDM=Ou+u$@2LTIGBsDYWg@B2kMavL-Ybw2qu!-}XqbgFO4ZVEwG%$J#e#!eQV@ z4=V&Ip+a;VG13@`sV*|9mU$QuExpbpR?gg%nJ$qE!MDo~9JTud@7f(x6m%lTPA<9L z(ntr%fb4>l<^1a_+v2V&Ggd_xR&6W9U|;lWgLqTFcsqOp@9hkKi;G`M^EWxgo2NPz zQ=QF5J}1ymeEBo6axH(mkzdwN+>-FiCnSwxs9{3#7G$~jP-fGR80wkSFiwf{&UdQR zNr|8vaXT<}{_B+c)eORTU5YVx>_|n=f${j`_72?6bqGq2cL88k7g1b034s1J9?4Dt z+X9hfJfb)PwJmTWyf`4gpQ!yQu9+L{?0BGa9Ai%tf=rzjx3daES&j@JHVisU3Oky> zI=Moc=5z>x7kA@&bg2<{$#gC zPS`*+-iPa`ZnksvzCPT8T18&>w0Jm^E<*y)_1JJ4U~~qD$Za#X9v68|=E&zd5}(O> zxW$r}OsTie4N8 zX^}qbi#du>ov_;Wmk+*~mK?1EvB70)E1oYcE7`Kv7kA~QzjP~^lRYEKl-oUWr%u5X zn216rv7wbH>L-dv36q4sh2gIp7`S~s`*P2@!I7i5&QG&o`g@x8_7gF`nS)%aq%M9i z&(Zf*TJciD6;)^H%X%+$=z)#f)uDOyKJ?n2(d--NZhi9AjqY=|j^Y`!;Mmg(+TNjK zeiL(=ISo~`&p}pIQ5TQ&)-0dr{ng`rHO0kCN4G*niXFcyZGw)g_vwXM3;@oz>F^st zvdXSuWq}M?XxGP9b~+MR{@k^}VgT718Z{AMf7hIkWx1>sj*8dp|zK zD{R~O#XYPU3F=*3{IRlS`2K%>=neC4<7I?^iF`+k?feUYi(J(f`@q>fB&2L~a+K%o zq%kPLrI^+sZsdl<1XXSC@<0tf%^WTnq5SF|Bw`lC zNkGd1kfO^-k&X8NxK<&@*}bLlX9{oJoOATMH@G@{0v1}XF5iI#`{#A}RY;$z%QqnJ zU!g8X!F~R(b@|nQox0p1bJHk81*a9l|Fsqelk5qdoD$}h1`u5|ti?`g9SuIR zRT`${%H`;QB+?fT?++%Yoyt+Ywwoph3vJX(HqlA}2qvp_jgN=*ZL%=LA8^DKOlnW& zxNSL@E^GCv2G?BU<=E!-OZT>IEKRlUXbLTD!W4S119_1LuM!`wA~a)~6Yro2vt+Up zs?lIF&$vl`+M9}|vgNGDrlYz5^+xO4*G{%K??2dnX_>$>H@{+D`Pwa|>r09=&_;pc zwY|^E>z{pju-4t8w|$|hnwGpO_zz>rtGc6SY0o)_+j|c7{r2}~YYz4;6Ew%cv1^OV zwrn$$aAi9K75%&3{kz`%)9YOh4L~fHFY$7T7pz&2!Sjsw0$HwBSDPr36>6omx>>d9 zD!Sti#Kq?V!g{LhMD1n*~0E^M| zy>8uYPhh{tu0vT`p6wb^?d<1oWpa&xCW-TNvj5|+-Cz8*yNem5seIsJkU;k&^9>h2t#zDj+KkBr3I(yA_dA7?a9CG5e?=<_CgyegI{sb*y&C<$$#4n!Fmd!Xz)x zN~6c5mw3U2X=NUN+m=>djz~X$utSS7DWKc#LEuUk3^BqzhECLvOvf=lNqDLOswIX+ zP_3?_({AQuB1eGNJ4}%_tpiX2k`)a2#*R_80CWwoK27A2$rJ@}k1L)3w2QX_Gu8&y z>H7UaUYm~WF`h>JE~r{u-sPgsh0S^`!q@`#vDDE3dr%84D;Hsl+9rr~lSDART%=Fw zogg+&NzO2etV@O_h{vahj~PXsOUEXNrzVL*P!dbCCyr0#rfKV!L?byjR*=!63Vu_;WzwLwY-11`bvX&}Ph-ZeNGvBgB*@d_~Tni*PRui1kIIX8)X0k8p zB6d23y~td-3*Fx@T!AK261ZjU$9oQr>}F?i0(K`W#c-fw`geBBa;qx#T&e_Zu7QM< zDOq2ym+2~FFq9p#n4F|9tO%W9sh2M!S+DBHduAHD0HT#pkth2F3${@I#6Y^o0PGd;#>aft5Us6~Y z`F8;6>d2j!a!sF|^7|xkUY^`J&ys@$X$jYT^StK!LF`)U=>F**7+xWm;gMr z#&%_BuW^7S>yVaR{a(jTl12r#@lg_t2K)k;jJHz%66th5|2|QSBT-AdOYQZMEg{p9 z!j%2VaGLz4NK{yz&Rd^ygo8!uC=LoXqQGZT#c#g9JFO)>5eg_3+GwH2Dcj(VNdCYeK8oqq!+m#GeX1D*on z3}PL&xue|1L*^xL|DRC~L>^7|xa2G}24&~KnH(_bVa$7g+}m%jf!>rCgS{*a6#v!~?OmgPYg76@K73$=w=HbpVHrIT7( zC_2tAtQcTN1v$VbvcE`cS#||+f2+AjvxULXanEg zTp(zKo3Xj_{afI_oTsg4zg63HCsRx}19X#y&QO1U@3B*XiIgze5>V`{;jGp?r{$V+ z|F34UQBD93=mZXpDL=FV9@|%Lq_or;^zL9dhJD)nJt?)W*gy0Y(`xZE3yG{w zFn5a&OdC=w29I{dSo2~_0nBSKc81Tv400rQHjn3SvBR7ZBB*{EEpnmsyvP<}F8D0L zdM;!Uw;V7V=3m2FkCq4c(=2cqCZh^YKMrEZMjxV6)X`F@6PPb(3ZI2x3?)ihkJZA3 zp7Go&#Qe0%qJ0oh^-U|Cvkg0f5_!dqyMsM`$m}(X*{BvlChhhoOnVMO3%2!C|4nLt z@3YyS3=D^-6 zKY6WUZls>3WFYSlyu%Lo)y)FXS?p+K$srZhB1wAmItyz{N*I$fC2;kNrYmR9MeFNaZrvIgr^h zx;L{2Z*?@RO$dhJgcgA6%pa2GK8I#?gNT{FT+WRETjvI;qn<#*^6FkbAb{$CKTP&q zL=--E>$&@D+lD>dMzq#QQm9ePIS6Oy>^q*O)rD8DP#k-SgBQ+lP*|#%UUYc`OmUr}S-wtBm^pyT%t{EL zih}^t5?n-!D8n+s` zX5zKL3zq@;a>TB~;4RDao^RZfz9+<&#;FGNJw{>Zbt2* zS1IV(_c)DZ2ENXWf2-wvrM~$jlo~qx7bunNcUK*?2tv^Ckb|@CtCKh`-S8a;qrOpZ z<)F6V`$?5lt;|L9{b#V10WE_{h_=&w8HZZOQ5nq_bI3i8ifKN{p`JnIq(PDs#xml5 zow0~BNHdo30>3qiG?*V1@*ZMpLIln2J|=l|V!3+}{^O|kkG&V>xUYgmOm06{v6NnC zFa8()gZjZQe|+%Nok6t7@7~m)cFFf$%RvH4nkD2PFNC|Vs(uqGF|*0s!TEp>|emBj8jgiJ%j2Q*g_XyO^- nSx$J!ixYmNg2L7|PB``@PWWpdD6GoogpV(yC;qB{GXU`4=#FO) literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/clip_leaf.hlsl b/piet-gpu/shader/gen/clip_leaf.hlsl new file mode 100644 index 0000000..d570420 --- /dev/null +++ b/piet-gpu/shader/gen/clip_leaf.hlsl @@ -0,0 +1,367 @@ +struct Bic +{ + uint a; + uint b; +}; + +struct ClipEl +{ + uint parent_ix; + float4 bbox; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; + uint n_trans; + uint n_path; + uint n_clip; + uint trans_offset; + uint linewidth_offset; + uint pathtag_offset; + uint pathseg_offset; +}; + +static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); + +static const Bic _393 = { 0u, 0u }; + +ByteAddressBuffer _80 : register(t1, space0); +RWByteAddressBuffer _96 : register(u0, space0); + +static uint3 gl_WorkGroupID; +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared Bic sh_bic[510]; +groupshared uint sh_stack[256]; +groupshared float4 sh_stack_bbox[256]; +groupshared uint sh_link[256]; +groupshared float4 sh_bbox[256]; + +Bic load_bic(uint ix) +{ + uint base = (_80.Load(52) >> uint(2)) + (2u * ix); + Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) }; + return _286; +} + +Bic bic_combine(Bic x, Bic y) +{ + uint m = min(x.b, y.a); + Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m }; + return _72; +} + +ClipEl load_clip_el(uint ix) +{ + uint base = (_80.Load(56) >> uint(2)) + (5u * ix); + uint parent_ix = _96.Load(base * 4 + 8); + float x0 = asfloat(_96.Load((base + 1u) * 4 + 8)); + float y0 = asfloat(_96.Load((base + 2u) * 4 + 8)); + float x1 = asfloat(_96.Load((base + 3u) * 4 + 8)); + float y1 = asfloat(_96.Load((base + 4u) * 4 + 8)); + float4 bbox = float4(x0, y0, x1, y1); + ClipEl _335 = { parent_ix, bbox }; + return _335; +} + +float4 bbox_intersect(float4 a, float4 b) +{ + return float4(max(a.xy, b.xy), min(a.zw, b.zw)); +} + +uint load_path_ix(uint ix) +{ + if (ix < _80.Load(72)) + { + return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8); + } + else + { + return 2147483648u; + } +} + +float4 load_path_bbox(uint path_ix) +{ + uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix); + float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f; + float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +uint search_link(inout Bic bic) +{ + uint ix = gl_LocalInvocationID.x; + uint j = 0u; + while (j < 8u) + { + uint base = 512u - (2u << (8u - j)); + if (((ix >> j) & 1u) != 0u) + { + Bic param = sh_bic[(base + (ix >> j)) - 1u]; + Bic param_1 = bic; + Bic test = bic_combine(param, param_1); + if (test.b > 0u) + { + break; + } + bic = test; + ix -= (1u << j); + } + j++; + } + if (ix > 0u) + { + while (j > 0u) + { + j--; + uint base_1 = 512u - (2u << (8u - j)); + Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; + Bic param_3 = bic; + Bic test_1 = bic_combine(param_2, param_3); + if (test_1.b == 0u) + { + bic = test_1; + ix -= (1u << j); + } + } + } + if (ix > 0u) + { + return ix - 1u; + } + else + { + return 4294967295u - bic.a; + } +} + +void store_clip_bbox(uint ix, float4 bbox) +{ + uint base = (_80.Load(60) >> uint(2)) + (4u * ix); + _96.Store(base * 4 + 8, asuint(bbox.x)); + _96.Store((base + 1u) * 4 + 8, asuint(bbox.y)); + _96.Store((base + 2u) * 4 + 8, asuint(bbox.z)); + _96.Store((base + 3u) * 4 + 8, asuint(bbox.w)); +} + +void comp_main() +{ + uint th = gl_LocalInvocationID.x; + Bic bic = _393; + if (th < gl_WorkGroupID.x) + { + uint param = th; + bic = load_bic(param); + } + sh_bic[th] = bic; + for (uint i = 0u; i < 8u; i++) + { + GroupMemoryBarrierWithGroupSync(); + if ((th + (1u << i)) < 256u) + { + Bic other = sh_bic[th + (1u << i)]; + Bic param_1 = bic; + Bic param_2 = other; + bic = bic_combine(param_1, param_2); + } + GroupMemoryBarrierWithGroupSync(); + sh_bic[th] = bic; + } + GroupMemoryBarrierWithGroupSync(); + uint stack_size = sh_bic[0].b; + uint sp = 255u - th; + uint ix = 0u; + for (uint i_1 = 0u; i_1 < 8u; i_1++) + { + uint probe = ix + (128u >> i_1); + if (sp < sh_bic[probe].b) + { + ix = probe; + } + } + uint b = sh_bic[ix].b; + float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + if (sp < b) + { + uint param_3 = (((ix * 256u) + b) - sp) - 1u; + ClipEl el = load_clip_el(param_3); + sh_stack[th] = el.parent_ix; + bbox = el.bbox; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + sh_stack_bbox[th] = bbox; + GroupMemoryBarrierWithGroupSync(); + if (th >= (1u << i_2)) + { + float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; + float4 param_5 = bbox; + bbox = bbox_intersect(param_4, param_5); + } + GroupMemoryBarrierWithGroupSync(); + } + sh_stack_bbox[th] = bbox; + uint param_6 = gl_GlobalInvocationID.x; + uint inp = load_path_ix(param_6); + bool is_push = int(inp) >= 0; + Bic _559 = { 1u - uint(is_push), uint(is_push) }; + bic = _559; + sh_bic[th] = bic; + if (is_push) + { + uint param_7 = inp; + bbox = load_path_bbox(param_7); + } + else + { + bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + } + uint inbase = 0u; + for (uint i_3 = 0u; i_3 < 7u; i_3++) + { + uint outbase = 512u - (1u << (8u - i_3)); + GroupMemoryBarrierWithGroupSync(); + if (th < (1u << (7u - i_3))) + { + Bic param_8 = sh_bic[inbase + (th * 2u)]; + Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; + sh_bic[outbase + th] = bic_combine(param_8, param_9); + } + inbase = outbase; + } + GroupMemoryBarrierWithGroupSync(); + bic = _393; + Bic param_10 = bic; + uint _618 = search_link(param_10); + bic = param_10; + uint link = _618; + sh_link[th] = link; + GroupMemoryBarrierWithGroupSync(); + uint grandparent; + if (int(link) >= 0) + { + grandparent = sh_link[link]; + } + else + { + grandparent = link - 1u; + } + uint parent; + if (int(link) >= 0) + { + parent = (gl_WorkGroupID.x * 256u) + link; + } + else + { + if (int(link + stack_size) >= 0) + { + parent = sh_stack[256u + link]; + } + else + { + parent = 4294967295u; + } + } + for (uint i_4 = 0u; i_4 < 8u; i_4++) + { + if (i_4 != 0u) + { + sh_link[th] = link; + } + sh_bbox[th] = bbox; + GroupMemoryBarrierWithGroupSync(); + if (int(link) >= 0) + { + float4 param_11 = sh_bbox[link]; + float4 param_12 = bbox; + bbox = bbox_intersect(param_11, param_12); + link = sh_link[link]; + } + GroupMemoryBarrierWithGroupSync(); + } + if (int(link + stack_size) >= 0) + { + float4 param_13 = sh_stack_bbox[256u + link]; + float4 param_14 = bbox; + bbox = bbox_intersect(param_13, param_14); + } + sh_bbox[th] = bbox; + GroupMemoryBarrierWithGroupSync(); + uint path_ix = inp; + bool _717 = !is_push; + bool _725; + if (_717) + { + _725 = gl_GlobalInvocationID.x < _80.Load(72); + } + else + { + _725 = _717; + } + if (_725) + { + uint param_15 = parent; + path_ix = load_path_ix(param_15); + uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp)); + _96.Store(drawmonoid_out_base * 4 + 8, path_ix); + if (int(grandparent) >= 0) + { + bbox = sh_bbox[grandparent]; + } + else + { + if (int(grandparent + stack_size) >= 0) + { + bbox = sh_stack_bbox[256u + grandparent]; + } + else + { + bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + } + } + } + uint param_16 = gl_GlobalInvocationID.x; + float4 param_17 = bbox; + store_clip_bbox(param_16, param_17); +} + +[numthreads(256, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/clip_leaf.msl b/piet-gpu/shader/gen/clip_leaf.msl new file mode 100644 index 0000000..4e2d059 --- /dev/null +++ b/piet-gpu/shader/gen/clip_leaf.msl @@ -0,0 +1,366 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Bic +{ + uint a; + uint b; +}; + +struct ClipEl +{ + uint parent_ix; + float4 bbox; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; + uint n_trans; + uint n_path; + uint n_clip; + uint trans_offset; + uint linewidth_offset; + uint pathtag_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); + +static inline __attribute__((always_inline)) +Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) +{ + uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); + return Bic{ v_96.memory[base], v_96.memory[base + 1u] }; +} + +static inline __attribute__((always_inline)) +Bic bic_combine(thread const Bic& x, thread const Bic& y) +{ + uint m = min(x.b, y.a); + return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; +} + +static inline __attribute__((always_inline)) +ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) +{ + uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); + uint parent_ix = v_96.memory[base]; + float x0 = as_type(v_96.memory[base + 1u]); + float y0 = as_type(v_96.memory[base + 2u]); + float x1 = as_type(v_96.memory[base + 3u]); + float y1 = as_type(v_96.memory[base + 4u]); + float4 bbox = float4(x0, y0, x1, y1); + return ClipEl{ parent_ix, bbox }; +} + +static inline __attribute__((always_inline)) +float4 bbox_intersect(thread const float4& a, thread const float4& b) +{ + return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); +} + +static inline __attribute__((always_inline)) +uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) +{ + if (ix < v_80.conf.n_clip) + { + return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix]; + } + else + { + return 2147483648u; + } +} + +static inline __attribute__((always_inline)) +float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96) +{ + uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); + float bbox_l = float(v_96.memory[base]) - 32768.0; + float bbox_t = float(v_96.memory[base + 1u]) - 32768.0; + float bbox_r = float(v_96.memory[base + 2u]) - 32768.0; + float bbox_b = float(v_96.memory[base + 3u]) - 32768.0; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +static inline __attribute__((always_inline)) +uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510]) +{ + uint ix = gl_LocalInvocationID.x; + uint j = 0u; + while (j < 8u) + { + uint base = 512u - (2u << (8u - j)); + if (((ix >> j) & 1u) != 0u) + { + Bic param = sh_bic[(base + (ix >> j)) - 1u]; + Bic param_1 = bic; + Bic test = bic_combine(param, param_1); + if (test.b > 0u) + { + break; + } + bic = test; + ix -= (1u << j); + } + j++; + } + if (ix > 0u) + { + while (j > 0u) + { + j--; + uint base_1 = 512u - (2u << (8u - j)); + Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; + Bic param_3 = bic; + Bic test_1 = bic_combine(param_2, param_3); + if (test_1.b == 0u) + { + bic = test_1; + ix -= (1u << j); + } + } + } + if (ix > 0u) + { + return ix - 1u; + } + else + { + return 4294967295u - bic.a; + } +} + +static inline __attribute__((always_inline)) +void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96) +{ + uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix); + v_96.memory[base] = as_type(bbox.x); + v_96.memory[base + 1u] = as_type(bbox.y); + v_96.memory[base + 2u] = as_type(bbox.z); + v_96.memory[base + 3u] = as_type(bbox.w); +} + +kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + threadgroup Bic sh_bic[510]; + threadgroup uint sh_stack[256]; + threadgroup float4 sh_stack_bbox[256]; + threadgroup uint sh_link[256]; + threadgroup float4 sh_bbox[256]; + uint th = gl_LocalInvocationID.x; + Bic bic = Bic{ 0u, 0u }; + if (th < gl_WorkGroupID.x) + { + uint param = th; + bic = load_bic(param, v_80, v_96); + } + sh_bic[th] = bic; + for (uint i = 0u; i < 8u; i++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if ((th + (1u << i)) < 256u) + { + Bic other = sh_bic[th + (1u << i)]; + Bic param_1 = bic; + Bic param_2 = other; + bic = bic_combine(param_1, param_2); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_bic[th] = bic; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + uint stack_size = sh_bic[0].b; + uint sp = 255u - th; + uint ix = 0u; + for (uint i_1 = 0u; i_1 < 8u; i_1++) + { + uint probe = ix + (128u >> i_1); + if (sp < sh_bic[probe].b) + { + ix = probe; + } + } + uint b = sh_bic[ix].b; + float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + if (sp < b) + { + uint param_3 = (((ix * 256u) + b) - sp) - 1u; + ClipEl el = load_clip_el(param_3, v_80, v_96); + sh_stack[th] = el.parent_ix; + bbox = el.bbox; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + sh_stack_bbox[th] = bbox; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (th >= (1u << i_2)) + { + float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; + float4 param_5 = bbox; + bbox = bbox_intersect(param_4, param_5); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + } + sh_stack_bbox[th] = bbox; + uint param_6 = gl_GlobalInvocationID.x; + uint inp = load_path_ix(param_6, v_80, v_96); + bool is_push = int(inp) >= 0; + bic = Bic{ 1u - uint(is_push), uint(is_push) }; + sh_bic[th] = bic; + if (is_push) + { + uint param_7 = inp; + bbox = load_path_bbox(param_7, v_80, v_96); + } + else + { + bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + } + uint inbase = 0u; + for (uint i_3 = 0u; i_3 < 7u; i_3++) + { + uint outbase = 512u - (1u << (8u - i_3)); + threadgroup_barrier(mem_flags::mem_threadgroup); + if (th < (1u << (7u - i_3))) + { + Bic param_8 = sh_bic[inbase + (th * 2u)]; + Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; + sh_bic[outbase + th] = bic_combine(param_8, param_9); + } + inbase = outbase; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + bic = Bic{ 0u, 0u }; + Bic param_10 = bic; + uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic); + bic = param_10; + uint link = _618; + sh_link[th] = link; + threadgroup_barrier(mem_flags::mem_threadgroup); + uint grandparent; + if (int(link) >= 0) + { + grandparent = sh_link[link]; + } + else + { + grandparent = link - 1u; + } + uint parent; + if (int(link) >= 0) + { + parent = (gl_WorkGroupID.x * 256u) + link; + } + else + { + if (int(link + stack_size) >= 0) + { + parent = sh_stack[256u + link]; + } + else + { + parent = 4294967295u; + } + } + for (uint i_4 = 0u; i_4 < 8u; i_4++) + { + if (i_4 != 0u) + { + sh_link[th] = link; + } + sh_bbox[th] = bbox; + threadgroup_barrier(mem_flags::mem_threadgroup); + if (int(link) >= 0) + { + float4 param_11 = sh_bbox[link]; + float4 param_12 = bbox; + bbox = bbox_intersect(param_11, param_12); + link = sh_link[link]; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + } + if (int(link + stack_size) >= 0) + { + float4 param_13 = sh_stack_bbox[256u + link]; + float4 param_14 = bbox; + bbox = bbox_intersect(param_13, param_14); + } + sh_bbox[th] = bbox; + threadgroup_barrier(mem_flags::mem_threadgroup); + uint path_ix = inp; + bool _717 = !is_push; + bool _725; + if (_717) + { + _725 = gl_GlobalInvocationID.x < v_80.conf.n_clip; + } + else + { + _725 = _717; + } + if (_725) + { + uint param_15 = parent; + path_ix = load_path_ix(param_15, v_80, v_96); + uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp)); + v_96.memory[drawmonoid_out_base] = path_ix; + if (int(grandparent) >= 0) + { + bbox = sh_bbox[grandparent]; + } + else + { + if (int(grandparent + stack_size) >= 0) + { + bbox = sh_stack_bbox[256u + grandparent]; + } + else + { + bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + } + } + } + uint param_16 = gl_GlobalInvocationID.x; + float4 param_17 = bbox; + store_clip_bbox(param_16, param_17, v_80, v_96); +} + diff --git a/piet-gpu/shader/gen/clip_leaf.spv b/piet-gpu/shader/gen/clip_leaf.spv new file mode 100644 index 0000000000000000000000000000000000000000..7c4c174c68d8cc707df87de587187f814b46a37c GIT binary patch literal 19028 zcmb7~d0<{uwZ*?SNn4-~%1~xX%1{fG0x}5&qy;QUfdV2TXp*LB8=591Ntr|`lY%&l z6UrbUQf6mCQK>}`5obj}1rh?%v*#St(YClEp*^f4IFu2;R-tKxY#6DdVdnLCo^C)eM zWW2byyQ6zx$vjd|9U2~7);+u<&39b33;*ph<3AOe(6)sU)M}?zcrEJF>UeBzed@k} zuK68HyM`Ba%$qlG>eOYs%&<*sZ9Qytx%X7~xY{OgJvw^(hkFKx81~eY7wk5}QqQpw zcKdF=jrFbLHEUZ?5A}2nb}w?u7XKgpZnW9lHq1pEqn`|}_Y0xXqlQmAI^DsiwYD|* zfWF?P2lY8$W9l4+E&-=^GM&dw=uI*sO#~&3DD8gSLWCjTO-HV+V=6Q zCZhD)Zp3d~Z71;1@W5bCM?GLBa%66H9Pw?fO$X~hMPFKKyHGEwRC>gip$h&heX`L><=S@({vMkk|t=Gdf>@oxB! zL0>xD-4{dhyVsp!dKJ@1n{n8>`uhiBNUSx`hX=d*hblvK_j-Mwj`V33`GZ}j zEE(t@=$)Tvugs0-tG;?8?Hc3%pXeUG5nChX+OZ7{cXcn0^_6*DC-$O4uA_4|fx>(` z0>;|Vd=E9&#~2OGbJ$W{YB6jC`xNBiYtXYQ* zE1T;OTO0j29KEr}W-VI)wr^@a-RwPU_wrl;kLU9Q>iV!bpYpfE59?VnFv!ctwUYZR z=~*(8E4uqsy=QQ6U=VCR{oSYN^%&;sg!>rZ{anvior{5R0%$)aoEMY)%!*UNTWS|m zH_rXI`Zr=>U&ow*?ykN=`cI}B?j7hqQK0L%pZ-CbjRk zsV!~b+rf=>ZmECYljm;Z3=Yim&I<3vYu_VC*iDj zy$dw2$9e&+k%yhkCXT)S$*pp1fc{FH*4jp|;w`l;;l2G!i8ro(cVQl9_YKVZj}7V^ zO@39|*4h-?4s|SDHngZdo;{lEd)H$(R?xA}kXQXW5InGKxNbD|2~BM~n)pJvANt1F zTWgD&?0s;?Uw84{vJAeEx8VH7rmXkxtVSC?UXA&?2%i32*=S#14*PRM!*}F7-Q2`) zZ{n+(_}z8xr_Xy%Ywf;Q*~iu%fEV8lrlMnHE?#_qb>m^WE>$ ztg%Jvn%~s;5w5QJ4UE=OY2ypcHpZJ!Xnup$HYhZ|ziJy7n%`bE?_bGbKDG8jTcgsp zDKx*e`b{k~zq4x73eCQ%@md+l;dfPSw?gxKs`k1<^BbzRPoX(ZwfzgtZ>Jisl#v{M zJJk*@G{2o{u1&`49I4GMG{2K-Zz?puk7{qJYpo;Sz|3d9iTe1=<}9hT)NyM){#F`z zX0zAKH<{wv%il)r8f6dU8<}|aSFL%kAJT3^qPrK`DH~yP56GRX$^5_TuVx$Dx_-7*GrnslR=3#3@!B8r$+x7ojWO)A z^QpZNqqXMVSJSTlRBG)F-d5l9&V{^@r?vK4a64rbWe3{GcWvT(z!^m;&Ur4eLoWJx!3=w%4XcykEt}d`mq#i{g0=1 zeqFaM$esN@soK}+pSu1X)b_`{Y=7jP6gA@;$L}@I?Bw%1F^Up?5;*?G=lV(J#vVGK z+8Em1Q=g{RN4xnyLv1W|^UFOeW&BIwKKfrqt<4z8zXI(3+JG$A7Wxff6UXmX=*u+T zoH*YAJ5OrH{U){hT-|=iZ>u!zcU0ImiMIx^R`bZ+a}(gcbHlfSyHDdk1@1Q^d=}iC z@jn{AUcrxpk1zbY;XKRZzYy-+P9v)Qx&ZE6Su&TtXKa?7mqEr+!mrXuag4W8+n;i7 zzlFBCXnzlUdBx5BJNVSa{r4JA{!oJ-K8+rErXPmuW6@?Y*Pp5FvuEA)_!r!~_DB1_ zsqKrpW08CBQ`5hNbv1tcTjAQ&^_P1ORBPw|$5q%inVWVrLe}lug6&Vae`Zx$@_Vl` zUgCRCl4m_0g!|5(#z@@HzQgUG=f$!I>x4_a4ORUNJd5I&-8 z0N8fw&k*xN)IQIXTiXXIYMvu;a(x7BF8kq`k?%!J$MR9|WNP*FQ|&ooB$jnzT?96k z-1Z*>J2uDRcj4n;zZLo!Pp*&H^Xd4E{YkL>w0@dc7gK+l;-mdDmEAhIF991P^KmIy z&2OG_ApaD_SjHAVgzh)l;`=_aE(d2EuG?qf#$LrZS{12G^Ex{2Xj7bi9e*^2M z9-qI1?O*o%KfwB_NBbvO-FKK{d>pLicgK6~6W}S->e?TrR!fejz~@nXH~8&-8my0J zGd|D2)%EfGK1+F$Vq0U%_0jJ4`#G>NvL@btpNFew9qOEU%luhudx1LNt{1`1t+nU% zW$M)wANPp=Y4^YMbrjebS;NtAwVYS^OW2KNTXEWs!RB1Z?ekc;d&Y5jmtTY8{a!!& zD%Yp^J(qZEVoN-~b!)-xTfTkc;jWwdjpXp|@jqbOX>-r5L+#_9)3!E6&AyA1YXZ2; zwJyBOwI1AD>e)N%gKeiRW1I+fjM_2}8-mpxvv*^8Y#W1}BW>Q3<$lYYzfHjYF3C39 z^wC${^~*S{o99rwYwb9#GtN!%bzC_&o59uc4$-D&`|KarI_)>dmU*&|TfoaTo&(BF3!D=^E z`(Zn{%oOUd-u!sV6_!wGv5wiHP_lcX04}Hn)bBa5nQ&N4p&Rtx%9#N!%k?n z*KQxqXFk<3hMmEgQ~h^=>*JhxSA8wq_q2Ytm+O;$>VI*G*ge z4hH-CC~c1CKx(z-vz9!EV9PqXm);03^BxMfzv{_52W(z#8QWoCV`+0ukD&H(oZ1ej zs5wq?^3Mg^$M7S;_9tikD7ZfA@i`joI)%RpY@2+0kAbWCb4lX78E!jmX?HBRjPn+F z8Rs~-S{di9aNB81?Bl`e&Pn$F+u-hh^P0~y@^-Mg>v96MJpP?v{f!m>F0i`(9n|vt z%rFn^&kV-VmN?yD<2Y}&lk4jq&HP(u4(5ZMe|?<`xxVgg*UCD+Jz)1n#=QWnmiMqW zHQVRBxVM}4p1FKiTtLZKd+}4#wg@cG`}9Qc7K}+f{)@r##FC2_QtO{SF9DbPyC1HP z^X0u}0Nh8>&-QYCn){r1OR?EL@9vYpYI%1Lg4KL?dk%)dKAr<@LzL4f&aF6cPX?F$ zIt6aO)Z=q1Slxauqn10@<}jXd<9j;TczG|p1FYsAO?$Pny>Zh13~)Klcf!>&&Ub;; zGS1~-AIGWfOv-yG_Dk%1`z<{id?E8*eoN0mQ_uN57pzv^ozH{YpX|MNgY{E)JZDj> zIUf6BUh`)>=Ywq@egXIv`jL3=h3li9wY?Cmo-w}Q7R9Z2PIoZk;||3^qpI`=17@t)PwbApbbUSjHBo?PtKYy^g)$J?0Xyn*H&tTn6?r zj+|2<5ue9qf3lCR0jp&ntpKZKA6*CbaopOjrKmY>ar${ZIO7}#zL8?T)b+9NH&wd* zya6na?F(SXrR`?0Jool5g1xufMw>qRswe-Kz{&5s^UHAOSv@{q0jt|yU%B(1@mM#{ zn|Ak|-#&-|@$eblp7-vFy;t-c9X z%UXR4?BiN#yN#k|-^I!GZLqo1_wRr`^Xl$X*KZ}IjiSwVuHSbn-TAv6ERXFDaMo`X zSf2I!9@zD>jkc_xy7QAhTBnb9g3En>7rfl}--o-u^fR7ZpXT*TydPlOvhux~TCUBr zeh+vG<(i8B5Ul38{}Hv1bEF5) z+t1Lo<-PTDux-_ypX9TCsy4RX=U;&Bk8`HKeX+m4qV}=B`uvijW`D)W`D^f56zA?f zYI*K7_k;ZzQC++9r8bk?j^j7rjN?JD<4N4#!nLntA9-j09a!y{!sj8lYjGES)aPNa zTE4Nr2Yato&mR2)*jU=^(*x9M$@@pJTKFSi`<=P}6S$L@_CsG|sOgtBe+Dn3ww#_}}S$FXR8it+-*+~VYV25j!E%d=qL9qJk1 zbKo|Lwv6w2uzJQ<=fpOzIm{JXe*TTkx||E=M4o+ZopbmSxSdj-!uwvD!zYNPm@u~E!f8S1}VwY_%7Yd&p>wKv#& zWvm%+_4w=qZqBzantIlKKd|l8)9=@V&6j)q{$Tyo6MH7OdAzgg_E&rkfSbRZlLO)E z$$t>ocIwIh25^~wHe5gTxz(B-4Bnrj?Pb*(L_WgQO% zyN>eXsZVI)3!C^r6Zh}L(|#iO%!=n7do(=n*dxK^dil44+VhTmGg!@cTjs*Q9kkzh zryd9PZ=}>Shi?VjR-1dpF{vd+7g#Ot(0Slaig~ofuN$mwDRbf9zs#qoox_~V7gSmg zwRxN?wX|OZHc$F>B3P~QJB!A*b1}S=Vs7ne+XvQ`+)Kb}$=wfDOK#UiE$x?r%iM!- zwUa0r_Yk;~Vs7neI}FyA+{?gf$$c_dExFwTYH5EexXgVzT^e-M&in6TaNb{QgRf}f*B5*o_?9NVs)^rKaN|8vaN9rD#Gh#5PgdOT%w^!qDVI<# zrMNbZ?X#52DehDIFBjW){k&6LN$psz{k_BIsIR8@X#YI5cI(8w25gMn->(C!t)O_H zl3ztJmTkppdn4Gk`DWe(ww?Ma#$vy3ruaV67QZimZ=h&%49>S&;(ihA7{k8=Hc!48 zUk2-=erJ{ED`53rN`4RWRj|7A^9*^uPVM8kw0(`D=D5Vk_bsq}3ja2^?9X@LWq)pm z>!Y6Uz)G-jv}N4i1*_Xf*ZL0X?@?}}XfvPt@=l7nbK_o<$M$`&ds>_OPM+Ug-whs5 zv5hu;+`HTbC`YdGqCfb9-p6so%hV&FTiU08dIB^`LmAJY5z;GIkG0d0;@T8+iO#^ z{ngZt$6B7V{VnzHC_dUBqSoI! zc^?Mb*W~>@Sk1iVlK+NcEZd6H_77m&W)A-dRx9T~E&hK3m;Qf-ub`-BtsVuRMA4SG ze*t@@wK=zsP^%^GU%_hOe*-(0;eQ94cV%^+{{hxVJ?r;RuyM2{@8e)~_fF2#zrda; zZN?u<{cno8eS4zfu{{OPnR*f|&zX7}Jf313ZMOG)uAVdX3|P%M&3K*#t6y84spr6I z*()!AeOy0n&r{S~KXLl@BG}mGwU6@TcnR#BWnEqdI}hsdSq*lcGhcqR)$}!{HZ}KN z@>|Q@m-ffH+`H~G`2&^RI{8Ks%b3}#qv2{9cPrS(acgU#jH8%GoH%XZvfpFiYL3ru zzuY#q%{r_Jc8>O-B+goB>KXrdu)ot+H;&vm_G>j0k^6*uW-`TROG(EyJ{QR>nd@kI6OaPbtSr4u~`+R+{+A;X1&l|wo;pbLoVIth~rk=B~A=tLsTu1w$ zmKYm>)p8f#7~Dw=^Jt6TCSYyhn}ThZJez^l8t1F=^UUV(#L}L&TY$CY=haDIo*lXg`%E$-A*5hHrLNFs%0*B0IOyFrhz+&X&!Cy z+YzkI7|x44ww=JnNY0(XYUNt)0#97+Y5Q8RHh%}V4mo!PtC`37mdAH@aG7ThxSD%5 z{ofOwJlfOtbzp7Dvlm#+Jg%cWzB9mOo_*kIrxkhjg(r{pwB1ikNuJk()y(7mkjHl> zxXg0^Ty2&$x7wq~cOW=5m2yMqw=26Ti&N+Q6IPs4I zm*@0&xb~dW6ToWaIei=4{^oqV9j>2x#?S%o#K(5pGlovEHs?!h-|VBjyW)aC56A_sMAL$$bh~ExAtxcj9kuW13q{zvMm*T;@I>YJ_AiXx!)t$jZC?MdNp$;bGwwpCj9JdOYWXbk`g*cL!4 zgR}us9i)#TMd848NEOAU90B{&rLr7%;Gf#R!y>;^1_O~| zkw^xKfrLSl%CtOTsvD2>HXCoI<dmp)A48#^+3iV+xmPuVol3#p&E7`w%agAtzGOu^&{4+NCnTGm$Ve;%adTE|PMz+Pj-ZYg(lj(L8!2b*1tm%~L!v88s#q~$7jSzltugv6pD zZ~@+W^eC8To zTMY{N#A9}hj36cRUqG6RB``X=l zRt;sj>D^39Qdyu=nYru?W|L)bGCNsaPRL8_BL93jB`<$kR$ZxSx18p$KMCy5BEdqG zcfiDjr48mt6KrRpWr1Q@a?!oari7+^^!S|pJu7xLp$8iW5KC`De$URm*;U!9L)m{h zG~jB>S<#fRX~p|AmW%}tvf!7vgEbSF#ZlMb1_h}ds3DVB|K`}ajxBD$r)sS0{SJW; zY$Ii~`Kh6qR1_Nw!U`%Jlif5N%14(Io5 z{i@2;^BlC)s*gj(6S+%_Rofj6n`)|?V8n+VXKv0Y&dalaxie}Tnwp_3X54!+Q5;$2L{fsRJyY_x6W^=-riKZt)W78Sef$d4<2E99yvJL73{Nh zT+8*%C|;BK-i_ilXUrM=*}&Mq@PW5}_RcS!Jn+^^<4{`+oju zPyUE)$qBUJw5Py}=0_*AErMcajUuncw;k4^&l&JJ1w~7iqUDwxx_v_QZ6th+jBk6l zudUlP_tOdL(-`&V5cM&KRt@*+$9whBcs=USpEc+suC%ib{M~?U$*^rncf3A1w7AQ* z*pKFS4K09^PP+3iqWS$pWW=D@5b za_Ge#{HKFA7=k=iC1)kondv|L(Fe_}6HmFQwdiaEod-%vt2($ve!V8Bb|GV!q%9x0yE;QVm4uvL`0kv>p zu#{LTO0-ifXgzQHYTe%_w{C0u%Sy$mk=YW~v8pNT^FuJb6Z;4#pT@r3aO_)QA2Wr0 zk*~3@>R+%g$#e(y<@|TCZvk9quF)UIzGM@`zGTyF?CXTs7b&B~upVLV4g$_klO~YC z__N@h+T7)UOI5*y;Lqw2YqW6acdIVhO1?)d8nOn12jlQZf&%P=MH~L+E%c5b^PbS` ze`?g64QCTqmxw4>q^kycX^(}zS;XS9L&oLZlZibx3CV)&2h5~N_ z8GBnzG^k~mc)J5^>GC_FlcquZNrS^=TP6#pKudzoN$<#{3^+z757WSO9LqdzOr? znkzj4@D~waQJ}5jXXDzU3~dcxsx9vA+F*Hy4{J-l#Jd`Jm(KE~b{pS&z!FZXkeR@Z zi{Novnu`NeY$UH3xLM+ zu-_%=u*U+5R<4dO3j;Eh-p7HPeKN8mRdzq{tjCQ+F-Za}916CZ*R_vzM5(B7(m_6i zkXuqzCME~Im6&JtjG|uhil8k{mIYOdPika+g7$=awV33=fraa<8`EB5OXQil)dt;o z5G$XSHz2yptX;lLS835*6@D0EayaM>-8^%-OG{8OSNlxVKLq~Gn*J1y(N zJT<@t0I3mkM%f7&6V1` z!BK;7b0c98)nu7)9GBFeWhWs@4OeBjLk`S z6uJMkTe41uh36MOqW;5#3d!%x#6t3o>-mI<*+iNU0SHf%M|X>ghlmdyiwt1?t+Vb8 z;;-f+3+> uint(2)) + (2u * ix); + _80.Store(base * 4 + 8, bic.a); + _80.Store((base + 1u) * 4 + 8, bic.b); +} + +float4 load_path_bbox(uint path_ix) +{ + uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix); + float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f; + float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +void store_clip_el(uint ix, ClipEl el) +{ + uint base = (_64.Load(56) >> uint(2)) + (5u * ix); + _80.Store(base * 4 + 8, el.parent_ix); + _80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x)); + _80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y)); + _80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z)); + _80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w)); +} + +void comp_main() +{ + uint th = gl_LocalInvocationID.x; + uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8); + bool is_push = int(inp) >= 0; + Bic _207 = { 1u - uint(is_push), uint(is_push) }; + Bic bic = _207; + sh_bic[gl_LocalInvocationID.x] = bic; + for (uint i = 0u; i < 8u; i++) + { + GroupMemoryBarrierWithGroupSync(); + if ((th + (1u << i)) < 256u) + { + Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; + Bic param = bic; + Bic param_1 = other; + bic = bic_combine(param, param_1); + } + GroupMemoryBarrierWithGroupSync(); + sh_bic[th] = bic; + } + if (th == 0u) + { + uint param_2 = gl_WorkGroupID.x; + Bic param_3 = bic; + store_bic(param_2, param_3); + } + GroupMemoryBarrierWithGroupSync(); + uint size = sh_bic[0].b; + bic = _267; + if ((th + 1u) < 256u) + { + bic = sh_bic[th + 1u]; + } + bool _283; + if (is_push) + { + _283 = bic.a == 0u; + } + else + { + _283 = is_push; + } + if (_283) + { + uint local_ix = (size - bic.b) - 1u; + sh_parent[local_ix] = th; + sh_path_ix[local_ix] = inp; + } + GroupMemoryBarrierWithGroupSync(); + float4 bbox; + if (th < size) + { + uint path_ix = sh_path_ix[th]; + uint param_4 = path_ix; + bbox = load_path_bbox(param_4); + } + if (th < size) + { + uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); + ClipEl _331 = { parent_ix, bbox }; + ClipEl el = _331; + uint param_5 = gl_GlobalInvocationID.x; + ClipEl param_6 = el; + store_clip_el(param_5, param_6); + } +} + +[numthreads(256, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/clip_reduce.msl b/piet-gpu/shader/gen/clip_reduce.msl new file mode 100644 index 0000000..5845676 --- /dev/null +++ b/piet-gpu/shader/gen/clip_reduce.msl @@ -0,0 +1,173 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct Bic +{ + uint a; + uint b; +}; + +struct ClipEl +{ + uint parent_ix; + float4 bbox; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; + uint n_trans; + uint n_path; + uint n_clip; + uint trans_offset; + uint linewidth_offset; + uint pathtag_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); + +static inline __attribute__((always_inline)) +Bic bic_combine(thread const Bic& x, thread const Bic& y) +{ + uint m = min(x.b, y.a); + return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; +} + +static inline __attribute__((always_inline)) +void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80) +{ + uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); + v_80.memory[base] = bic.a; + v_80.memory[base + 1u] = bic.b; +} + +static inline __attribute__((always_inline)) +float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80) +{ + uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); + float bbox_l = float(v_80.memory[base]) - 32768.0; + float bbox_t = float(v_80.memory[base + 1u]) - 32768.0; + float bbox_r = float(v_80.memory[base + 2u]) - 32768.0; + float bbox_b = float(v_80.memory[base + 3u]) - 32768.0; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + return bbox; +} + +static inline __attribute__((always_inline)) +void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80) +{ + uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); + v_80.memory[base] = el.parent_ix; + v_80.memory[base + 1u] = as_type(el.bbox.x); + v_80.memory[base + 2u] = as_type(el.bbox.y); + v_80.memory[base + 3u] = as_type(el.bbox.z); + v_80.memory[base + 4u] = as_type(el.bbox.w); +} + +kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + threadgroup Bic sh_bic[256]; + threadgroup uint sh_parent[256]; + threadgroup uint sh_path_ix[256]; + threadgroup float4 sh_bbox[256]; + uint th = gl_LocalInvocationID.x; + uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x]; + bool is_push = int(inp) >= 0; + Bic bic = Bic{ 1u - uint(is_push), uint(is_push) }; + sh_bic[gl_LocalInvocationID.x] = bic; + for (uint i = 0u; i < 8u; i++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if ((th + (1u << i)) < 256u) + { + Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; + Bic param = bic; + Bic param_1 = other; + bic = bic_combine(param, param_1); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_bic[th] = bic; + } + if (th == 0u) + { + uint param_2 = gl_WorkGroupID.x; + Bic param_3 = bic; + store_bic(param_2, param_3, v_64, v_80); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + uint size = sh_bic[0].b; + bic = Bic{ 0u, 0u }; + if ((th + 1u) < 256u) + { + bic = sh_bic[th + 1u]; + } + bool _283; + if (is_push) + { + _283 = bic.a == 0u; + } + else + { + _283 = is_push; + } + if (_283) + { + uint local_ix = (size - bic.b) - 1u; + sh_parent[local_ix] = th; + sh_path_ix[local_ix] = inp; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + float4 bbox; + if (th < size) + { + uint path_ix = sh_path_ix[th]; + uint param_4 = path_ix; + bbox = load_path_bbox(param_4, v_64, v_80); + } + if (th < size) + { + uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); + ClipEl el = ClipEl{ parent_ix, bbox }; + uint param_5 = gl_GlobalInvocationID.x; + ClipEl param_6 = el; + store_clip_el(param_5, param_6, v_64, v_80); + } +} + diff --git a/piet-gpu/shader/gen/clip_reduce.spv b/piet-gpu/shader/gen/clip_reduce.spv new file mode 100644 index 0000000000000000000000000000000000000000..cbe8c1f03a0cf12750d3db1cde14df674037571c GIT binary patch literal 9484 zcmb7|d4QB<9mfau00FsA;W?7quRv->XdF0Ie!o%aWiTm8{1Kc4UJ`#YcGedfiMk&8wZ3L^?*3Zn~W zk0@l%q{2v;LSby7JsUf_&gfb`)aYHl>c|y3j4!lge)>!xu8px3=`U4k8g4>v6W}nB z9NH%3x-m)%|BYnajnYc>&aPtTmMxuK8;d;y1I3}nh1GJgTB((by_LaoPouJ{tl#Lw zuTtx&?(9t++VDvGXrl&`E3B{dWL=1TN{C&l+sihjjhT!SS9*#)_5N*?UNY1e+}YDu zM)?SC?a5Zl5G3WW=DJeKjo438>IVXW3my~Tl2qp!GaTm6D1J6EnXrnN8` zTSiwdKp#_>i9Xb*50;B`q1%5cXRYb@5t7xN0meJaw;y;zwKA}=>ipUZ>OKRd!E&up zGz7LSm_?b1eFXdTo1UM{^G9J~dh@_cM*5y}Wa%WeYo0 z#+{s_3i^Al&ad>c-j!OhQK^=*n5NB>+gGma=wm&?=vo=)+0ngNYBm|X=bHl&G~P}B zcKCrtPc;lt-*e@@(p8xD#EiyPs@3XYh+O-?HwH_!q1+I>=gL#F*MD@@t7oituylTZ zy;iUEhIQ}Sc$TxB+%uQv{PzvsgTBYs%sD=6Lyc0;d12kVt`owZG~_w~=L?v=Vvf4M*D zqse?XF06bjqYQ;a~QLF`Fl|eodnuF@0p#;OP%G+bcRg#{z>1x zE1smc6mCRs^yxb~yRUati(U1eQgw4}7ek{`uWdd77wj#CTNo>~fz0D%BTE*lFpmQ zqSD7X^}+Kx2kScr?AEvu@M&D@FntvIQ01chPHHR6MdwB;Rh!o%Iob+~_37hc@!HJ# zX)7EOoVQAL_qG;}fZKb2w-#2TyC;NXy{yUXF^9H7M^oQ(7rVBb;F@Wp&ds>iJ$F*9XWUWx z_;hl9jl#JyKPh0#5VGZ1%H?Vs!3A?lt9?G{w`dqw^9T%UzLg~L&7&zE1_ubNeDzmzS z(YQMqorik#@eQz!v$1~@s9jF~Z!y}Vym{6A?UvL2{Tv$;`R)b#9)y0MI{Bk7Yw;(G zmp7O5SjuXdXDpm+rP$K}=-Q*F1M?VpbE^A0ZJyBmjaFYuq-*FsV*J)z%zFmhMD)me zW*#GN9(8|b&2tpJc{ap8?$aYM?ytV?`;CbGhki?<-wD1e(OsXr6W#cG5?#CBL6J}W z{$ct9iEjJ|dQJWP-U(g1-#e-9_fD$&4HLTO-EWxCjrSWS)jJa1JIwEw)b2M-=-T~; zQD4WZd44~Rc&6RwV_8G@_!EfyajdZGslJY=^^BiHI*`T4t@#*npF-EZo80F4G$Q96 z6ZJj=_MJ%UeHLB*l05Equ(9&%$YkBmA>D|%Zbzc#Zm>3cb+6UK_Ia?}#ms9V;};OQ z>#5_r_r;uB%xJu4L(X{ZUqa-rp@(+HFX!A=M*F`s@AoTUIqebqRd5>nHFUX%?IzZJ z_&Q>|c6;zV%Efwp2RwX#z6&RBEpvPiG0qt8f*&B>5Bix?T_5p1jQ0L0@68VxeSaJu zBG-=@e}edE|0$z($5_*!fz5G4zJ@;s%iW7SMh^8KA?7kxd@p>7QMnI^T)zN&zC!;c z*xYyK>-sD3RzyC|#;?J~X^S3y1D5v=d6+r=j?u?{wEY&5vmbG+?H|Cy*Y=Na^3l_u zz-do^Mz<&V=;<$D~XN%)rkB1FGe5tSKAYaoPCI6e@Bi$L5z#s zE$H(0;a#mB>)Zx*-rBsk)m`V2aP8okreoA*?@`ln_}Y4J`MbIg;@Ww~IREHlOm4H5 zzX|GM>-*aqe&fKdMXcq%=yF@(;{1Kp`?3w;ULeX*`*=wQTeEB%aD*C+fB1*h>#(QirOm!bPD zrl0ZZ`nYZ}Z^z+hIPx#YWn*T;x0zPG0{m zMs;oG>js-c+ZIOkxKB?5&%?(UZSG5~pX1{!z#7DFet(nvjq}+DSxvstjAIf!7VO&Z z%NXDO3&6(4`+ES`nDqTU5M4fe4g#Aiy?d6R%a0?QYkx38in($mk9}PZHcmd){V=fg;*MDX)=xfiuLKXD@8NLr;d2Do`stdiLYI&F zM}m!$kNQV})B3B?^^^BJx$bKa`_lFVd!rtE>iP5i^qGXXr{4XO5%+N_W9;Kvu=&(a zX6zcKpEFD^4%2redL2BF>+x@z4d@++`?M|}W1c6VYu_F39awH75?mMUKL)VpM9g8FzS`Z}0Y+`c>9-4!^Ia3i zyXN=nOvGmz67Smd9FJz4f%x9dVvP6hLhvXg-jnBoI}qQKi}KOh?cj?M?eU&GFXz(t zz|as@culb54+AkP2Fe2m^+fv!FF12$Hh z^RWlHsPkH|T&&aU!1kW*$?MVOW1Zdrj&(BLymIJn={`--rZ(mXW1Gz2ge-K?R>R$tn`sTI1 voPJUNT5wwbL+EnXAu<0CgQLEEMg5O}wMG5w!E#am25{6jul42hi}&Q;7u>jh literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index 16d47ceb1c63abdaee8fa52a8b5a604855d036d4..0599eb8f4206c5f32603d9b8a756e810417f692a 100644 GIT binary patch delta 6961 zcmZ`-dt4LOww_65Cdm*WgapG&6A^_3Bp?sPRuY1UfEY1QrPK~oJshMI5nHRC=EX}z z1qmGRaT*F}cqt%Mv|tlJ3y2i~TaKmP&{_{t^-^j**!sA8AnCpLcYink?98lhuf6wr zd~5BQOz7L}60s#*HvIV!i3ieIi3i^-xy@MpF$_UapLdCi3(90bkXOm7OG|ll2qGX9 z!YB|Fv1NTM1TZWBgTVW6nem>02|+L6(m2N1i=A*Wj5fj*tJ6w1Q7?-QUs!^lXGV`y z=@|A}l@ZeXd7uRZyl+xO!L3{lyC&CJ>3i13+( zLuC_xYw5}>*}#JQiSW1b{~fgM-%H;LmO|H6Yu`EPO2YYC+eBWWiB~Lj7Vpf$(u%S4 zW-Ma}%bdotShQ?0Ek{mE7#n+ZH+i^kV0`i{BIO3WvEYk<%a_|iE_uVZbD6iUy|=vR z=K0vnX!)q1XHTWf#zR!PYrlu+y;E%dj=t&S`vVW|P2Imf@Zc=6*%M5@@~;Lm#o*z13JMg`tmJ>!0H10#$^iv^Z;M1CnPW%((k zm?Gl&>qWYF|FYK-6DN8?m;4K+TlbvC(Mw=V_kjdn%;C%6cn&`j&gLZk0-zfOw>MKf zpzU{{viA@^T{9g*l{}EI<3lR+k!gM`>;|KoF!(|cA+@Pp!10^-xLK2chY#LXwjnF?zC`7p~vJ>FCyrC)}7Ao1TB^n=R=@Wonw^4^+diE-dT(*zj13L z@uSnUx6}|2sz~rUGh&Yx>CXyk%^Lb9>3>R?B!0uIUHQN@$@ROwc(uQyWu; z3O=Og+ZGz#G5wIlov@{bb;`sU5@cEo)na;R7w?5&p(cg8xVQlJ3Z|o5dLZ_%ET$_9 zev>J=Z1OX@U?qkEVoXJUlmg*)QSdk7;@&rr9xOs-PnBoO@#cKfk{JOnm{0i|PxTpf zvnq(G^lGs;HNy_mJa zsOIYc=0@@O5bd=D%bSInA;?|{7KDV}kZ?>EUkG>G*FbiEkd8{2>?vD>$fawN9bg5- zPBMjAJrJP$#lDGE)C)Yof!P)pCv7N1e2}I7V5W%It67?y0*;gmS9p41LMX_YlUi=% z8KC;L;8B&nyEL_YLQ>CVW~v=qNn?=*;ZwOzu5Q~?i+DWJnE_6VrRV;e8VMr`vO6ndv!@Y^Z^)xNKa zl)&PA*Jgrw))n_K=$Gqr+rgGQQ(R!w$jhA8_*Z0l73no2DzkBgG1XA010Z5?(WAn<#`8=(S%CJ~9kxzseiE`;-4Lcr88C>@PIi+0U zVt{h{-1V(mexkT@-99%CRE>>xu+ynRmR>VHbc|sUC*oVE(Szkdb;<*7|!YdXvIuwSZOC5S_rTdFbKGX!X$5!TDY|5ev zUm(s3{v#i0kUWLKc^rF@so^q1^xabIB6ERLBOxhyo@Rrq{O5_pza}TRJd;J!^%~Y~T!KW-PdMek+I>_N0Ve1RS!#>HGPXygdaNt~_A+=x zSQWYXJKUu!wg(EJsb+iMnuuoRi=ogsWCK6EStE$se?J(oii8KPNb*2-wnfq-nRZ}9 zDrxB}meRPr*9?KpR|Gyhu3u}BY$|=t>$M7ko*y@_jjcS&vUp%*>0-97ztBYHh&Xfc zVs#O3v#WsTzY=00R4gufiKNs)O9jp)eyEAixcDz-m~bp?Yv~M0JSkA3c$RUh;w!*8 zVrsKSY1(-A{u^L>gfk>6XDQ@Sn@SX7DN1BtLt8Xz#bocBb9+~9rM$1F5CMC;6UDm2 zXN=^0I#nFgY^#8Bo%RB+yz$pfA@d<~*pS;_q5B6CGA>y}rg~1Y{5L@_o}VyLN@jYN z(MK90hv{x@M|nIY$qy%lX`e0a!E)J5m7t2EoyeEOfrvJJr*WW?=a46^-7uCLfq-0l z=>S5mrvNkgz&yZ`7gMkozd9TOsUxLdC$31>#d#Ve>%O6^hD~^W@}@Gd95aMZ<}@c# zE?R7x@tDe|p05hxo6Xe1ngrusEe)HT90)$<;0*si0)gQ_SxY@`J2i96QOVETj;PgrTye(hG{@ z$`bfS96PyMe`JT3Hmyl%ag17(@;;U zB`GIj`v;?o1IVPz?Y9<@)!Km1+0BDw4dE3Zu?s;~QbC=7*sQU5nOl3lqC}f6Jq5J0^7mjxK}U}-8+ta^H1b0_kSj%sZfIMsE!2+YUkEd z#GsL0L=kJYQdSspqFi~I^f5Jar?b(_1g`>l{$euj(GN10{$;20FyX!}F9HRoNEgdD zVQb2;cU?i8yz&2x0cFa0z*PDC80T{EeepPJ+?F>O(5QArg6`Xb+L}%U3^rxNq>K+e z68zxClqmPBbMYWEhX6l-PL;*vU^~n$>VISZShep5^J7hj%{I}#bLPD8Rga{rM%5qL zZ42EuY5qm?mJKxzo&!#7x0_gErwnSdd2@x$nW|&`?gNe6(AV0vX&~V?PjA_AId*Ssr^)+}@poZ}y5kAiiVDvD+LPJ1PF9TVZoi zNjeJ7%HU(2f%NphJ>S(Mgscu4zPEm^L{GVE!5J7K6+t$W9;T6Sv|Pf z9-fFCekeiDRa*l!?%`l;aVQz6((lt4HQFBAoyDtxv!=0sv6v%}%vAyI76(;I%usxr z;{LQ7$9}u*K9?=3MfXa`6cBoFo0$r8=v!)-J9;cpvf9!f-F-fEYz*mG0otRrqjr{l z_3WBUn}bbH$lbj2fXaP(EKwe6#s4vV>@H%S!&g}GOT)e#2{z3ZDM5ql@RME0tkerI zo6NmeDo$-C`)tmu>A3O1TQ5%EdOrL0JlRM6E?b?gT{Cy4#}ag^I-$rOM5hMCYM;ps z@xW|A-ITn%HEXt=9Y6j@$E{a`>_L+x56#Y2eX?78Lr%+KafEI53Meczb%y+R+Bw2lFY+kFxS zSloNBRS8fu`S@64am4k~?@LioOw@t&+ScE&%a5(Ilyv4w*1%kY>>IH=by zWW8>vVjn(Z&dqfUt#TYbb0UJilktquQ?sU_8&s*iP%JR;^KyFiWGm9)7F~6hk?5sN zOv_8``zRgAh(BfSNmbu5lDuq_40%aP`oW!yy96q8`DsW5`b;kKgEmAxVrW0vQm)aFSEDE@*^jLMGI_#w9tX7TNpwuwcS9<%M&fzdelX_^S z2A_;tH*b3n&rnL0@z`MBu}6K!;tk1*kLf9^hb=yf?LQXbWvs@IPt%Vl8cwdzY}Lql zlh8^z%W(~>G>kXdoFRr#SLUp7krq5bOgZ7WhA0iI*uhff1+(yl@#_m*H42JW_>3-` z_Gt1bBg#`av1(psls1t9&Ku9qBjz2uatp=N$tB`pS;Tr>L|uL~-LnbOS!G+xsbxBOCBGR*sT{O=R3*uTvWk9jX=jO0a zwuqR&C?XbGclBdnOc-aB?K&Fodx!0k@Yy=CGG+)9 z@$Robouwh$596}zVFs?+q-L&10}<}nZ~`GgyK1=8V9mMj0?*<)Ya-#5M*+_|bb;@L8b?6Kq8x?etk>SK zI>&|RlR&`hAL&+)7rQi)7ni1D+*D{Hzi!Aq_QoB&zwySVPo+Z8 zf3;CWNRaC+!hKX~q1{XIf8<0BSl}(i}F+d>=;-Qq19Y%b+vz~GU15YroQ6FN^1Hlb5{yu;79itAgvKYk@ z3ONdSd=gC=3usx?NAVxe!U9on-WJHl>aCEy@fQPYbQ@|F(X|}EGF5z%X}wjpsw}>p z)2MT49F1<$`IQmzmY6&pSEutueey7$YwoQ+YoxSeeR~Ot@cY474L!tVtHM85gy|eM0bX$;wIrV3z4Bi!< zX<9DxqrHh;QdpcQewvW<5<`s!b9s#>S%ZH#B^2u^EyH*>*yY^iv$~;q|2cglNs9EA zKbiG#qw>DDq=CFls>UYr&F8_jr^+cq;N{paB3GEmXGvavvbBd7>VPZ%~GczN~M!!fx8C zN#+@vlLL*Y@_=n-N_onH9D!j$mB?T|Z!KUWR~kz28DZ0m@WRaDT4CfQF6)p{Y_+d#CPr%;7*-zI*$wZ`3xZaRf0?ILTO+}Bd$9$(rLbB;3R0!mzJ z=^bE_pxutZm@L}6R$~e;C`V_!rzw7SKWn;UvF5|h@So46lbVDqUxWGcHhj!t&HQ(v z0;@HjX_{KoeP`}AeII^dDA}2=z(VC%c+7)9Kx!&h8`U>(#y$o~#03O&i+&DRe&t{Iz*U znHzkmpVz>|?qQ%<2uO2O9r-D%X6j)}v5Vz1OrJ>}QofFEpjIuEGb3N3~4$)f($FqOK|77UhM^u5WgAy{Je26g-XTdL$$+9PJuA()Frd z_ba$%?XecJFTeTue9^Pe3#8{MO{Bm8A=&bQD$Q@8(#Kee^{;;a9&UiH4%%Eb+gy!o z$bUh)llK4`tg#Kq(Q9)I3BUNwg*TR{+{zVKWX}2}G6oXv+K{bKIpK~82OtGCkR{p+ z)V~OnP+Q^))VOiu|9Pc~n}0Qb2mI;UxXsmk*L{Y+Une@?(@)ZwLRL<>+FfG}5~mw} zBf}S7kpVG*UBUZ6vMJritu4{xYqO&EXEwbcN5QA13zUEg&n&QPOd5@6HSP(*jbx{C}xiA2j@F2liO8}T!L4x_EBsh~@Xi}D^ z#F1$8dQ$Zo5-k>z=y*?3bpweul#%E!0J>a|Jf}K_RQ(|d&hnevGzr#zNrG9GBzT!4 z3En}1dpD5a+jE*%liTc0w56Wt(H1?nMXMad*WCl*e)pd5yO$5Xd8~i-+H38# z{?Yxhe3qcfw zKnM$hW+%^!g8)VdU~lXoC?e8$&x;4{8zpck3o*d^r+-+skbOacogNo+T0c7GfL@Q) z_pvkmg=eFn%>A%TjcVFlAuIz70709j;IC{oFS2YC$7!4z_5U_4J#*c7E_BYztGqyp z7D7`vH#kb3Mbow~uH**CBrmJyDB$+8pE&6$XQJ1CQT@diGRSjhaOl?;>{$>xyZ8IP zZ@cfcwvXo$QoKrGetocdYncLI(u_v}Z|us+(Lu!~RKaVbc&RzHBYuYueG6^i6@eo7Jn7IXP={Q*&3Z%~`*QRVQ%$D)xZ; zp=D#bmL7EfwWr+ruZY6mC-o=(9+fmcG}K4Wh8jnsuUXU9`*A#f@LA9iZDIdcZf_6aA_uf*xZobvbYm41>%_sicC zCUML)$y#D_IZeQrm?B=GVp#+{Ni3TKr-@~ApS|}Ki)Q-ZEFzIlPK$zcGA=zCN*8S4 zu%KkZlQ(oV`hsgKJp2rE7EKK`&I>RH4Gp{-VJ|jw*@~agd}QG>qjAlDvhxSW(LbVZM4G_n%2w+ zNTo(e*p~{aSKn@FUAz)a5%^OGyWBb2=}Mw(1pbW%qg^~27<`x2E=dbX-oV9D8N}6T zQ9-@&afE;Ic#csW+2=svu|l4z9F{}hxLM2uB6A~RnX3`HF!W5h5vR}>I0@^~(v$E^ zo(ehvKg=Q=*%D|Ccsr`;@(hv4Z(H}uoY9aOi5 z+mi~(`CafGyw5zHV@78~j%;#GpIt+t$@77ibc&3%S*pomlbR5oEXdK+06SUCP?!4~ zR$Z!a=N4`uzsNWS)jy!b6rVk>Clb#lI0HBoVpjvLk;aYn)TOe=(v}^tA1Y&WQxO3PNB0!Z5JODM5XOwu4kVoM?WIODtlJ)wV`#5I) z_CCjSvfUFni?T~6BlV5E*-kcB9u@tY#J+8CthR^Af?3PnDj~^3v&0EFu}boWX4_EX zo!?tAQ#5cm;Ut5-Q7(uT{q6kdwg^fDzHHDb{WBj(fJuZ@vqYDsXOU$}uMGQQEv8S& z2ag7qP`?aUK~0Lu>M}LR0XPc>hQO1^)H5dK_$x(P7zBv#N-xp4pzEdn%0JrFX8t*0 zDQ)sMAK`3ph^V9XfBqHM7m7mcJ4F8M(HcEzUr;QSGQn`f#qW&hXc59Ibk`k#@|K`i zxEZc)cDyW{oov#NN`J~O~oU1PRTFmI;ks?Ip zP+JvJ68tA>X9KU@n1=)63IH_(tvOi@oqGeBZgMC zzz$caud8q!_JkFa?2v3Sx*v5Sq2l}KL@Ay3xUiKgpUU_3nn)pen;N1B7%y~hmq~{O z1_xi|6p%Sv+J^;QHbElZu;Zhl!A*Q!PzzKfXGwFo8`-=J*BW16uMi3`Z+pRfl3`z? z#fYLFSU#PbhM2eJUsb|*7O_iFHAT;Y<#aoLbPxdvIqdl}bIks_U#0W88VT>+o-ye` zAY5Ca4pE#piZa2_u_N0OwI_eGvB<=%CH}X`P3x#!qAck>z3vY@jkgFT2c7~9!aNql6mnJ(d2#aU3yd{Nk z4pBTI#4!;Hr&kdF-wnHe%!hne&Z^0WL1%U=M+C0UuetGboA?gbWb^2% zy`IY5#ZnIt^YF^?ubd1;L8qK_PI+udiBp#Ra2DaM8S zB9NnzsT@huwG?6k6JiR3Xeh5H*s{l1)=VrQ8gx}j%X~n*nfWRl?c^6Ysc<E=>Y~tB?{IAE20Kq4s37(|0Thmo&BXGDVXHRr7@?P%SFI z4x*nI>i;(jZRDj1QDxA5pe_37az`6e@LP{=3DgeLf0TY z4M`=w2`09r@A_hjK)kCZMLb(YNSr*@S)si)R1v({wu*2+?wTd0bP`ib;4@}Ed@~=I z+#xCjYbGn!x)zz#kqwh6{5plM5{gF;I3$jK@%fz}I7y>_ScarQi(eF%3qg5pas7ho zFmj7oIIUa4nrz7Bx`W{8)`H+LT6y4Auq{H6oaN&)GF)$I#*hUsIMPN=7EUfi@d`B6 z!gwUMRw-=)s;vpDh3H41Xtt=_@|EtOP6pZ_&^#N&xW)z{hORwABVXDvsgBmVb3)qr zyim?cHgNpkMpS9P*86233`+Q+uZO}cQXL8mvY zPtZ$&V#pq#C$-Dr*kp&vo!F=AFdq@N>Wtl6@_(75KcjqxUY;)glhj8p6wH|@ZX}x` zKE~$IzT&N9mlAd?m?0jr;oTLq(>?Al{DL`OuxMJ`CHzKf7uM+)SB3v!QrS7rh%YDtU0mQ60RPIXS!J{`)Y z(<9lmX94|GGoPH!OlQ;pft;_Fyx`kDsGN0f(G=~ULmwO&`QXfgKlSr13|>9^f|*a_ z7$~Wv-N(XWsXtRSe8SI}r~J`5>%Q}>mdR;qrePPoZHoV;=!YSb!btd4rsNXa%-705 zw6DYMXi$5Y)}4ISIemTg^k;dqdKZN^a@ONy>Q7&I^ey-3nA8Q2=2T$6vn7VzCDLF< z%dzk$>qFt!eS+5wj;}TG^{uZ6IVQjA38e|MWvIZ(vcxKwkKOIn1YLcOn&TlX+nG7d+++ zu15!}!w3Hv=F{a^JUsP%5wBK;nckI~-aY-{-NpDIx*yCBY<`AE#2)|f*yGL0@Hgi0 zH|*&^Kz;D9_7B_iOxMsN`Ps8eIe7Kc!DIB`gpE(Ep7j(>*<>zHJa5~-+rfYLJN@p% zjXBV{!nazdJQUPz>+)VYc>>qV3v9abvyP2>r(L3(DeZ+kW*DCnmOL6GxN@o2=%7rd zAm$WCaX0Ej2M>$nRif_?xdEB)i0o8hO!P*bb4;FdSe5h7hnDE3B%AQ4$uBU_VIq-k zw@H6c`mpG^a(uMGntzJLmaDz$^#!(`Cm7H2x@Vu3$@DyGFI&s=Q87jLe!_Bd13k~# z0#6;59j)l|v%`^&3oSjFJGv$eR58ug z^NQ=eD$+qW_L3f-d~jBBSVy~KkuX;3m_=l3PK-0cs1OTIVI#hWsRd!unPKC2oC zss^JNh(bLp_?EmCG#k#DWAv&xldM?k6X}$jT4*mHsRAS_oSh*$9hUICy^sK43Z0kbUmkdrQrK_V# z9!8hM84lvQB{~u;%v4h5JWEyQ;~vFZCkL;padWH0F;%L_4Vt})eRFLwzRbU1 zuLDvowyO?~sm_h0L!qT+PjDE(1|*Oj{hWo!9ZL5)8Ke-)DAGD z?5%U){e{z+*&r8aj^tulqBb7v>A7k6bv<@+gv#e6^gf*XsBOXD!3$&|u?-ko;u4FV zkR4XaJ0>-;6r4Hk&pDE=GTU+_CA=ILW14xWbLOv|GwTz6Dx9H2{#|49?4hoidlE_) z%%AmW(X8f#kX}wBr=FqYI$0=fkcFNEocuW5h=sZ8Sw$##f~9rVP=gf+u3NE>WL+N^ z$_1euIf5?kb5`{&NZAnscI}FGR#1-`4Mc#wXCQ}>#R`%noC+=$$L;Eh-qjUQAxlV6 zMvurQha@zJ%gf!$Rr4xTffY~!o<e*@v{v1)d$dmmH z#@bw4t{q-?`~!@NsiCl=)MsC2wfh;MqfNSfBWhaZN4NRyKR#vu@dF2sua}!@N3#CR z-s3B<%q)K9XGb%!Lr*sO?f1#dlkh&4Bwhl0@I<`lcJf;qMT`cbvWAtS!?K>paXss+ zTi8Z0&7pnl`jdwej#-Z}bj$rpXQSH(-yi$gPk53cN=^6uLfI)JRaxbd?d2;zpxE!@ zyW@W0O7N;hIcMW%Jh?Ep^1h4O%h%K z_!PUB1-LrU_mdW{sH}WIx?8+PjgW4%L;ah_r{86r`Yon@cvnkIsy{q9;cKQiU`9;d zps0UP4^C5rZB(Ul)J9NWhG;&k`QC||?yRO4F{k6#R8pf*x<>E}WjXZ2ziWWH~o$ z?sFQGTNSqISJFHnuVsC=PVwDy#dmv;KiP9phrNlharydy?^2FKO>P!efaS>Xtb1U= zwtxk*Zi36zaQVhmy*Z*&Gs+TdME3E)6C2;8tx#(#Hl|jZM*U3qzAp+Gox@FY;GK{L z7>rF#CEdCug)4-1bXujChf&G1#l<)Eok#qlYTj~!)pZMt2Zstn)#Q48*V*G# z^Bt-=aZU5~6P(VG0LYDP&XH5~!BoJVw19>akD91T%ER{2FZH)u{i>VePd2BVY;Mg0 zg5tmZUfHe4#8V$qH@Z!6YZs9dviv1@&;q`}1|{C(-E=0$L}3H-Wb@43x4-Z7?( zbT^w!i3^akKJAs+*D9AR?n=R9aRE3oV+#_eavzY&Y$eiEYs>vTqdqEFOf79(we;Mo zrCE#3Yfb*_Y2{{j)mO!piJK8RNt^xg<9Yf<&Lf;;^(_kM>kKf$(~PF6ymII%>CoWb z!+MH7guWSiF{pBQpmso~^>#91@HCj~Ae=`0ldNi8&XBT?`~m{DoojXiC1GDilDajk zO-X7=>1TjGo&loZ^n$+9{&(oKc3Q`znn-t!ndTooA_qOJRq$kl_$pRh3Q2k~XGL$p z^6x^1d)Qmse{}hv_Ab-Il1oX`merDnK+jMQ`!Azlglc6`%eP3D54>Fd*X0$BoTot5 zp6mv}6L*8zKW}H;x|UHYd_|b8f)?Ev zm5IE(-Ies|?WC;5k-$1D@Gx{&kX0ebx(seBJJ3I5jmj|VS|j879|apL1RF048hL+N z&;r+mN2ps3*ZFIMPw2tR%JIe@|M$m6dO~r4!i*IxnAG+(WoJA2d_%9Qb0REPuq1-m=y3kH5&35gT3G)KXqd92} zt1Nj0?;P(_VUej47PEr7ClyD?(4}ZD1C}{5;6X0{W=At%Y9|ApsSW-0Ry&v4!|_;ID=M(4x#4)bfJqqK=+Mh&}riUI> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _706 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _706; + AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _636; } Path Path_read(Alloc a, PathRef ref) @@ -295,8 +289,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _814 = { raw2 }; - s.tiles = _814; + TileRef _734 = { raw2 }; + s.tiles = _734; return s; } @@ -306,11 +300,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _1135; - _296.GetDimensions(_1135); - _1135 = (_1135 - 8) / 4; + uint _1055; + _283.GetDimensions(_1055); + _1055 = (_1055 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_1135) * 4); + uint param_1 = uint(int(_1055) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -324,9 +318,9 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _839 = { raw0 }; + TileSegRef _759 = { raw0 }; Tile s; - s.tile = _839; + s.tile = _759; s.backdrop = int(raw1); return s; } @@ -361,30 +355,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) { - AnnoColorRef _712 = { ref.offset + 4u }; + AnnoColorRef _642 = { ref.offset + 4u }; Alloc param = a; - AnnoColorRef param_1 = _712; + AnnoColorRef param_1 = _642; return AnnoColor_read(param, param_1); } MallocResult malloc(uint size) { - uint _302; - _296.InterlockedAdd(0, size, _302); - uint offset = _302; - uint _309; - _296.GetDimensions(_309); - _309 = (_309 - 8) / 4; + uint _289; + _283.InterlockedAdd(0, size, _289); + uint offset = _289; + uint _296; + _283.GetDimensions(_296); + _296 = (_296 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_309) * 4); + r.failed = (offset + size) > uint(int(_296) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _331; - _296.InterlockedMax(4, 1u, _331); + uint _318; + _283.InterlockedMax(4, 1u, _318); return r; } return r; @@ -398,7 +392,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _296.Store(offset * 4 + 8, val); + _283.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -416,9 +410,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdJumpRef _1128 = { ref.offset + 4u }; + CmdJumpRef _1048 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _1128; + CmdJumpRef param_4 = _1048; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -430,21 +424,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _1156 = malloc(param); - MallocResult new_cmd = _1156; + MallocResult _1076 = malloc(param); + MallocResult new_cmd = _1076; if (new_cmd.failed) { return false; } - CmdJump _1166 = { new_cmd.alloc.offset }; - CmdJump jump = _1166; + CmdJump _1086 = { new_cmd.alloc.offset }; + CmdJump jump = _1086; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _1178 = { cmd_alloc.offset }; - cmd_ref = _1178; + CmdRef _1098 = { cmd_alloc.offset }; + cmd_ref = _1098; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; return true; } @@ -473,9 +467,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, param_2); - CmdFillRef _1012 = { ref.offset + 4u }; + CmdFillRef _932 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _1012; + CmdFillRef param_4 = _932; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -507,9 +501,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _1030 = { ref.offset + 4u }; + CmdStrokeRef _950 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _1030; + CmdStrokeRef param_4 = _950; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } @@ -521,8 +515,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float { if (tile.tile.offset != 0u) { - CmdFill _1202 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _1202; + CmdFill _1122 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _1122; Alloc param_1 = alloc; CmdRef param_2 = cmd_ref; CmdFill param_3 = cmd_fill; @@ -539,8 +533,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float } else { - CmdStroke _1232 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _1232; + CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _1152; Alloc param_6 = alloc; CmdRef param_7 = cmd_ref; CmdStroke param_8 = cmd_stroke; @@ -564,9 +558,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _1056 = { ref.offset + 4u }; + CmdColorRef _976 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _1056; + CmdColorRef param_4 = _976; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } @@ -613,9 +607,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref) AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref) { - AnnoLinGradientRef _722 = { ref.offset + 4u }; + AnnoLinGradientRef _652 = { ref.offset + 4u }; Alloc param = a; - AnnoLinGradientRef param_1 = _722; + AnnoLinGradientRef param_1 = _652; return AnnoLinGradient_read(param, param_1); } @@ -646,9 +640,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _1074 = { ref.offset + 4u }; + CmdLinGradRef _994 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _1074; + CmdLinGradRef param_4 = _994; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } @@ -687,9 +681,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) { - AnnoImageRef _732 = { ref.offset + 4u }; + AnnoImageRef _662 = { ref.offset + 4u }; Alloc param = a; - AnnoImageRef param_1 = _732; + AnnoImageRef param_1 = _662; return AnnoImage_read(param, param_1); } @@ -712,45 +706,13 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; write_mem(param, param_1, param_2); - CmdImageRef _1092 = { ref.offset + 4u }; + CmdImageRef _1012 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _1092; + CmdImageRef param_4 = _1012; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } -AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - AnnoBeginClip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.linewidth = asfloat(raw4); - return s; -} - -AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) -{ - AnnoBeginClipRef _742 = { ref.offset + 4u }; - Alloc param = a; - AnnoBeginClipRef param_1 = _742; - return AnnoBeginClip_read(param, param_1); -} - void Cmd_BeginClip_write(Alloc a, CmdRef ref) { Alloc param = a; @@ -777,44 +739,43 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_1249.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1249.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.Load(8)) + bin_tile_x) + tile_x; - Alloc _1314; - _1314.offset = _1249.Load(24); + uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x; + Alloc _1234; + _1234.offset = _1169.Load(24); Alloc param; - param.offset = _1314.offset; + param.offset = _1234.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1323 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1323; + CmdRef _1243 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1243; uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint clip_depth = 0u; uint clip_zero_depth = 0u; - uint clip_one_mask = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = _296.Load(4) == 0u; + bool mem_ok = _283.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1529; + uint _1448; uint element_ix; AnnotatedRef ref; Alloc param_14; Alloc param_16; uint tile_count; Alloc param_23; - uint _1841; + uint _1770; Alloc param_29; Tile tile_1; AnnoColor fill; @@ -822,41 +783,40 @@ void comp_main() Alloc param_52; CmdLinGrad cmd_lin; Alloc param_69; - Alloc param_86; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1581; + bool _1500; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1379 = th_ix < 256u; - bool _1387; - if (_1379) + bool _1298 = th_ix < 256u; + bool _1306; + if (_1298) { - _1387 = (partition_ix + th_ix) < n_partitions; + _1306 = (partition_ix + th_ix) < n_partitions; } else { - _1387 = _1379; + _1306 = _1298; } - if (_1387) + if (_1306) { - uint in_ix = (_1249.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1404; - _1404.offset = _1249.Load(20); - param_3.offset = _1404.offset; + uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1323; + _1323.offset = _1169.Load(20); + param_3.offset = _1323.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1415; - _1415.offset = _1249.Load(20); - param_5.offset = _1415.offset; + Alloc _1334; + _1334.offset = _1169.Load(20); + param_5.offset = _1334.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -902,16 +862,16 @@ void comp_main() } if (part_ix > 0u) { - _1529 = sh_part_count[part_ix - 1u]; + _1448 = sh_part_count[part_ix - 1u]; } else { - _1529 = part_start_ix; + _1448 = part_start_ix; } - ix -= _1529; + ix -= _1448; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1548 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1548; + BinInstanceRef _1467 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1467; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -921,16 +881,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1571 = (wr_ix - rd_ix) < 256u; - if (_1571) + bool _1490 = (wr_ix - rd_ix) < 256u; + if (_1490) { - _1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1581 = _1571; + _1500 = _1490; } - if (_1581) + if (_1500) { continue; } @@ -943,11 +903,11 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - AnnotatedRef _1602 = { _1249.Load(32) + (element_ix * 40u) }; - ref = _1602; - Alloc _1605; - _1605.offset = _1249.Load(32); - param_14.offset = _1605.offset; + AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) }; + ref = _1521; + Alloc _1524; + _1524.offset = _1169.Load(32); + param_14.offset = _1524.offset; AnnotatedRef param_15 = ref; tag = Annotated_tag(param_14, param_15).tag; } @@ -959,12 +919,13 @@ void comp_main() case 4u: case 5u: { - uint path_ix = element_ix; - PathRef _1624 = { _1249.Load(16) + (path_ix * 12u) }; - Alloc _1627; - _1627.offset = _1249.Load(16); - param_16.offset = _1627.offset; - PathRef param_17 = _1624; + uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix); + uint path_ix = _283.Load(drawmonoid_base * 4 + 8); + PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) }; + Alloc _1556; + _1556.offset = _1169.Load(16); + param_16.offset = _1556.offset; + PathRef param_17 = _1553; Path path = Path_read(param_16, param_17); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -1019,59 +980,53 @@ void comp_main() el_ix = probe_1; } } - AnnotatedRef _1826 = { _1249.Load(32) + (sh_elements[el_ix] * 40u) }; - AnnotatedRef ref_1 = _1826; - Alloc _1830; - _1830.offset = _1249.Load(32); - param_23.offset = _1830.offset; + AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) }; + AnnotatedRef ref_1 = _1755; + Alloc _1759; + _1759.offset = _1169.Load(32); + param_23.offset = _1759.offset; AnnotatedRef param_24 = ref_1; uint tag_1 = Annotated_tag(param_23, param_24).tag; if (el_ix > 0u) { - _1841 = sh_tile_count[el_ix - 1u]; + _1770 = sh_tile_count[el_ix - 1u]; } else { - _1841 = 0u; + _1770 = 0u; } - uint seq_ix = ix_1 - _1841; + uint seq_ix = ix_1 - _1770; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); bool include_tile = false; - if ((tag_1 == 4u) || (tag_1 == 5u)) + if (mem_ok) { - include_tile = true; - } - else - { - if (mem_ok) + uint param_25 = el_ix; + bool param_26 = mem_ok; + TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + Alloc param_27 = read_tile_alloc(param_25, param_26); + TileRef param_28 = _1822; + Tile tile = Tile_read(param_27, param_28); + bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); + bool _1834 = tile.tile.offset != 0u; + bool _1843; + if (!_1834) { - uint param_25 = el_ix; - bool param_26 = mem_ok; - TileRef _1901 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1901; - Tile tile = Tile_read(param_27, param_28); - bool _1907 = tile.tile.offset != 0u; - bool _1914; - if (!_1907) - { - _1914 = tile.backdrop != 0; - } - else - { - _1914 = _1907; - } - include_tile = _1914; + _1843 = (tile.backdrop == 0) == is_clip; } + else + { + _1843 = _1834; + } + include_tile = _1843; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1934; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1934); + uint _1863; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863); } } GroupMemoryBarrierWithGroupSync(); @@ -1095,11 +1050,11 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - AnnotatedRef _1988 = { _1249.Load(32) + (element_ix_1 * 40u) }; - ref = _1988; - Alloc _1993; - _1993.offset = _1249.Load(32); - param_29.offset = _1993.offset; + AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) }; + ref = _1917; + Alloc _1922; + _1922.offset = _1169.Load(32); + param_29.offset = _1922.offset; AnnotatedRef param_30 = ref; AnnotatedTag tag_2 = Annotated_tag(param_29, param_30); if (clip_zero_depth == 0u) @@ -1110,23 +1065,23 @@ void comp_main() { uint param_31 = element_ref_ix; bool param_32 = mem_ok; - TileRef _2029 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_33 = read_tile_alloc(param_31, param_32); - TileRef param_34 = _2029; + TileRef param_34 = _1958; tile_1 = Tile_read(param_33, param_34); - Alloc _2036; - _2036.offset = _1249.Load(32); - param_35.offset = _2036.offset; + Alloc _1965; + _1965.offset = _1169.Load(32); + param_35.offset = _1965.offset; AnnotatedRef param_36 = ref; fill = Annotated_Color_read(param_35, param_36); Alloc param_37 = cmd_alloc; CmdRef param_38 = cmd_ref; uint param_39 = cmd_limit; - bool _2048 = alloc_cmd(param_37, param_38, param_39); + bool _1977 = alloc_cmd(param_37, param_38, param_39); cmd_alloc = param_37; cmd_ref = param_38; cmd_limit = param_39; - if (!_2048) + if (!_1977) { break; } @@ -1137,10 +1092,10 @@ void comp_main() float param_44 = fill.linewidth; write_fill(param_40, param_41, param_42, param_43, param_44); cmd_ref = param_41; - CmdColor _2072 = { fill.rgba_color }; + CmdColor _2001 = { fill.rgba_color }; Alloc param_45 = cmd_alloc; CmdRef param_46 = cmd_ref; - CmdColor param_47 = _2072; + CmdColor param_47 = _2001; Cmd_Color_write(param_45, param_46, param_47); cmd_ref.offset += 8u; break; @@ -1149,23 +1104,23 @@ void comp_main() { uint param_48 = element_ref_ix; bool param_49 = mem_ok; - TileRef _2101 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_50 = read_tile_alloc(param_48, param_49); - TileRef param_51 = _2101; + TileRef param_51 = _2030; tile_1 = Tile_read(param_50, param_51); - Alloc _2108; - _2108.offset = _1249.Load(32); - param_52.offset = _2108.offset; + Alloc _2037; + _2037.offset = _1169.Load(32); + param_52.offset = _2037.offset; AnnotatedRef param_53 = ref; AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53); Alloc param_54 = cmd_alloc; CmdRef param_55 = cmd_ref; uint param_56 = cmd_limit; - bool _2120 = alloc_cmd(param_54, param_55, param_56); + bool _2049 = alloc_cmd(param_54, param_55, param_56); cmd_alloc = param_54; cmd_ref = param_55; cmd_limit = param_56; - if (!_2120) + if (!_2049) { break; } @@ -1191,23 +1146,23 @@ void comp_main() { uint param_65 = element_ref_ix; bool param_66 = mem_ok; - TileRef _2185 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_67 = read_tile_alloc(param_65, param_66); - TileRef param_68 = _2185; + TileRef param_68 = _2114; tile_1 = Tile_read(param_67, param_68); - Alloc _2192; - _2192.offset = _1249.Load(32); - param_69.offset = _2192.offset; + Alloc _2121; + _2121.offset = _1169.Load(32); + param_69.offset = _2121.offset; AnnotatedRef param_70 = ref; AnnoImage fill_img = Annotated_Image_read(param_69, param_70); Alloc param_71 = cmd_alloc; CmdRef param_72 = cmd_ref; uint param_73 = cmd_limit; - bool _2204 = alloc_cmd(param_71, param_72, param_73); + bool _2133 = alloc_cmd(param_71, param_72, param_73); cmd_alloc = param_71; cmd_ref = param_72; cmd_limit = param_73; - if (!_2204) + if (!_2133) { break; } @@ -1218,10 +1173,10 @@ void comp_main() float param_78 = fill_img.linewidth; write_fill(param_74, param_75, param_76, param_77, param_78); cmd_ref = param_75; - CmdImage _2230 = { fill_img.index, fill_img.offset }; + CmdImage _2159 = { fill_img.index, fill_img.offset }; Alloc param_79 = cmd_alloc; CmdRef param_80 = cmd_ref; - CmdImage param_81 = _2230; + CmdImage param_81 = _2159; Cmd_Image_write(param_79, param_80, param_81); cmd_ref.offset += 12u; break; @@ -1230,103 +1185,76 @@ void comp_main() { uint param_82 = element_ref_ix; bool param_83 = mem_ok; - TileRef _2259 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_84 = read_tile_alloc(param_82, param_83); - TileRef param_85 = _2259; + TileRef param_85 = _2188; tile_1 = Tile_read(param_84, param_85); - bool _2265 = tile_1.tile.offset == 0u; - bool _2271; - if (_2265) + bool _2194 = tile_1.tile.offset == 0u; + bool _2200; + if (_2194) { - _2271 = tile_1.backdrop == 0; + _2200 = tile_1.backdrop == 0; } else { - _2271 = _2265; + _2200 = _2194; } - if (_2271) + if (_2200) { clip_zero_depth = clip_depth + 1u; } else { - if ((tile_1.tile.offset == 0u) && (clip_depth < 32u)) + Alloc param_86 = cmd_alloc; + CmdRef param_87 = cmd_ref; + uint param_88 = cmd_limit; + bool _2212 = alloc_cmd(param_86, param_87, param_88); + cmd_alloc = param_86; + cmd_ref = param_87; + cmd_limit = param_88; + if (!_2212) { - clip_one_mask |= (1u << clip_depth); - } - else - { - Alloc _2293; - _2293.offset = _1249.Load(32); - param_86.offset = _2293.offset; - AnnotatedRef param_87 = ref; - AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87); - Alloc param_88 = cmd_alloc; - CmdRef param_89 = cmd_ref; - uint param_90 = cmd_limit; - bool _2305 = alloc_cmd(param_88, param_89, param_90); - cmd_alloc = param_88; - cmd_ref = param_89; - cmd_limit = param_90; - if (!_2305) - { - break; - } - Alloc param_91 = cmd_alloc; - CmdRef param_92 = cmd_ref; - uint param_93 = tag_2.flags; - Tile param_94 = tile_1; - float param_95 = begin_clip.linewidth; - write_fill(param_91, param_92, param_93, param_94, param_95); - cmd_ref = param_92; - Alloc param_96 = cmd_alloc; - CmdRef param_97 = cmd_ref; - Cmd_BeginClip_write(param_96, param_97); - cmd_ref.offset += 4u; - if (clip_depth < 32u) - { - clip_one_mask &= (~(1u << clip_depth)); - } + break; } + Alloc param_89 = cmd_alloc; + CmdRef param_90 = cmd_ref; + Cmd_BeginClip_write(param_89, param_90); + cmd_ref.offset += 4u; } clip_depth++; break; } case 5u: { + uint param_91 = element_ref_ix; + bool param_92 = mem_ok; + TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + Alloc param_93 = read_tile_alloc(param_91, param_92); + TileRef param_94 = _2249; + tile_1 = Tile_read(param_93, param_94); clip_depth--; - bool _2351 = clip_depth >= 32u; - bool _2360; - if (!_2351) + Alloc param_95 = cmd_alloc; + CmdRef param_96 = cmd_ref; + uint param_97 = cmd_limit; + bool _2261 = alloc_cmd(param_95, param_96, param_97); + cmd_alloc = param_95; + cmd_ref = param_96; + cmd_limit = param_97; + if (!_2261) { - _2360 = (clip_one_mask & (1u << clip_depth)) == 0u; - } - else - { - _2360 = _2351; - } - if (_2360) - { - Alloc param_98 = cmd_alloc; - CmdRef param_99 = cmd_ref; - uint param_100 = cmd_limit; - bool _2369 = alloc_cmd(param_98, param_99, param_100); - cmd_alloc = param_98; - cmd_ref = param_99; - cmd_limit = param_100; - if (!_2369) - { - break; - } - Alloc param_101 = cmd_alloc; - CmdRef param_102 = cmd_ref; - Cmd_Solid_write(param_101, param_102); - cmd_ref.offset += 4u; - Alloc param_103 = cmd_alloc; - CmdRef param_104 = cmd_ref; - Cmd_EndClip_write(param_103, param_104); - cmd_ref.offset += 4u; + break; } + Alloc param_98 = cmd_alloc; + CmdRef param_99 = cmd_ref; + uint param_100 = 0u; + Tile param_101 = tile_1; + float param_102 = 0.0f; + write_fill(param_98, param_99, param_100, param_101, param_102); + cmd_ref = param_99; + Alloc param_103 = cmd_alloc; + CmdRef param_104 = cmd_ref; + Cmd_EndClip_write(param_103, param_104); + cmd_ref.offset += 4u; break; } } @@ -1359,17 +1287,17 @@ void comp_main() break; } } - bool _2432 = (bin_tile_x + tile_x) < _1249.Load(8); - bool _2441; - if (_2432) + bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8); + bool _2335; + if (_2326) { - _2441 = (bin_tile_y + tile_y) < _1249.Load(12); + _2335 = (bin_tile_y + tile_y) < _1169.Load(12); } else { - _2441 = _2432; + _2335 = _2326; } - if (_2441) + if (_2335) { Alloc param_105 = cmd_alloc; CmdRef param_106 = cmd_ref; diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 096f710..e5a0f0d 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -65,17 +65,6 @@ struct AnnoLinGradient float line_c; }; -struct AnnoBeginClipRef -{ - uint offset; -}; - -struct AnnoBeginClip -{ - float4 bbox; - float linewidth; -}; - struct AnnotatedRef { uint offset; @@ -221,8 +210,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -249,7 +243,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_296, constant uint& v_296BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -257,7 +251,7 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_296.memory[offset]; + uint v = v_283.memory[offset]; return v; } @@ -276,39 +270,39 @@ BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const } static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); BinInstance s; s.element_ix = raw0; return s; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_296, v_296BufferSize); + uint tag_and_flags = read_mem(param, param_1, v_283, v_283BufferSize); return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); + uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_296, v_296BufferSize); + uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -321,24 +315,24 @@ void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) } static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_296, constant uint& v_296BufferSize) +Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_283, constant uint& v_283BufferSize) { uint param = 0u; - uint param_1 = uint(int((v_296BufferSize - 8) / 4) * 4); + uint param_1 = uint(int((v_283BufferSize - 8) / 4) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); + uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); Tile s; s.tile = TileSegRef{ raw0 }; s.backdrop = int(raw1); @@ -346,27 +340,27 @@ Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& } static inline __attribute__((always_inline)) -AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); + uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_296, v_296BufferSize); + uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_296, v_296BufferSize); + uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_296, v_296BufferSize); + uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_296, v_296BufferSize); + uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); AnnoColor s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -375,34 +369,34 @@ AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, } static inline __attribute__((always_inline)) -AnnoColor Annotated_Color_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoColor Annotated_Color_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; AnnoColorRef param_1 = AnnoColorRef{ ref.offset + 4u }; - return AnnoColor_read(param, param_1, v_296, v_296BufferSize); + return AnnoColor_read(param, param_1, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_296, constant uint& v_296BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_283, constant uint& v_283BufferSize) { - uint _302 = atomic_fetch_add_explicit((device atomic_uint*)&v_296.mem_offset, size, memory_order_relaxed); - uint offset = _302; + uint _289 = atomic_fetch_add_explicit((device atomic_uint*)&v_283.mem_offset, size, memory_order_relaxed); + uint offset = _289; MallocResult r; - r.failed = (offset + size) > uint(int((v_296BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_283BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _331 = atomic_fetch_max_explicit((device atomic_uint*)&v_296.mem_error, 1u, memory_order_relaxed); + uint _318 = atomic_fetch_max_explicit((device atomic_uint*)&v_283.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_296, constant uint& v_296BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -410,42 +404,42 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_296.memory[offset] = val; + v_283.memory[offset] = val; } static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdJump_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_296, constant uint& v_296BufferSize) +bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_283, constant uint& v_283BufferSize) { if (cmd_ref.offset < cmd_limit) { return true; } uint param = 1024u; - MallocResult _1156 = malloc(param, v_296, v_296BufferSize); - MallocResult new_cmd = _1156; + MallocResult _1076 = malloc(param, v_283, v_283BufferSize); + MallocResult new_cmd = _1076; if (new_cmd.failed) { return false; @@ -454,7 +448,7 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; - Cmd_Jump_write(param_1, param_2, param_3, v_296, v_296BufferSize); + Cmd_Jump_write(param_1, param_2, param_3, v_283, v_283BufferSize); cmd_alloc = new_cmd.alloc; cmd_ref = CmdRef{ cmd_alloc.offset }; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; @@ -468,70 +462,70 @@ uint fill_mode_from_flags(thread const uint& flags) } static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_296, v_296BufferSize); + write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdFill_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 3u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_296, v_296BufferSize); + write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdStroke_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const uint& flags, thread const Tile& tile, thread const float& linewidth, device Memory& v_296, constant uint& v_296BufferSize) +void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const uint& flags, thread const Tile& tile, thread const float& linewidth, device Memory& v_283, constant uint& v_283BufferSize) { uint param = flags; if (fill_mode_from_flags(param) == 0u) @@ -542,14 +536,14 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_1 = alloc; CmdRef param_2 = cmd_ref; CmdFill param_3 = cmd_fill; - Cmd_Fill_write(param_1, param_2, param_3, v_296, v_296BufferSize); + Cmd_Fill_write(param_1, param_2, param_3, v_283, v_283BufferSize); cmd_ref.offset += 12u; } else { Alloc param_4 = alloc; CmdRef param_5 = cmd_ref; - Cmd_Solid_write(param_4, param_5, v_296, v_296BufferSize); + Cmd_Solid_write(param_4, param_5, v_283, v_283BufferSize); cmd_ref.offset += 4u; } } @@ -559,65 +553,65 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_6 = alloc; CmdRef param_7 = cmd_ref; CmdStroke param_8 = cmd_stroke; - Cmd_Stroke_write(param_6, param_7, param_8, v_296, v_296BufferSize); + Cmd_Stroke_write(param_6, param_7, param_8, v_283, v_283BufferSize); cmd_ref.offset += 12u; } } static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdColor_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLinGradientRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLinGradientRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); + uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_296, v_296BufferSize); + uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_296, v_296BufferSize); + uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_296, v_296BufferSize); + uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_296, v_296BufferSize); + uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_296, v_296BufferSize); + uint raw6 = read_mem(param_12, param_13, v_283, v_283BufferSize); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_296, v_296BufferSize); + uint raw7 = read_mem(param_14, param_15, v_283, v_283BufferSize); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_296, v_296BufferSize); + uint raw8 = read_mem(param_16, param_17, v_283, v_283BufferSize); AnnoLinGradient s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -629,73 +623,73 @@ AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLin } static inline __attribute__((always_inline)) -AnnoLinGradient Annotated_LinGradient_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoLinGradient Annotated_LinGradient_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; AnnoLinGradientRef param_1 = AnnoLinGradientRef{ ref.offset + 4u }; - return AnnoLinGradient_read(param, param_1, v_296, v_296BufferSize); + return AnnoLinGradient_read(param, param_1, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_296, v_296BufferSize); + write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_296, v_296BufferSize); + write_mem(param_6, param_7, param_8, v_283, v_283BufferSize); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_296, v_296BufferSize); + write_mem(param_9, param_10, param_11, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdLinGrad_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); + uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); + uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_296, v_296BufferSize); + uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_296, v_296BufferSize); + uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_296, v_296BufferSize); + uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_296, v_296BufferSize); + uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_296, v_296BufferSize); + uint raw6 = read_mem(param_12, param_13, v_283, v_283BufferSize); AnnoImage s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -705,101 +699,68 @@ AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, } static inline __attribute__((always_inline)) -AnnoImage Annotated_Image_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +AnnoImage Annotated_Image_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; AnnoImageRef param_1 = AnnoImageRef{ ref.offset + 4u }; - return AnnoImage_read(param, param_1, v_296, v_296BufferSize); + return AnnoImage_read(param, param_1, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_296, constant uint& v_296BufferSize) +void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_283, constant uint& v_283BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_296, v_296BufferSize); + write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); Alloc param_3 = a; CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_296, v_296BufferSize); + CmdImage_write(param_3, param_4, param_5, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -AnnoBeginClip AnnoBeginClip_read(thread const Alloc& a, thread const AnnoBeginClipRef& ref, device Memory& v_296, constant uint& v_296BufferSize) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_296, v_296BufferSize); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_296, v_296BufferSize); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_296, v_296BufferSize); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_296, v_296BufferSize); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_296, v_296BufferSize); - AnnoBeginClip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.linewidth = as_type(raw4); - return s; -} - -static inline __attribute__((always_inline)) -AnnoBeginClip Annotated_BeginClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_296, constant uint& v_296BufferSize) -{ - Alloc param = a; - AnnoBeginClipRef param_1 = AnnoBeginClipRef{ ref.offset + 4u }; - return AnnoBeginClip_read(param, param_1, v_296, v_296BufferSize); -} - -static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 8u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_296, constant uint& v_296BufferSize) +void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 0u; - write_mem(param, param_1, param_2, v_296, v_296BufferSize); + write_mem(param, param_1, param_2, v_283, v_283BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_296 [[buffer(0)]], const device ConfigBuf& _1249 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_283 [[buffer(0)]], const device ConfigBuf& _1169 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_bitmaps[8][256]; threadgroup Alloc sh_part_elements[256]; @@ -811,19 +772,19 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M threadgroup uint sh_tile_y0[256]; threadgroup uint sh_tile_base[256]; threadgroup uint sh_tile_count[256]; - constant uint& v_296BufferSize = spvBufferSizeConstants[0]; - uint width_in_bins = ((_1249.conf.width_in_tiles + 16u) - 1u) / 16u; + constant uint& v_283BufferSize = spvBufferSizeConstants[0]; + uint width_in_bins = ((_1169.conf.width_in_tiles + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1249.conf.n_elements + 256u) - 1u) / 256u; + uint n_partitions = ((_1169.conf.n_elements + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.conf.width_in_tiles) + bin_tile_x) + tile_x; + uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.conf.width_in_tiles) + bin_tile_x) + tile_x; Alloc param; - param.offset = _1249.conf.ptcl_alloc.offset; + param.offset = _1169.conf.ptcl_alloc.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -831,22 +792,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint clip_depth = 0u; uint clip_zero_depth = 0u; - uint clip_one_mask = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = v_296.mem_error == 0u; + bool mem_ok = v_283.mem_error == 0u; Alloc param_3; Alloc param_5; - uint _1529; + uint _1448; uint element_ix; AnnotatedRef ref; Alloc param_14; Alloc param_16; uint tile_count; Alloc param_23; - uint _1841; + uint _1770; Alloc param_29; Tile tile_1; AnnoColor fill; @@ -854,39 +814,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_52; CmdLinGrad cmd_lin; Alloc param_69; - Alloc param_86; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1581; + bool _1500; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1379 = th_ix < 256u; - bool _1387; - if (_1379) + bool _1298 = th_ix < 256u; + bool _1306; + if (_1298) { - _1387 = (partition_ix + th_ix) < n_partitions; + _1306 = (partition_ix + th_ix) < n_partitions; } else { - _1387 = _1379; + _1306 = _1298; } - if (_1387) + if (_1306) { - uint in_ix = (_1249.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_3.offset = _1249.conf.bin_alloc.offset; + uint in_ix = (_1169.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + param_3.offset = _1169.conf.bin_alloc.offset; uint param_4 = in_ix; - count = read_mem(param_3, param_4, v_296, v_296BufferSize); - param_5.offset = _1249.conf.bin_alloc.offset; + count = read_mem(param_3, param_4, v_283, v_283BufferSize); + param_5.offset = _1169.conf.bin_alloc.offset; uint param_6 = in_ix + 1u; - uint offset = read_mem(param_5, param_6, v_296, v_296BufferSize); + uint offset = read_mem(param_5, param_6, v_283, v_283BufferSize); uint param_7 = offset; uint param_8 = count * 4u; bool param_9 = mem_ok; @@ -930,34 +889,34 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (part_ix > 0u) { - _1529 = sh_part_count[part_ix - 1u]; + _1448 = sh_part_count[part_ix - 1u]; } else { - _1529 = part_start_ix; + _1448 = part_start_ix; } - ix -= _1529; + ix -= _1448; Alloc bin_alloc = sh_part_elements[part_ix]; BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; BinInstanceRef param_13 = BinInstance_index(param_10, param_11); - BinInstance inst = BinInstance_read(param_12, param_13, v_296, v_296BufferSize); + BinInstance inst = BinInstance_read(param_12, param_13, v_283, v_283BufferSize); sh_elements[th_ix] = inst.element_ix; } threadgroup_barrier(mem_flags::mem_threadgroup); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1571 = (wr_ix - rd_ix) < 256u; - if (_1571) + bool _1490 = (wr_ix - rd_ix) < 256u; + if (_1490) { - _1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1581 = _1571; + _1500 = _1490; } - if (_1581) + if (_1500) { continue; } @@ -970,10 +929,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - ref = AnnotatedRef{ _1249.conf.anno_alloc.offset + (element_ix * 40u) }; - param_14.offset = _1249.conf.anno_alloc.offset; + ref = AnnotatedRef{ _1169.conf.anno_alloc.offset + (element_ix * 40u) }; + param_14.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_15 = ref; - tag = Annotated_tag(param_14, param_15, v_296, v_296BufferSize).tag; + tag = Annotated_tag(param_14, param_15, v_283, v_283BufferSize).tag; } switch (tag) { @@ -983,10 +942,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 4u: case 5u: { - uint path_ix = element_ix; - param_16.offset = _1249.conf.tile_alloc.offset; - PathRef param_17 = PathRef{ _1249.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_16, param_17, v_296, v_296BufferSize); + uint drawmonoid_base = (_1169.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); + uint path_ix = v_283.memory[drawmonoid_base]; + param_16.offset = _1169.conf.tile_alloc.offset; + PathRef param_17 = PathRef{ _1169.conf.tile_alloc.offset + (path_ix * 12u) }; + Path path = Path_read(param_16, param_17, v_283, v_283BufferSize); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -1040,54 +1000,48 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M el_ix = probe_1; } } - AnnotatedRef ref_1 = AnnotatedRef{ _1249.conf.anno_alloc.offset + (sh_elements[el_ix] * 40u) }; - param_23.offset = _1249.conf.anno_alloc.offset; + AnnotatedRef ref_1 = AnnotatedRef{ _1169.conf.anno_alloc.offset + (sh_elements[el_ix] * 40u) }; + param_23.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_24 = ref_1; - uint tag_1 = Annotated_tag(param_23, param_24, v_296, v_296BufferSize).tag; + uint tag_1 = Annotated_tag(param_23, param_24, v_283, v_283BufferSize).tag; if (el_ix > 0u) { - _1841 = sh_tile_count[el_ix - 1u]; + _1770 = sh_tile_count[el_ix - 1u]; } else { - _1841 = 0u; + _1770 = 0u; } - uint seq_ix = ix_1 - _1841; + uint seq_ix = ix_1 - _1770; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); bool include_tile = false; - if ((tag_1 == 4u) || (tag_1 == 5u)) + if (mem_ok) { - include_tile = true; - } - else - { - if (mem_ok) + uint param_25 = el_ix; + bool param_26 = mem_ok; + Alloc param_27 = read_tile_alloc(param_25, param_26, v_283, v_283BufferSize); + TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + Tile tile = Tile_read(param_27, param_28, v_283, v_283BufferSize); + bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); + bool _1834 = tile.tile.offset != 0u; + bool _1843; + if (!_1834) { - uint param_25 = el_ix; - bool param_26 = mem_ok; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_296, v_296BufferSize); - TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_27, param_28, v_296, v_296BufferSize); - bool _1907 = tile.tile.offset != 0u; - bool _1914; - if (!_1907) - { - _1914 = tile.backdrop != 0; - } - else - { - _1914 = _1907; - } - include_tile = _1914; + _1843 = (tile.backdrop == 0) == is_clip; } + else + { + _1843 = _1834; + } + include_tile = _1843; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1934 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1863 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -1111,10 +1065,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - ref = AnnotatedRef{ _1249.conf.anno_alloc.offset + (element_ix_1 * 40u) }; - param_29.offset = _1249.conf.anno_alloc.offset; + ref = AnnotatedRef{ _1169.conf.anno_alloc.offset + (element_ix_1 * 40u) }; + param_29.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_30 = ref; - AnnotatedTag tag_2 = Annotated_tag(param_29, param_30, v_296, v_296BufferSize); + AnnotatedTag tag_2 = Annotated_tag(param_29, param_30, v_283, v_283BufferSize); if (clip_zero_depth == 0u) { switch (tag_2.tag) @@ -1123,20 +1077,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_31 = element_ref_ix; bool param_32 = mem_ok; - Alloc param_33 = read_tile_alloc(param_31, param_32, v_296, v_296BufferSize); + Alloc param_33 = read_tile_alloc(param_31, param_32, v_283, v_283BufferSize); TileRef param_34 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_33, param_34, v_296, v_296BufferSize); - param_35.offset = _1249.conf.anno_alloc.offset; + tile_1 = Tile_read(param_33, param_34, v_283, v_283BufferSize); + param_35.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_36 = ref; - fill = Annotated_Color_read(param_35, param_36, v_296, v_296BufferSize); + fill = Annotated_Color_read(param_35, param_36, v_283, v_283BufferSize); Alloc param_37 = cmd_alloc; CmdRef param_38 = cmd_ref; uint param_39 = cmd_limit; - bool _2048 = alloc_cmd(param_37, param_38, param_39, v_296, v_296BufferSize); + bool _1977 = alloc_cmd(param_37, param_38, param_39, v_283, v_283BufferSize); cmd_alloc = param_37; cmd_ref = param_38; cmd_limit = param_39; - if (!_2048) + if (!_1977) { break; } @@ -1145,12 +1099,12 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_42 = tag_2.flags; Tile param_43 = tile_1; float param_44 = fill.linewidth; - write_fill(param_40, param_41, param_42, param_43, param_44, v_296, v_296BufferSize); + write_fill(param_40, param_41, param_42, param_43, param_44, v_283, v_283BufferSize); cmd_ref = param_41; Alloc param_45 = cmd_alloc; CmdRef param_46 = cmd_ref; CmdColor param_47 = CmdColor{ fill.rgba_color }; - Cmd_Color_write(param_45, param_46, param_47, v_296, v_296BufferSize); + Cmd_Color_write(param_45, param_46, param_47, v_283, v_283BufferSize); cmd_ref.offset += 8u; break; } @@ -1158,20 +1112,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_48 = element_ref_ix; bool param_49 = mem_ok; - Alloc param_50 = read_tile_alloc(param_48, param_49, v_296, v_296BufferSize); + Alloc param_50 = read_tile_alloc(param_48, param_49, v_283, v_283BufferSize); TileRef param_51 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_50, param_51, v_296, v_296BufferSize); - param_52.offset = _1249.conf.anno_alloc.offset; + tile_1 = Tile_read(param_50, param_51, v_283, v_283BufferSize); + param_52.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_53 = ref; - AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53, v_296, v_296BufferSize); + AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53, v_283, v_283BufferSize); Alloc param_54 = cmd_alloc; CmdRef param_55 = cmd_ref; uint param_56 = cmd_limit; - bool _2120 = alloc_cmd(param_54, param_55, param_56, v_296, v_296BufferSize); + bool _2049 = alloc_cmd(param_54, param_55, param_56, v_283, v_283BufferSize); cmd_alloc = param_54; cmd_ref = param_55; cmd_limit = param_56; - if (!_2120) + if (!_2049) { break; } @@ -1180,7 +1134,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_59 = tag_2.flags; Tile param_60 = tile_1; float param_61 = fill.linewidth; - write_fill(param_57, param_58, param_59, param_60, param_61, v_296, v_296BufferSize); + write_fill(param_57, param_58, param_59, param_60, param_61, v_283, v_283BufferSize); cmd_ref = param_58; cmd_lin.index = lin.index; cmd_lin.line_x = lin.line_x; @@ -1189,7 +1143,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_62 = cmd_alloc; CmdRef param_63 = cmd_ref; CmdLinGrad param_64 = cmd_lin; - Cmd_LinGrad_write(param_62, param_63, param_64, v_296, v_296BufferSize); + Cmd_LinGrad_write(param_62, param_63, param_64, v_283, v_283BufferSize); cmd_ref.offset += 20u; break; } @@ -1197,20 +1151,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_65 = element_ref_ix; bool param_66 = mem_ok; - Alloc param_67 = read_tile_alloc(param_65, param_66, v_296, v_296BufferSize); + Alloc param_67 = read_tile_alloc(param_65, param_66, v_283, v_283BufferSize); TileRef param_68 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_67, param_68, v_296, v_296BufferSize); - param_69.offset = _1249.conf.anno_alloc.offset; + tile_1 = Tile_read(param_67, param_68, v_283, v_283BufferSize); + param_69.offset = _1169.conf.anno_alloc.offset; AnnotatedRef param_70 = ref; - AnnoImage fill_img = Annotated_Image_read(param_69, param_70, v_296, v_296BufferSize); + AnnoImage fill_img = Annotated_Image_read(param_69, param_70, v_283, v_283BufferSize); Alloc param_71 = cmd_alloc; CmdRef param_72 = cmd_ref; uint param_73 = cmd_limit; - bool _2204 = alloc_cmd(param_71, param_72, param_73, v_296, v_296BufferSize); + bool _2133 = alloc_cmd(param_71, param_72, param_73, v_283, v_283BufferSize); cmd_alloc = param_71; cmd_ref = param_72; cmd_limit = param_73; - if (!_2204) + if (!_2133) { break; } @@ -1219,12 +1173,12 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_76 = tag_2.flags; Tile param_77 = tile_1; float param_78 = fill_img.linewidth; - write_fill(param_74, param_75, param_76, param_77, param_78, v_296, v_296BufferSize); + write_fill(param_74, param_75, param_76, param_77, param_78, v_283, v_283BufferSize); cmd_ref = param_75; Alloc param_79 = cmd_alloc; CmdRef param_80 = cmd_ref; CmdImage param_81 = CmdImage{ fill_img.index, fill_img.offset }; - Cmd_Image_write(param_79, param_80, param_81, v_296, v_296BufferSize); + Cmd_Image_write(param_79, param_80, param_81, v_283, v_283BufferSize); cmd_ref.offset += 12u; break; } @@ -1232,100 +1186,74 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_82 = element_ref_ix; bool param_83 = mem_ok; - Alloc param_84 = read_tile_alloc(param_82, param_83, v_296, v_296BufferSize); + Alloc param_84 = read_tile_alloc(param_82, param_83, v_283, v_283BufferSize); TileRef param_85 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_84, param_85, v_296, v_296BufferSize); - bool _2265 = tile_1.tile.offset == 0u; - bool _2271; - if (_2265) + tile_1 = Tile_read(param_84, param_85, v_283, v_283BufferSize); + bool _2194 = tile_1.tile.offset == 0u; + bool _2200; + if (_2194) { - _2271 = tile_1.backdrop == 0; + _2200 = tile_1.backdrop == 0; } else { - _2271 = _2265; + _2200 = _2194; } - if (_2271) + if (_2200) { clip_zero_depth = clip_depth + 1u; } else { - if ((tile_1.tile.offset == 0u) && (clip_depth < 32u)) + Alloc param_86 = cmd_alloc; + CmdRef param_87 = cmd_ref; + uint param_88 = cmd_limit; + bool _2212 = alloc_cmd(param_86, param_87, param_88, v_283, v_283BufferSize); + cmd_alloc = param_86; + cmd_ref = param_87; + cmd_limit = param_88; + if (!_2212) { - clip_one_mask |= (1u << clip_depth); - } - else - { - param_86.offset = _1249.conf.anno_alloc.offset; - AnnotatedRef param_87 = ref; - AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87, v_296, v_296BufferSize); - Alloc param_88 = cmd_alloc; - CmdRef param_89 = cmd_ref; - uint param_90 = cmd_limit; - bool _2305 = alloc_cmd(param_88, param_89, param_90, v_296, v_296BufferSize); - cmd_alloc = param_88; - cmd_ref = param_89; - cmd_limit = param_90; - if (!_2305) - { - break; - } - Alloc param_91 = cmd_alloc; - CmdRef param_92 = cmd_ref; - uint param_93 = tag_2.flags; - Tile param_94 = tile_1; - float param_95 = begin_clip.linewidth; - write_fill(param_91, param_92, param_93, param_94, param_95, v_296, v_296BufferSize); - cmd_ref = param_92; - Alloc param_96 = cmd_alloc; - CmdRef param_97 = cmd_ref; - Cmd_BeginClip_write(param_96, param_97, v_296, v_296BufferSize); - cmd_ref.offset += 4u; - if (clip_depth < 32u) - { - clip_one_mask &= (~(1u << clip_depth)); - } + break; } + Alloc param_89 = cmd_alloc; + CmdRef param_90 = cmd_ref; + Cmd_BeginClip_write(param_89, param_90, v_283, v_283BufferSize); + cmd_ref.offset += 4u; } clip_depth++; break; } case 5u: { + uint param_91 = element_ref_ix; + bool param_92 = mem_ok; + Alloc param_93 = read_tile_alloc(param_91, param_92, v_283, v_283BufferSize); + TileRef param_94 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + tile_1 = Tile_read(param_93, param_94, v_283, v_283BufferSize); clip_depth--; - bool _2351 = clip_depth >= 32u; - bool _2360; - if (!_2351) + Alloc param_95 = cmd_alloc; + CmdRef param_96 = cmd_ref; + uint param_97 = cmd_limit; + bool _2261 = alloc_cmd(param_95, param_96, param_97, v_283, v_283BufferSize); + cmd_alloc = param_95; + cmd_ref = param_96; + cmd_limit = param_97; + if (!_2261) { - _2360 = (clip_one_mask & (1u << clip_depth)) == 0u; - } - else - { - _2360 = _2351; - } - if (_2360) - { - Alloc param_98 = cmd_alloc; - CmdRef param_99 = cmd_ref; - uint param_100 = cmd_limit; - bool _2369 = alloc_cmd(param_98, param_99, param_100, v_296, v_296BufferSize); - cmd_alloc = param_98; - cmd_ref = param_99; - cmd_limit = param_100; - if (!_2369) - { - break; - } - Alloc param_101 = cmd_alloc; - CmdRef param_102 = cmd_ref; - Cmd_Solid_write(param_101, param_102, v_296, v_296BufferSize); - cmd_ref.offset += 4u; - Alloc param_103 = cmd_alloc; - CmdRef param_104 = cmd_ref; - Cmd_EndClip_write(param_103, param_104, v_296, v_296BufferSize); - cmd_ref.offset += 4u; + break; } + Alloc param_98 = cmd_alloc; + CmdRef param_99 = cmd_ref; + uint param_100 = 0u; + Tile param_101 = tile_1; + float param_102 = 0.0; + write_fill(param_98, param_99, param_100, param_101, param_102, v_283, v_283BufferSize); + cmd_ref = param_99; + Alloc param_103 = cmd_alloc; + CmdRef param_104 = cmd_ref; + Cmd_EndClip_write(param_103, param_104, v_283, v_283BufferSize); + cmd_ref.offset += 4u; break; } } @@ -1358,21 +1286,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _2432 = (bin_tile_x + tile_x) < _1249.conf.width_in_tiles; - bool _2441; - if (_2432) + bool _2326 = (bin_tile_x + tile_x) < _1169.conf.width_in_tiles; + bool _2335; + if (_2326) { - _2441 = (bin_tile_y + tile_y) < _1249.conf.height_in_tiles; + _2335 = (bin_tile_y + tile_y) < _1169.conf.height_in_tiles; } else { - _2441 = _2432; + _2335 = _2326; } - if (_2441) + if (_2335) { Alloc param_105 = cmd_alloc; CmdRef param_106 = cmd_ref; - Cmd_End_write(param_105, param_106, v_296, v_296BufferSize); + Cmd_End_write(param_105, param_106, v_283, v_283BufferSize); } } diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index fbe025d39c6a00852ac9e6e2b1528683dc1a5848..b30e2d8eacc8fe26af1415f677c0a95c06bcfb88 100644 GIT binary patch literal 61516 zcmbWA1%PG6wY3j)&*1LfxI=IVmca)10D+KU7=eZ{MsR|Y0Kp+Z65KUt@Gy8n2o50x z3l71ZVE^~sdscV#d6ECU`(CBiT6@>7U2^K4d(WMwW6}jDtEx$=DXYn=#ipoQuNkUI zQL1XHs(-53cd%TXnSj^qH=jtm;SINf|k~dz6OXK(3-pPkZ$l z6mizy(!u{tN_{t^lb#1|I%wb)TMXQE{Xs*b4I0rsYS^Hm-D8Ih8Q;D4 zF#RSk{JKXC88KmK>Cg|4G6(sT^f6$>h|xo+`;(>I*XZHH#|;}_p-e{pRW)kZK7$79 zx7dW`R_FhBU2b)3oz>KRd^@XY;N!aYCmY*#G7pS-*vLVn_muZE~p|gm&s^t#_l|q2stdl)*zWPe{%ss40w3GXLtVz27v3AIqnZ!C z_uvuuPFXDs9y@IC(0>~iyD=A}E@LhP?~OTCwIqDpi0&c(J-?1>F=J7TyLg{Coz)V? z=~=TWs%7EByGM)|G;%ciXZYCBBL@v1F?hFeEb=IMueO2rv>k-! zf85T=rac7r@>aA@{#%3F`OjGG&?jzdoBa2A>KmSnJF4x9_}llypSjw(PyAwn|JS{= zugRT?etz83&y3ZsefnvgME~m^9LFw2eCI3c%v(^kPWPydMvWUkc+`;gDd$?L`|4fi zX{&B{;X9~xR%N{k1KXvwKpw63?S^Rg*I9Yq<&5*akGIe=Ci|p+OGB=PjTknPYjjXI z*GRu=#%gq5-~T)h{~{k_n$Jk;>=|!dlG|S3EeDU^o%~z-Rqm^I4mvB}+_&YeJ3g&9 z`-aa1xN)djapb;w&x?MQn%Ba3Vzu9IrmeW_TkZby!0HU^tJv}EL%WQ}_m1{>`c(&l z&2^h$yP0k)7d2nym=1#Lr|#OzeVLoJYocbpyAB?*=g_gE$Ef$?PUui-+y3XA^&o{J z=YwgNoDTuFbM9Xq4qkWU&~3(#9ldAyo~!PwH}8Jck? z#PC7&8*jgAw(3~4o*08#cShOKy?0mj8)}bVhSJm--_pam8zZLc@^k8p@n~>+j2+c6 z;P#sLtBwN`cLQ!sh}xciHDA4B?W|6K*M6;W%6&D*sqQ%YuG1{lX=ru(|31n(o7Qsm zj#1SSb~{4HcQP?EzEi*%->Kk?@AP)Rd7mctewCW9-tqOT&V-X+%TJqYsphNiJ;E&2 zIcV|y?}JI#jnTB^mev}}Z-|+(oMoRZhGRS%+#XB+>b!Qp^Y?*-I4q789>%~4$j@9pEi&qQzHjk%}|F7EL9 z|J}rn|6=;cyj%j#yj%*-8eh@Qqvtl+ajE$#=j19l`L_JD>8IwaoD((Yq_^#?)%EbS z{qG}9DvfcrWJkGo_ww}~zJ|CN-?iY3?>cbCcVoNnjVFv8(^_=%R`b=HV`p^}oE-IQ zjZN;$&zbGGvs8D$YoGr~CckKp!r zXRRIuJCif?{-wzjkKho3pbk&pTd-RCz|IJJ~w#SS3PxQo} zxq7ya**doVFT-h^cYi7Rd8VhIj_R-A_8iV$y##g+>*L}7((AuDS=#IPJn^$wec6wp;h8bw_lM`Ty^`wNKtG z`gp6SkLj!T>bP8E|I4WDIeVvw`)*I%nX6CgxE>Dw|Ecy~XXT;dKK!ug=cAr}I;xMs z?X{V@`V!p!E~#}F()wD*?^Eiiz4uS}`5n#Y)Wb%!Zs%Kb^aZ@V{*zbVf?MAo^$q1$ za;>&s_i5W%eFN|P4sN=ta}r|tb=aUGBmezZBJAdzTRq?V^s6SJEv?$$1rpa>C#|*i zo@7R9KB9YMxA*+M|4xh69{+GoxBqn8Sae!X&bHj2E?waEJ-R1q>;7tZ#ytgnw8mBE zHD#Z?I;*MRy`QJ~?h@`4;!ckJs%hXOitn0S+v(8;*YRcp_r3@4eIb0aVIxP6ZQXbD zZ*gO_=DPm=)O$1i`2UtZZ0uOSzG=PZseA8&e=COl&7~tR@q%D|o#W-fV+M~MJhJ91 zHu%a7KCr_B1t9=`M|F-{f zgldiBpoab620ygzzkK|UY}kL+;77OpSBU>{4f_cVep1_i#rU7ru%F)GXSV%UivL*+ z`#BAMUfX}=_+QkpU($znRF}isXGBN!AUN~gRju`@0qui8}?Nj{QGVHp8dII!@hQd4`}LW7^);Ai&X9o0qf z?61qf<^H;|k8fvnbsyeQT?@}Xy$PH+w}Hzzzwgtovzpl8cenj}_UU~M`vVRB$F_gZ zK7FWRf4C3ts2+nS&sV_7^9^vB=i7bSbyn{*_to;*Kn*gx*WJE~9N>30%7w5Q+6 zz~yy2MT1Y-;4?M&EDb(uAKp>T4o_e6_OW$VT@5~egD=tGOEvh?eRxN;EIfU!)W_CY zt%;kO8=iR> z*~ivdjc)KU4SsNgAKKuD_2C`W5%BbNOdnfk^@|2SuE9@j@Lx9g=?(sy20y#O&uQ?B z8vK$5zqG-xZt!aw{MrV;sljh)@LL;vVuRn^;P*869~%6@2LDroKi1$+H29MZ{%nIk z*Wk}L_$v+mT7$pd;O{i}dky}6gMZrKpEvjy4gPI|bDP}fESsdkCu{I28hpwIpT5Co zZ19;He2xa6tHI}P@C6%up$1>L!Iy*ky&azmTle@YH0-Ox_wb88UVdg+yI~*D;OjQ{ z)_r)tYG=5gLH#6|m_z#bbXLO}e76Q4)8J$K@P5_)aQon6Cl6QJc^}%xr?Wb|!H?|2 z`&Yk!w;rGH^l#9v-J{y?bDh<3=&cX@B=7g5a$n_V%j4139X)DzHxC5;3@P_DYEb>8 zk6_Wc6-FP!Q?zlza7Ul4<U(E(|>>TG2@4fh#~z?rrypfrk*yFV;jtK@EFp^6zJo}4jwhGHbkEi-DiTm{*$-H zI2HO(Zcs*!9yOXT5cBcBryqZxG{mo@YD;pPCi<@3LwarPnA66_bKW6)#@ait)5TsS z>k9RWGCwHaEPGhvLjHe*XOv);Hfp|!`JouI9q)6Cdq@b&uej%p)#du;uy?ZAoKJD!f}AbsjR@{4x+ z)(pD;kLzjQS)HbB?C4#G)pKkfE1Uw1b6-3@+k zAKtIJAI>;>rp7#{QS zWE}RMGj1ivHC4?st{Gdr_j}6O8hj2o@2TBGhqd?meDdAfS6{Ercc17(TkFwLEg^1Q zuN~EL;C=b_(ebVb-mk{1sPm~)eQos?=67k}#LsUCouADMy&bP#wN0V5^J z^=5I*yP~xwq_vj)t5HR}UcFy67QHoB^?Q$P4{m9_^K)2(AKu_cHTci_aPAA>8DF`d zPeyO=`N^uQz~lMF*@*Tu$E`J2*XUR8`&+@~Gl$!HVsuvbfP43SNA(1}y_Y(w=fLfK z*I7NUU-3Ql%MJcIeB7|TS|^e7@d>~ zP0-p0XnUV`x7xISqs6?{IPI;%|^e2WI(s=>Eu@SPicP=gO{ z@SzPpyursb_}B&?-{1!{_(2VRT!SCq;Ac1Zxeb1PgJ0R;S2y_04Ss8bKiJ@ZYVb!I z{ILdqqQPHi@Ru6==h~+i1P{9Xs?#|AKVctSORka~ksn1w_ zE!eJlG-b8Y&(F(hbXg7L{l~Vv3xPXZ&t(??`wE}GO1?Li4k? z+ERsPu4>$t_2lAbZ8aZiB|kr7tF2mSey&zqqtG0u+7AlN&(mt_6`G%?)ix|NKToS| zT4>IZ+LndpXJ@r-3(e2WYCE(vepBki^U8dN>*LG)qSn#EovqKz<$m!qwEM-icmMeB zznnk+t&-e-3nrhd=8Lij@;N9ou{+!!^Ap|p;#p|p8h25Ab-`UHAw4@kQxlTfs;Os!^q z^AgbqoM~fj+RfGdsm;9X&m7jI*2f&yqBaM$dGTMHx{PCtAE2olLw{qePn{SWfQ_Lx zFS9t1x{TpCQjE7DTp#1v)_9vyC*I~@7 zG9PO8J(N0qdt+|jYQ`O2N~#Xj&lN7 zyPAFPO|52ZoX zM^S56caQ&!I@g13wHxy|>cl)AY#TLWo>1G&$(ScnYgaesNz{pHTkXa?ojNhk0NX}Q z+nI&!SG7&ec)zAjJlkov&3V*`cRtv7YF;}R)VA!=jQ_&gPrI=%qE2jMXgBs1)QNp1 z*w|{0|Ek)yGPUEsnp(TMF@H;)n6}k!%p0i_^LJp|sA;>Yu-#nS)Qoovb>i7hyKU~I zPP~a=&1U5x)wLqDq4pKj>RyZ+BLbmvU{<%aIK)jw$H=5LOl z)w+4lT5DfX=j`{~w|zB9?>RmZO@HUTKf2iKK|Wc{UF-QdvnGd^bD(dl4f>M!yMJc@ z`i_|!Fbe;x$qwYJx>_vz02yxduNk1Z~X_Ww;O+cvTNc~59Rj{8JvhThWL z({k?%)iRb-;jXLib-w}|({;D~ui=w`)om~LK2lBlSv9sz;$4cSL;StZ)L*;v{}5b% z+dm367so$0{EOOV4EdMTYVIBV{|V;*Te|V(K08p;{%wtoZ~N8RP_C)*w66izSKan< zzxPl3fi<>`{y(g_Ip{C<`+WT~-W}ndMH%0|aJFra{dD*t@U-`P^t5+A-nM<={~mm? znrnBmrlLxFzrVJ9xZgLI-0zu7?sv>3_q*eg`+crF<5`W1wyWT4!sml`;q_B6|KGZo zi~o`&KXdKx-hbY~@LGz*-8X)&aQ;0jEh}@CyXN-k%f0pm3d3%V$@o6UwswrxXEeCa z`bxXc_)6}xy^>$k;6Ar2?Kd~L&+JP3#De=?=Ciuk<&P9x-RE?%Tl<_Yd?Ngrf~)(S zE_Q35(Usiib0zn=T*-YdS8|`rmE7lX;huRuiwn2@puv6iR@&z&xa0NNTWR;%TgiR) z7Oua~-omYY_EvJAua(^AYvJbW^R;kmpRbktpa%DOT50!rTFHH;R`N>=ZoWQai+vc} z=WF4<7x`>0T>BjbSNGXk?8fuiTDbl`TMM^-zQKK#7W?MdeU27xJfEe7Yxh}NxV6vH zO71hXaNj$9h8Au-pP_~Oxy5H_;nqGw3-`U$XJsY#d0EMQW>#{ao0Z&WXC?RfS-7>& zz)J4(uWejbGtZ?)3nO3;|KGO=f_L)}6-)?Z9ag}zTag|&W?)>3-R~4R=Dwft`$BJ?sKhh{e7+#ZtZieaOcbCTH(g? zxmLLI<#VlYKdbp%D_nn{X@xsqKGO=dDAX@wilXIkM`!+oX|ZtXLzaOcNoTH(g?nO3;-<1?*r?LN~Ap9nv< z;OgfWTz{WwmHs}{3b*!|R=D~2Oe@^>KGO;}AD?N3YxkK}_(ZtRw8E`@rWLNg&$Pnz zf2rW=FBjbT_nB7woqwNcg&WUjTH(g?nO3-VpJ|0__nDS_0Qc;E&v7#MgMLo*`xC#1 zSd;s3-a@KV(6k-G-MaiVu$rGC5399bg4Mm(KdRPFhpX$i7I*yK->W6=nP9a6`1_gi zY>J;VjS-)7YW;{h=DBd&si)m}VB2*S?aqg*C+7>mwpBM5KU=7Y{aj(&%)`a-%!6~0 zTrNS^mYA1;)%={3dAJ;`Ue3c6aCQCc&(AGt$^UAwpO?%0e+yUF-_JGj_+JZdj`un= z^~}@tVCP-k7=9K~ON<-A&AI*^ntEc~1U81cF>at%ON?8<&Ae|#Q$M<1uiLFxEy?pveEtSjv%Q~%|N_L>sET;Nv<{Az*y{2c$+Yixd=jc>yJUfAz)vu1Chsb7u{M{V^sSS>N$ z0IPk;_ZFIF#<$Ifwawa?A5(uq zNz6}cyLEE@3~Y>R>YP6ZtNC4<{mMV07|VXeY5O(Uw&CA^J)7L0-%@`^`HG^SHuF_W zzUF33>%<_9R?MvNByct7Ipb9;$7^o#NzsjG?f5&X`%yCfj@oXW{`!Mm+wjTY?t!e~ zL!lKQTaL@3};Z$&a)Gw*$b!xD&w0+JzOiS&{Io37}Ma}%h>1#T$`8js? zqg?EFGtOiDW`H~Q$zevg+NEgrH#0c>X`6|nW`E+unFU;qZC1E(vmUd-^-)io*}>)b z=YZ>{?z*HO>rXiyyf)?pn}=i7-yFB1+3;P`L7CJk&^u7`k9O0 z8QNd+{XTeM+Ul=OAHR20Prhq`UEA=r;a)#kpC5qLe0CO}b>PM^-T-R3cwK7!^>ZJu z5B6NwwjNlX_sR{x(>X3`+i7=<1F1KptVuC9+sgI#ddYsb*0vF~*SfZi!Cnj2o^hK} zZ$|N@ee>FGov~~IHb%~dEx~Fz%BjAd+b+HM6d&y}s=YB^W71^eAFm_Jj_}0a30&s+W4K!K{0TUDYTKEj zW}f2283Zo#+y(C3-AkTcAA`Z}bM>py+ylFUZKusMY$&xa=S15Oikf|i)7LO?+1GG* z+1GAx`%=&S&F)~^Y0KPogPl8ViM3Gi}W_J-@Dp84Ge?ED(bxsvN=?&;IIIX7v)A2!?XOG&@`gVl}a`@1~01HisF zW$q4yt2uW4wW-XFP3c@xKx5cszdw zQvXgHMO`01$KO=zw!Z-^*Wb_Xw}92kd+J-^>V9?~iqCCeHQV1zEjPBd--Fe(-9ar+ zZWF=gtL;u|x%YCmy9?Y+Z5wUL^&YVK*ye6(x%ggc+t}XU1laYfQO5u3yd`>*o2NKL3Qx^EmtbA-J07d3^p1_q?2% z74SaeVL02kT7;s%cKh}m)@I-RsNHAQiSr0ppYTWF_MN@+7+fFqoN14P)$`u^B-oeN zzP2YQYW5{g?5DtGUr)o!GygAeebm$D8SsTget(6RpB!W@#`_cD=zk!XV&2c9e z>wKPk4s5Q@h5qJg{_2xbIw|IF?L0nD?KzRWUVxYP2`|FSHG2uJk9ykt9qjlsk1xaZ zQ}_INmD-niXnTdCW**|ic^#ZR>Amn9aQB;feBK1BoBM0j@_ZKl2RNU#-UgR@`5n0S z#CaEN`+W9!AFTEsCHwOOuG@CJ_M`V|6A1ZeAfFIT=s8YepdYiU0XiueF|3d zmFw>__(zm{)|2aJ4Ey~8Y=8FqIknvDeG6<~g14czy|(o8Pp~#$S?iX@yl4KcGbdkx z-4CwWw{^R(_%F(+e?##){F>U|C|UD$L#t+pts1FSyRQ1%r@virf4N_csm-=ovq`XL z&b;8uHJcQ!{ciR@Rn@`&sFmMebi&JZ><9NVk$Tp#KiIa~%;`J&SIf932dlY`ey1`8 z-0w8)M_c@+1Zy*fzaf!l98-fc4u8j5j$;~Z+7oA5uEX81=HB!7 z_cMUiv)^U{mwg)F{WdeYw(Nshz-qp--)4pTJD2P?xqgn#-@DkC`)dMiW(V&_t-m&X z{LPHIx%gf=Cs@sR&VR-_7n-*C&0YAF=gvIn+HyY63%0Gg{rQ_4wZ!cLm*>>{aDDP# zwE)=P2I^;fxjxBbA#n07^H>;N+kwobzc*Y2td@Qk1z*D)$G#X`&Ajzr9BdnP`(2P) zE&VPDPQTt)CGJw_+7fqZuv+ZPfNv=JT^6pEe!mB{jk^8%8zeQ`o7eJSbD5fV68Q>X zx^Mma9&N6hziGCuzgN7-yLS1;VAm)8Z35Ou-JGeaP5B@7Tnn3l z2U3jf`pWf5pId;-KDUJ1N8)V-R!h9C!JAWTXFRz+iMK7djQ2yh`y%nS1FI$8_TX(O zwlkhwpN#oO;BpRjgqL%$6I>tl^zmb`dSdJhcCN#J0yd}2;~=m;>WQ%nSUu3bO1>nZ&V2dl+@H}D*k^t(G)AN9oO1}{iSj6J|= zdH>iGY&-SD839)JGrQvkRwMZz^V8;-cBNKJE~CL&OJFsI|IsIFw-9yTDeV+j~Mm}qt z30BMZQ1VkL#xl0}-+dna6}EC5zlIz8-ul}34cL96p6`dw0^3fT_sQo_`|`7mwzDZ} z_AO4{=Yq?=&V!eIoe#G!_3Xn7z_!ztxxEnV+-ghgi@@sP7lU1ooCTMF)%0@>w5d7A z$<_Mb&C{jW%G@u58zVVg4mKzKj4#*6_^zjOVV(T006X^NcO_UY-*;(KEBA=&*~~Bf zUyaTF_bvA5Z{h0Za}~8b{a*`qJld`S%kAqrYIC)AKi)unBgL2Y-_>^O#JUM=jPRSm z_Lu#63s@g@_lW#@im~lWY#z5#dtHcctFe9FUi(>_)97+yyp9a=06; zmTN?Q2gO+CAWqwR!R8WvAJ|xT*Vn}T;M19B^{n3mVB2Xk|9hy_65|ixa*qB8*C+e@ zL9m+pKWq9YxNE8{aUKGz=UV(T*x2e`i;qzIa-Ow4Oi^=;;>39rZ2y^expN&~Yq|4m zTkEua4D5Vl?2m)hvUjzqm22lb=X2td;CyClen;^XHtp%_X|QXNIr$6N??cq%^9)!$ z?@50JtCinlJPS9rwtSEAH?X>M@C3Eo_pnE~E?xkaIog+>D_=y{md}+hfz^DOqn`u+ z4u78F`@ON{`WeITFCha-V4X2jxSGYb{RPcfrQVK79|YR_@dH;qFsyY4-tG-CWhbvstRDU~*gV3&0UI~JbNm*pk9w||@4&{d9j=uyM4RhZ|T;?7sn7jXiIC@5`oudtc`MnF{R7{i8l5 zMa{JpC#Pw_j>CBVn-O{9O%Kj}p8tMC?secj+6-X-8#A`imUuIQ%XygzK8pD2@tGN{ z9zF}WY%?o-KH8|qXEv~U`0QZkB4^4RV13l%Gbh+}%UsR{*H1m|<_4?#z7xNB;O3~! zv#9lVAi(O*lmB+c@r%2t=cPC={=4`2!G}`oqfH1XnA6)3R{G zXA!uX|3*CDwJ!>{tv37d-|VR6K7TQ=eS6N8pY;|;({=^tq5EbDu$tHTrHof?Nw~Uu zX=!R-?j`l5C~EE{vDdQy#%vj|{rNuQSp60+eH_=y)V>^-`brcv$0g2stqOL3<~sd8 zT+M%XYoDu7tHpl}u>KD)9;#|BxZ0YOduwfNuxGS3WBBi>)Dq(dVB>}l0GIoH9k|-5 z_4RK*>%zuNQDALZkI`V`sXNAQYPH1O z3#^uVoUvf-+*}L}18M6;uP2bGBTJGuh1()~B zIe)qAs{OEOPwf4{=9IZR0IXKtuOA4vownSs9|Tsnf1d-&bH9Elxa{A)yk9>IU0d#> z4+pFH%KiEg@Ixr(Y;3uH#_)dqC~%%T9toD`e*I@)@7Haw&GrXVt7p7F2m5afa{U|) zSIeF}7VOLGR@*U@k2x6JgW}}*3$U@vxjhc9?sa$ywJ)Dns>|JP$5Z>=)(O-;w>*)W zbgGl6na1j5YWvbJIsX#u9&qaL60!R5cdxd5)8dfHtGRxj_{ zE`pn*w%oT}3|4oZE}?e(;!CNu<-YAQ@S)6$KHBs-m0CS}_)74<+QV0&Ys((K8m#6k zd*iq8D=68+a{U~$d-z&#_V6`exw);4?K9&Oiy<=XtW zJHG?_?{l=>2$pBAZwCACZnWJ5mit|<`{)*MCmYmww$q;eZUw8`&VJ?T?>4ah+U!@J z{%!{wN1Of1bI*AP*gm~h?xgnRHK+c2ikjDf*mK|ON3Nc{?gnQb?gGn`*S+A(!#!Ym z*5`ik!Ia#)dM-WyS2x%DsO84;KL3wk_lLGWP|I7fn6E#9U7y7AZ>&57SJ(eRYWZsX zH6yS4KZE^mUz)FZ*_Sc(OYRSYwb|Y=$dmgcVEZe7bMPo!-Rr==+x8e(E%WpQIP>&4 zSf0L~0%x9{1k3IBX=>xRKg|0t;Ag3Aub=kh^bFV-z`p9QOV4X54T^r_p~ zr(C~sy-$Fft84TExSOKy^VD)}?%$WdIR{?^%X1F?9qc)18*PdAGT3hXCSd^0&@zWxE%Pd(@0J7D$l9DEmU zj@oh#z6Vx!p5CW+{NfL&wdEZA5bQaqk2ZbW=ju5JKLP)%bMRAiZ8--&1FQMU`hE`o zn38i)uAgJ}9Q+cTbMOnW+}s9Y`zLs3YTIiww~wgRZSOhwH8}hFE3jOf=is;C?CWp9 z^31gxCg2FeO>gHm<^7PjM)?b_b%F|yb*f`qkSDtgQAJ{%U z2PdQU7&R*)yJslk~C|IUIu>oYC*VDie@K?8}(?CHGmu z+HCI_JmmVs zJ}=l9{>CaX=0j7DPZ!u2=4uSNKC#aaHirLRE-@BBQ;*MrU}Ko8G35H_@3pWnID23r zuw0wh!lK~pfknV_Z5Lo$9Bf=;`gbwpIrEnQzl4wRY^S}I586^-W0q(B(rD^A!bo3Ru8pc%k^jk=zXDjEGk+zpXTEK;CEm*5 z^88)}?wPM1pH;!?;j4kmHs6PP=BvkNb+CH)8sPHGUlXp6dVJOb-^?6lzSh=8QO}wG z1F(8|<`00Iqqdy+>wwjrr*)|vzj!@rZ8`JT2OrA3=%Y;^uW9w1`5S`&)tSE$y0)D8 z8-vw+Wqmh+52WOplk4Z0J@YpM=gi*}EH^j**5&5lU8!xa&D=JiR=2(Td`ocl`4(Wg zHuw3~;Oz6Q!1B!Xw&0xk+koXc^M45L?;YZT2fqe>;GU zqs@NhIrD!6wolLeov3|z=Bw{WQS;0fm)C`Q^7;ul^RP2mp1gJeXC4ND?r?Qu4X2jp%qdYJ7K~vB5zb{zL-`%F&e(`cA!Tzm|%>QrTYOQA*#PNGIHTyTF?Tur5&+c=;xyH^0%e8rSp9ju0b}m?+ zelGy$>^>hX&)Iz;*t6Ssw$q;eE&{8&X7(#je;0%G*Ji)+^mhr^INI!2p0oQ>uzfmj zms9(4-qbIns5x$Nd5x+kudBeBhbzJI!{=HY6v+`Y&)sICFOO>G-(IU}wG+os%m z*P*Frt*!^F`P?z>Zh)tqW0mVyuAl3edASi>uK(}gYVJ$hYf~%7eKLD3?Qg-Bv*~8A zTpLw&8~>BD=~l2jXVdLq&nDYwOT0V4c?ex)-jGdVKB!-^}lgP9LOp{Ng`RYs=a6 z5cp8$MIUYYcuuG%uRnubmpos17_L^XnVSAyqmP2KHy;7ZwRw#`4$j_u3@lGSPl9ue zJ^_~J8hr}vHEKNDX-|JogVkH(BTsqy`wLiqZT2fqf6su8qs@NhxkmpAwok9ozft@0 z8dZOmqUJR!F88W>@_G@Rd3XUVPhNiqXC7Vx%gy6uYR`84ymxvP?s{l@1uPf8M*Tb` z_eHORy)UvK;~7VP{nFKrHdvnXgL$`qxJObe$)0HMa{Tk=OusR%6W**I(=p2~7X}h+Y00%l>e+Jk!^v7XQg=|M1D-{(IB>E^-REKI-wA5`1AX zuT#PO_oitxHC!L{{LO)Bz{b+%Tu(>s%emGzEk(__7CYW|i78i?`?tL0*7ASUT)*Tt z1HS%SpYR#sH^Z~{XM*da?meLYMsH?tJ~Nk}Pi8^amfyb43bwtvW0;*fW6(AmMa>+= z$zcw#|IR0TPPp$$$!RXQKI+M7Zg6tS=OM3&dC;{br+LA)S9ea*zjf{%=L4Ic^Q6DI zng9IM$zPu?ikkV0&DZyp1;II^{QH)2ZN9H849*$l-@26Nj9LVIFFv->W`2op{Y^Cq z|4pVzT7S3Re*E_czWkf~tB@btoM-;Mc$ym1w3?P;p8oe0{9DKV?ak>~OWS0gXN226 zXVgq^eLSPQmgV|noHK*9e@k3(a`V44;@{!-H6tat%>*`wS*Y{(mS>}#Imj0PFIaH( zfRg4mjtW%J!*c7u@qQ6znfheYMI)3L`9@|P_$E(eGmy4Zq<9N^GK3oOtUbMY_ z+Re#5wkp`1vxnsRC+F2_yJJeu-v_IE&8_vF+zA@O)rn%}<@{b|CR`%~)?#b8~k+HiR3`wbECfHQg9&d~LRqd;U1C zO~8)LHrldhHU(>QJvOKI<$9=ZMo}|Iv1gQ6Eq!hUw$Jdb;f^b3_cn0XZcB>3=BB1! zdFFY2EllyX03~y`V2z!Ng(&vF2zBObTjG@SwOxbnT=UG|58=i%58KL<`}Sb_&}JNY z=5GhE^Jg1vnZF-_wK;z~QTuZK)OVz)nX}ma(p7Wy^!pRA{btSv!82#t;q zcZ2Jr?&m@C*d45Hj^gC#+AmA-wKT;X?ROc9xqOd0{p?9Q_56lx1YB)CigD$M>v!?1 zQ+%yT$=Fw`@$%H)r`U)8ou#~QjV6ZuhmR|`=fU0u*Kgkj-@oAcA5?Jt4{q>73$Fi> z1=s&)4SsaN^*^rQ`k&C?Cly@((+aNt=?#8n!Sz3@;QF7_;O7-w|BDK)|0M;#419Ub zv%ki`U59di?S-a(H+#_iF&3=mIpY3zjnuL)#)H+e|0aOdyx#P2|Lp^I?QQQG%Ehjq z{$A_$wIA5CNFQzbxChkJ-vQvVzXRcF?!)x=Q*ioo56F$>`s<(m4guSrKHBtg52>fW z!@y;KN5It%uYDZfQQ-9F9+Df&{iA>S`#ISD^wFk|dr&?79Rn`=`vqL>*xJYbjt8ed z_n_QZCs6C3{!RqjpFZ03aSyAfzmvgbf2YFLPN{wD@0Z~8=N^_D%l)f=`a1(`fBIhu>I+yO&_m8_4IcI*gfk#=nD8=308~!DzJMj_N&2avHuqA zo{0S#uv+Zbf?dPduLG-X$;s(|-|u>`eHz!c+P&UypuUmf888FI+~xWw_V2*1S7P4; zR%@LX)HhRXzYxXta(&YN7O?Z5^W|2s{##P?m+KS%+riG?z4cl1d$8Ia6m#&t{!Xy^ zJ^08cg4Og%oV&orNt?UDYH4#1Sj{#*!&{j+_kxY3&3x~pzMo>gw^7VjuAlw*p7sD( zyYF}6yx*;jZC#45A5ijsH=xFAQLjVs{cb(#yuUq24A1HChYRj`{#e2Fd!oUgD!Bg7 z6kPvj8~nL~>;GcG_5XW=zfy4hUoW`+Z#MYb1=s)Gg6sc&gMV0X{XZ$V{+|{63-FgU z&;I%o+ViR?qv_pTTO`gU-2H{2u|U<^AhXuv!;IAJ2!!!2_t>6RwF|?0V>* z^WjOb=Yu}l^l?qq)8EtJvcJE;)#j^x?C-DO0d;?_soYqur~c{hZ(#e=N1HzG3H9{% zJh<%d1-RPbwU7P11RhZL=bn%o%l)8#`g<8{fBIi*nQa$~ul^iO|pf$dKpZTh$;)zjZUz-52$z|~HveeCZ&@PN8M_oUod?nnL8-v?m( z(?^>=Z&RzMzmLFWe;>ou%J;8N;REXa+|zPnxu5kY2#O?=IYhAzS{lT_hsBSOUC+#N#J0H2;C$Igtto`Ns=pypc&s%W) z=P$Va3pV(|1=oMEg6qFTgD+KZ{g*Ac{>wG^3I*4H<$~+KYJ-2j;QFswaQ)YA@Bsza zfBk~%Kd`|!D!Bfe6_WIXHn?CL__4GG8xa@BZxY~U67(CzRf~Pf z>2DEm+25jYwetO9ad`T3kIRka{?CbCiZY-~F{nOuuVEfZYn?9Zq>gjJ|aM|Cca5aAup^yD-4o-iYf#t^9f?EIdwy!4wz^-@N4+pn2?l&f*&2DhpFHyIb>y!4~VAn70 z_W-M<|2@IB-<{%G%k{DSK=#21u=dvXnbf&|*aF)&6kl6Xa{sVZjW?&>n&SP#w$!rl@;&cfpK`JN>FazthNHlK?$<|~<5bHy-BUYLeCJCL7j1&MGSq)ah#2&o@?qHux;~OzjNVgu3yG_9^7`?+`Hy;K3Lsc#mRL~Y@;c@ zMo_XQBWrBlqbTM&hT1hT|BHxW9e!ECJ;SanxPDg`{2K7JHP6^DhTBg$_Dj&zbADV3 zR?GW}IjCiBE(fb+9FSH3$Fit1=s(92LEHh^?#_~`afLoN5PNPJo9ri+;Nm^d<&X-*7#PiTGrSxs%6e^ z2dia`?*OZnYkVi%xpNJi1G(7o>z_5g3+x)}qfH;@P(A(K11|f!7p_)*uDTzd{+vU( zv7A5s)88M!_NR|FeOv?e^!FgR?C(!-weoY-pW*4xHIN(2`PV=FJp#5reYEN08mgzi z$H9&{@8eH^)%K;t=Si?WS)-@GYQB%h=V|Sf#Q6(Yt&H#q0Ah>Rg|%5Tm?4-zd1(&)Wsp@0|vJui*NBSaAJ6E_i>| z;?tVvdV3Xq2!7@D_8OXc*5GxpTCO*9R?D2e30BMX_7+&Jyx#r+cP`vtj!7Z45`=R`gIy$>$?`v9(1UT+`4)1Px9Hf74xo)c%! z0jpyvzJ5l@p8I)?kDxxf#z#>fLvjE8f;#(el8)Z}=l+?j;O?I(8hpxv>pxS$^`E7| zXRUem+oW)FD)(ClntJwICs-}}%^cM-pZ&mU*>C;9YUO^L9PT{0o{mW_c0BqgpQ*sk zOPSBqXzKbqUU~jq-8A4X{EcZW?a6CeaGBS1a5eX3t3T@L;r8cR+s@~@GoY!vKl;OG z1gmME5DuDgF6q-t7DRj9gqIWX92MDQs%QDn!5guSDrOl2;7Cg zF^#1?c`Xbs^I8P1mc8RSu_)aBvUfa77DH3d-dP;1mc6qiIAd|X<>uyG+lSbBwq5dE z25g@C>!-cBch14?>(`X*o!`{>4C=FL{446SDej$fsk3*MC1$yIRw}sqDhph>|(JxW?yGUsB@>sV}8C$Cp!Qj@KY&Imhc1 zTz$O;U%%j9pPLt4|1BGQtAgvlL&5dmvB7sLxc<8qT>qgBKCI@M<2B)z;8)J^T4?H7 z^R>ZhS##$=Eo=S*uv*r909dVD^L63Qv+L%Z$;Hl>{>f(pu=8H#GZ0N(f9F%4>uN)A z7yiaHmiFYe5xC52W4Kzbf4{Tb1a5!1{(bM*6iq$X|7KvdT>o2uGZxoVZf>rneTZF0 z+a=Gf!RD#Ie%h1gHsCVPZQ*Ku_Grxu^$+3Z=^EJ1JhwwrPoCR@)sp9rz{%5nBsVwr zkbQ{VKekJrKL(qp{`zT8o;!ofJbwaL^I1^x90WH{ze7%*yP&Bj&%t1|Y@Yh-r@eWtUyt3_Zz;Lfuc`5s)YsPdYU=AKUh6kd=UU&5m|jD9 zFCJNN_0bJJrr=(S2NzubLmT|CnrH9t4mYQA?{}lA=UUtYtd?uh9M!T$dxF(+Esg-I zb=B+Pd+{i^^WeNXCb`)0=%0M{0y{5dK4a0;^>@7TtjRcV7yiaHmiFW|9$e-%0j}1* z7W=pE+4qLqpVwmY_5FAsbZyx``-0W-nRh?1nxA?7t+?;&`@<8R<)H|O|X?7r@zWRCBy@kHu-D9-DB)VcOgB~Iqm`j-uUdd-vfN$`8{EAu`XO+E8? z3Ro>?y7O{c%ibQFeaaoD{pl~ZFXu92K0{7h{q@t{9P@+NeceyVm>;O|MCw1(*v}<@ zq&WURQD^+WA!a%Lvm5-Jf_wH|RB%7{TvBlRxU}Y(uQTE1P|nw{(9|NWb{$g|U49I+)3-6+h{`zT89_N9Zd7O`?p8b3QSS`K{_$>vuk6UY=`Md&d4&}ODiKd=)y$Y;WuIq2%j>CB| zN4ePi^cS0(_ehzq>)>6q(O*C9$>Vx(Gmjh4)U&QPg4MFFhZE~}aP_R~&EVwaT*%GE zdC*tCBWZUV*!kAxoXC^s?O?xSvc2uJH`nz^?7r@zWL+Px@kHt;D6ZvG)LF}k#3|SE z?gqc7=9!Z_;KnJ}=J#moS(`h-YUSG81veMRW}kAg{pl~ZFYgO8=6m5?w9#Kb?aeVi zi{01Ll#KZ=HJ(WQOpX2g`&WwN{~LA2{|920C~7)?EU z`4O;M_VVe(dK9jnz5F;hc{vwyb8#N@)$dH&Jq32YwK*s9-r|x?@DZMJMGPNeI2{6mnm7- zS86-r8c%XNLP!QU^q>-uTI{rvWM!R_OVnrA-Wf}2CRu5Y8M zXI=jRRx8)_UHB`sb6(6*E;c{?#pbpypEELFAHch4qrZOIlgEePW*#4*sb^h32CHRV zuOQYZaP_R~XW&=ryqpWUxi}B{>US0PFTu{YHs?g1JpT#ybFuAhr@gtZZ)5j$7bWZZ zW{oFOzeRB^|3RI#{FXT7T9Qg1K1t0pCtu-joN{fxMpMt)d;?Z1*XBF8xi~ial#A_8 zf3baSQmoCS@Gjcuub=kjnBT+Xs~qz?aK}H9`dx}+exEvHo{Tu11 zcTRBfaxUcN;ymc9-%Yfe2kd-nb57*Rb6&9D5!l{#+MDb8C3at*QCwHo=<^!84qvp& zr2KpQzHR;d`7OoQSCq8eR)ct+cO0=0Dtmbb?^IgpXaCPU!KD)4q?N6KYH#uz=0h^~b`<6R@j&D)$bkw%D zopzrAtLGW8F*=FedQs=vn54!NF-%%x$K3&TPWn+hCks$7N4#=QRw%f8ef5I7CTkab zB78u>^;D}raHR@d&h)y&hiUkR+1@7JB%mEr2mtwfxjxu))p)kk?p(OW8-SCuvCLs0n!5d%i#%(*Ay};(-$rQa zj?X+d2CLcLeC27s3AovQQ#AFQTbqH^T+_qI(|xlASbcMfaos~|*$=r+c#CcQT|263 za(r@~Oi|;B=u_6%HJhr&{i&ypPCX69^_`A7XUf*}m+Qyxi*_!!b2+5o6XC-OuK#WY z*MCgG^&eaCqu~1&T>nE0uK(c$*Z;_x=UUqa?)aQ9bC-LKIG@{sm!($E-*f#TSlu(s z>tK7ZFRue_+fnwTI4|Pt`yIf}U31_62whwLJAgZa)e`^5;KbLq6J<2T_~OL>3HV>d zAB3*Wz2*1oyVQQUu068{qpABF^Aq5^f^DbnytsyH+51Dm?iFpWr(B!+dN|mo#nayy1@_PV?5hw&-Jh;*qG(@Gy+XM*TYD#+8&g&8wE}~`;_bF-zsxn zokQm`=du?L0ROAKa3H!ib6J60ehT&)K9Y5p9}GW;qV8Vs`>;d6 zw$o-VUTQzl%^{vjDtZE(8Rg<7p z)f82qRykncHUpO$KVjHXE3CMT4pUbhEkAvxsV1#@Q+9HW9z1f4hIb*~a!yNq_30FG z*59*(|4l-9KcthE2MioEV2dpV4BTMQ(0%tEG=9QCqlOO}HFC`GLBmFl8$NWx$o+@w z*SGK+IcDgniNi{V-guNb$S0?b{-Z{X9ZK1UEae_!M~oOhd_sjXDfw5`nBfNu8m!+U z6PI0`{|{Mqb!?s0ls$YqtEu4QM;=Tz*6m~*==1Q=gU0S7@6+n%X01S54zjrkwd~{bL?KWmW>uz)MYIgX9u@i^xHHelM89!m%#Gw~Hc*PF%K?eY@VQ_@zB_oekdI?!0!v_!hk72PJbAHM)<^u5Um{U}X!^e*rIrN9~>!=nn7N>C+?GdN5TFf|I zb2eGEGdVWK_f;D-gCTDkEs=7DSZ@Myic_(W&bf_#%?@%@SelB z9zFuvT2Ja8-Ff$}R)%-^f?aoN9z%wVJFOn``Nn{ zqdBkoI)!tHTSWWc2Kw0JK9jkXU1w%cq6AD9;KoKbhyV@ZA`q3)h6JK)n;`& z>ki*@C&4{NeUX-J={UwdPp$u4bsVLGKachkMr5T(e(W z5i9-L8r<$zpK80hjR}J%3?EkXN!_FD(++U+Y58tDcu(zqvZ;GaK(dc=`_sL5Ok3>& z@2Wcp&;NRvCY$!uc#ZBv{p7zhxSjv>)gC?KwwC`7k5bR@WZY5hUc?{N6@SKRc#rtS z!ukJhrM=g%Y`glw$6Z%H_U_S6Ylr>+Zoz)Ie`cIUcEz8e8rvg&*E0D(wK2MA!yBEf zrTxI`jvTY`nDG+^j~Uwjgm68mdvvd*sjL0rh3}x&KA$yF7+8|6Mc~Jh{qts`-Ck!k z5uDEl?;HG_D5|m+`?NIVYWS$(quJjEjl`*UHGOqRPv8GO4n@%9V@&fom@?!2BXDv% z47}yw347t+x+dfv-DA*M9SPr-+n)HyJxZUW;KosREae{E`$t>rg*Jg$?a%6|tK;C- z`|lmAGpu(&`*RHS(w}3&?f&$xP6V6lHpBNc-BvDY9%a5K!Sz!&U%7|5Svn_b<~wBY z(0zuD8@sQ1Z|)*bqqOdS&sh&rC~`i9ddc}za69Kd)fwRRMi1L&!nm>fl=my@9^HBO zuFisY`GU<;&BNTuslMN7>*l}L;87z6)i*7@t68e^(Yj&`YTZ?4MR(ts);A+ve(6e6 zr+-V1;7*p9&da%!>En6eb{{*cAA{R--n+U0Oxz8*ts`oC{M9_V``TGu2(SHG{git& z`>Fmv=V_+uGW5Fs5BIXprZrvNeN?rFBkiI6yO@~i-zDJm?^1C3cR84LbB{yry(=}3 z?*8?zu7Jl+n{%n=(ewUlrs_Jh`2KKb(sZLYExDz&`f@cf)0b;(lgY4;*Mi%9=~La% zZnyqGX-!wFKk6Q3e{O`6W9{3XS2d69e~yKk^EJw6#`-==&HJsER^Ladb=_~Z?xSX_ zZbj>ERXp1MEW)1=bqXG^x1`;T0%Uc+}2H~qT@oc`SlPXF$2x4p^4 z(d|1(^H%fd&atz45KfNzwfZLa@EN@wcc$ubc?!sQwIY_jl&%C9r+jru9Mce`&d0)74UM>8-Y(FWP>gX#2&kwx_9H zOWR{djr{)^N&EIn5&u%Bl~z{XJakE~NF9nBPU!QM>P-sPFe%o%jzQ)w-Q; z^`$p_aLd-WngZPV?yYAizs_vcovcS)z8{5me}6emH66T_z@VX{|MS;L?H#bSZ*_fV z#$7CRsnz}-wa#^#T5Df_y{lPS@_sq1>>Q|lDEIsn-tY(FGKeO{)Dcm8%og8~tv%^Of-_tp_bD<5c z<1GO0{tW1>7J_d!eDv6Ht^1BXEpDvVSl8b@dvB&6|8MET$Bpx=uhwUty7w;lw_@1d zB06%2mjUbRdRQC0@8EHReKwW%Z}9aReA@=!zQG4K_|P7_quLw3f1Sr6;P!K6XLSU4 z+~5P2!PLs*=pOYtt79Ad__qJD@jt0yKc&G>Yx^%3|FatQa~k}-w*T_+zo22iu)!~C z`>zoH%Nq8}8~lp4|BCUyreVLX!Eb2$uN42A8upue@Q&&>czciNsQv=ZIKKcc$NA+R z^*XCp8~kr=|E_UU36jHTYu<{<{W$roo?W@Ru6=l?H#S!Qbw|JE~9NnZGZ=<@|lq!?&~g zt_Sa^dU3mybucM7ai##5ai)fs`@*yhK7HH2YaPtgu+P%qv$g%Z*1?<&``kTvM>Q`z zc`gr5o-2dPJXh;cud`aC!Pjd0cjei?VPCHY@2EC_r`@f-Y_;QKZB!3};$gCE+1cT|VN)7G&)Y@OBd4Sqro-cg-W)IF<* zt+P6(!Ov~*iyHir2EVkyuWImX8vNP@zp24*Ztz=raPA4<8Hc-j*gC6w8~nZof2zTs zZty?$;Cw#-Pg~FTuys~1Huy^o{(6JI+2C(A_`41MeuIC|;GZ`5=MDZvgMZiH+$Q(f zD|$8fqzyhD0@4L(DI&)VR#H~1V4K5v81-{1>0_+kydM1wEc;LA7oiVeO} zgRjxxYc=@V4ZcBx4`}d>8hrBx-?G8CYVaKze5VHAxxx2n@F5L8w82L<_&yCjs=>!M z_=E%n_h*Ta2Q_lY|(f8E2U zv%00hZ)@=T8vOnqym$3D+%|YT=Ot7-@27kCbXLza__IBDpXx<;>opT^Ne2xXIi~$N z*IB)U-uhyMF^f<@;M1ASlK8jc_C%l$t5 zE=+%JMY`Mf3xQUhwv8>N+H3g8J@?|(>X<=oW8&GAO1=z<-W#26MDA+85Bk0nhK`CM z?N6%S&MKy^I(@MX<~Ip3q>ahYCyX0BW_)dkJ~_JYPP_g4w)!{)`Y>)#MvomcmM>cK zH3{wXZrP{APXCP9rA^h+%&dE?rbcVGn_1A-Zetp3UEiM!D(&sDoEE#$ytj=tV=!H9 zV`jQ+ekb!v*Zi(KaRff*tL8D3dAIbtlGkd@>*gO|$G`o#)~EG5#XUz2+F|UteFlsh zJ8|EQ*SAie>RgdZ?2yic_f+!1FZ@xS#L^E)}~@vFCk+EQ!0Rzd6jKC`3h z4b}nc2@a5jL!O!i%JE{xd?Y{M?t^_A;cYivn-|N#_U+kIf`mGW4 zx_G&(erNTjwsB*J46nzkHycv9Pq{uPLt_V?klzE?->GViYOV2{8eWdqv<*H(gU{51 z^GgCa{pcDR^IQte-&dK#O5mQqXW@5?XzexHS*-^y+ux|cw`}qDm1_Q5*F5>})bQWE z!S{gkc{Xww1Lc|?A>XTg^>yofM~XhIHSQhNe&W{k+EE<{K9Fxn?bnaM2i5p6b)NXt zk5g~qKGkX9#LsU;9fxxZy&aF=KonX#Ztv=1wATJnFADp7HQJEDHhg(|rcg>GA_~Q-!WP?A|gZHVPg{Obzx_u74y|yQ<{so@EFYiXRhdFMo zvHD!UdVTle!?JuYG6}pLgZCWZ?)BYK%?@v`nU2a|zqi*{XSINS#dqn8Hu#e8@x%9P z?KqCd>hSJ)>8RF*lTItQ0br)RHO5u`Cs;#wlT-VN-@b>Sj^Lf!(orTuk7~A>Y*s9b1-5BFk_qTU-TcNkdy`y>< zE#vnHxXk~F27f`{qW>>8>@PR?`wjj6!B_6V`F%H>`z~g^vs$f(jrT?kzHWoB-{2cI z_?8X6b%Sr);5#(O^oz>Y5eqMv0-{2QC_~i|LMT1|};MX?z^$mVogWu8McQyF^4gO$*|Ea;B?ZJCj zAH#?EJMcl?^|@DcR-g3n>8!qJ@UI*E+Xnx>!6%!n=QTG)gHP4q(>D0@4L(bQ&(`3p zH~5+jzIKCe+~Auw_|^@+ZG)fE;HMRQ0OPqJ52JjnfvRdFM%Z`39`m=0E=^gj^z(UD zjUlZDa4%t9pHq$N`{Vh*9^v!0X)9))q04si%-qzPZRc4zZJW26ZL9eV8O?mveCCVR zyVhnbG{-={SqqKrxuwlnX!cdVeuc)6wzLHb&F5eJ7AZ8JebtsIG;>ws)~qW(pLf-K zX_K6N-c?(r(0tBSTcgnIr`kG&<}$x?3&2y|i9?qp&M+h1H9wO9lF!CD1K)DEZssMr@x?O{!}0IO>Cq4F*n6tah?nE8 z?pUkK=O%*Vtk1ms*EZy-tMA;@vH$AUnHN4irJ8=moVmvO%*$Gug;L+>#&!*uYc%WX z>$uo{bbS|w8!KAdx3gNZ_LZAEx81F}=HQDTb+wtXEl=5xa~5o_13AOm8Y^G2_T%iu zseL6%HS?RBh`uCE9dpxeuC5(z=4E^4;GeP5#~jw8GzYc0@n4&=jAM*-(A15gzcDtT zOpFb|#!#D^Q5-;7#;_kbjkgh8ALCiqc$-ls-sWKAsX3QhP$r)Bw}h)%UtjBQN16KD zgRQS-{T*tXcI)p*sb+nB)pnyyAEOOHOP5sks)8pw#AC(f7!j+xMfuM{ugy_R*AT#x{;?-~>wJ z>0|v9DUE0SlPKk`HS3>Dsb+oa>3;@gayt`jEH&$&Rok3@+d7+4ySmrSIh2!d8qd1g zjd=lOV*V6t9W`TKSli6Wm_MV`u5QeWC==7V+KqWRWn%sUY#lXiR}{7@Ynz(!uA)pl z>uI;n4U~!ZOR(|OydH0?ZT35-x&8{SpLS#4M48ye&~EJ8DHHn+uyxez|DCmMB}!x7 zMX6ogn7^S+OzUbl=KYk3`2g5DYT6zwY!B5oHRC-@nRwRIZk;D66Yoi|@zh)=zo$$c zZN_<~p*b(=&oy+{fcoDX`jxf*c0+f}^#7ot+i&%68@lkHN={dC-CK-bUfK|W*6o$q<6F%!HT1AVQd-{SbYe&+^z%*A--zRZ2t`zpuQ zQ{DIrQyNd(>NWSCOHKQtHMWlNEtf#!|6P1Z35WhmgUx9=&g8Q!nl9lhLptG$lAmL+ z6+E`xz_y*X&S>!S;T?tj61cJAe-*rU!Ec0{v$@?0HaGWL`EP6P=Rn$j6fN-`%g5o< z*8YF2{d0}YchNg+_f5hl!ME;7>(PH6Tz`v1YV=? zy%X+zsCL)ulW>l;y{^4acOK^EF3fvuad~S0Z)fRSVq<%cX-xb0DkVeK(p=AS?=#hs z_v>)y)X%whz{X=}TJ`@4?*&%3zTEp#HRHcmW9uZ|7ic=f-}_emwL8{xGQs*=zaQ9K z?CYHHjcc1RpQ{c{@@wA^>+tsZv_xt?RKflJ- z(f^8?n}hyxzo*wf{k<0Mo|gVS24{(O+5Z7Q0-pMQKc4!I$Les`QsS)*U!?H&`)U1C z-|v%4?)S(g_dDd0``vKK{k~S7_Kx5p>R0e%;Pb%y;q?&MYs`DO_>Uvm8R1JXi>?cw zLmXfCLC=-Af}K;_@Ng}CfxnIEE4lC2O78o$ zlKXzGVcVgj|z7q@YN4~xj3%9=S#KI4U z`%Wy}a*u-hx#+vE*yXK$$g&{Zoa;^3b*u~Rmpv4 z74CZRomIG{@2tX&=R2!#OW#eE-1k!@_nlP9eJ@q=NgCXDQ?WZgzMBfS^!-%0`S^}1 zTz}tDgdJhrwTW|@2SG|_dQj(rSGZ29WUQgg&WWJRN*JUeNPqcbC>U_!u9tZRrvOB-%*8Y z_Z?NZ`T342+|qYc;rd@*aQ%Hp6}zSHsKOmD-%*7-UcRFWH=ggP!j0!Us&Kyp@EujS z<^2VBd>$;g@q9-W{}bT8qYC%A&v#Ve`ulz=e0#X>r^2=Sek$Djd_NU#>HDd0{e3?b zuD|c6!YzG274G=>ek$DY@%>b|@q9lOZam*lg=_cyRJeBEPla3hekxpl-%o|>@B686 z?Y^H1*Y5kNaP7XI3fJ!YDS3bH+5HaVZ`?oo9O!o=e#fvT_u+ixRewj*b_6%n@;AV0 zJ|iAgYj1+pz1Kgk*4~1v>$es+Oy1wCCGOi`wf^|~%=kX1&ymK6&j+=BOdaz>xb@Ui z?<27F{Ej8{K8C9&=TE@aRW}!(E!4z5S6DaW@EJVg;8-N)=jhrJ^GmRr&p8=~ufXc% zID8FP*U$ERZc$7A-+_G|F7y8$uCBk&HS+lP;y$(6-$~HaGfo{~$6ehR3}2;|7`?&G zvF?MWo*0vYjiGLg*7FlMF(v~y^PU_{{lt2{rU0uM#~40ysU^l#;AV`e(bTh!rU9!Z zhR-)__KobQ;*MV zVEY=M*}?j$I}Scus>OdUuv*%k8?5GcOXl1UJTGT*_WKg+`W?x9wQZ|<%;$%zc{t8q z;|sujUUfa&zFa@C&#ks6_L_B2CjY)nsI%mRulU??KLI#SvuI~=wP3r zgMEGu_Sreu=Vr0Fx=*eO_xoSJv(5aihNgZEK76cHtAo`N!)I-^FZs@64NAZJu>6d% zS&MRQP7m$t)OO3Su>0Nbnqd8`*TVFvKiv4%S+}-X8gqTh4LB2X!`g0{oCko7abunH zMqo9+SF>IDdYs0xU2*De2DWba=3w_J*XkDFEjb5r>Zi>)P)ok%W=zY(*a~dS%=gw{ zHOJZd+SJPanwxwZO5<7D|LrKZ=S=^%t?ia+ZwIh*8@?mhHIO;n39OHLe0B!=c@e$~ z-2FUbxGP*A^(*Uf-3@FkZJ*PRL6jbjv9{eg)y!X<*n5D@&%V1J<>JAVj${0Wz#aSK zFchxl=Z)Wu@_t)_0$;&F8jYXTt9W^CGA*#%AVr2 zu@Bfh?5qCfV4kBXlczqTIMvKk+@Jbhe`CSf2YokC?iu@H(|$i=@4d=?V6}3O9|te{ zIUZj2a{^o+_3XzJ!Rn4_`o2Hh`-qIs0dW1)Q|~~qy6cM#mXv^k$+DAlsh{|H?6 z&-UEs4?)+Keg06enn(80!{7&VW}lbqmz<9P+n((nPAOlNk#ek#1TRQweQjB5>ZyM; z*!t?Ccar~c@PC*8@#xx;{|R61?=hjCU5LTzn3t{>E}0p9fy0ww+5U&*$ck!PDSl zed}qrkLOeVg!42`b2F}7f3KIUcS~&-P-f5kDcEbl(mn2HloxS&Soh-EZkhgG0yakW zhD*U}+2iFGavICn;?%tiT<$ABhpS~@`32a+eN5ZsoNC4qC(c#iat^MBI|q)(7L?a; zUdgGSeU|HEpAygVRod}7^1K$F_}78UJgzXn@Rn|s(Tlpcbcjs18hBQ z8M`~dj-9r|z6-1#{u{94oon`Pu$q33w>CA$JGolQGftMx@z?J9vu~DZ?;fyyNqhH# z)$%#6P0jjQ)2_eNzYko_=WpR^na>Bn8DnktbE=t#IB^~Xmt*`8e5JzvFt{9-N8tLX zXM7(8JHEzptmOKcd)l;Yj!o)6j?MayaVD?dfz^$-GNnAWC%}F-W$d1WtJ!z`wW-{z{0 z`&qhozDD^sP7m#WukDuU*Xv+oq#tj9)v|ZWU*$BGIfzsDEwH(SzYR84?n~YQ|ASLK zbMsHI^|YD)o0Mva@h-R==l9_HWPf-ctk(W4Zttxhz+HdZ66Zs(dgk;au(8!0=T9g- z+{3he%&BG{#fkGTuJb}a${?o2CSyd_o4FSHXYb}wfSyT?!BD# zrU&m&X&r6Jbw;rHSjYFLao;QFX%Pn#dCp3l|=!5&`w+7{qcvn_FA zF9a^zS{QC05_b`}KI*BnD7bv?S`1!3J1!2_NBxTWx?TcoEN%8Xxmf0Ta!IhcIu`ny zr}?XIgSH)~`CB@UzGp9=pO=Q0_X*3u%Q;&Xu8(^9vK-j{r@za?^;38MS&=g1sBHyK zHS-WB&dT7d$t{St3f%Rk9-mdg>gMim0_1rXUIUzGEq^mquH`l1+7o9ju=Vrovkq9z z-xg(k_6J)}o7ekxbYfkwy6yWLgch&b-$||yFWa{*pH(+N*Oq6!4Z&(2x&8*g*W=8y zo?Jg;*zU$)+q2z`DCJ)7{_bcK@Gg|r*OqoR1#9!jT&uZuGX9nslYwB@gLAe8rN`I& zYe4Fob9xl zp8f{3oX>5sX-}N(z}C-vZVy)TH-?$d9pKi}=6w3w^&P?Ld0yWcT()U^=WrKvZ5i8L z!D=3v!`xz&!53yHRJi5m%a``(-yy> zgJ-f_2v2`kA1+&x6ES{y0$|Y zOMlNd8myLf$AH=5y6j`&YUZu~zF_O9+wR_!YH4>IIPH3`lDOm1wWZw&V71sMf;)?L z_lK*c-2=eZQMcXwDAlZQUI&5AWlHYY zKd?X%4Z?xIOB0Gnzpok9oXwB z?OYF5i~kMaJvh_uFTwh#C&rE7y*U%(S75b#e%u7Mo_gZ^8m#Vfu>A&BH}fC!(`KKp zrc_HVw}La5!0I;sqfh4UcJM8n`dMGDPsZd;$RU0eKqTlkfG@BQf7(#8W|>#Aq$9t7LAwrCH5)g8~Y@d(^pnrq-u zG;NO2!<2IIW0b~my!|faci>gn=e0c!mgl?aC%}F;ZGG!$*Y6HWZJD#*gPk*NPlDyz ze1`l3SbuF#QOfMD^|kSLuR7C@VQ0X8=PvkEl%EVfy=i30WaHn8*W?bS%>d{ zt*0$x`%kcAt1YqL1*?a@2X;QP7rYNv)6Y53rsfzYSIhr2P9I<^bN>)-jO6qY*qrn; zzFZ&UJD-k)W%BzNY~PdLCt$UFhonu-`dK5+XEVRF|0y=xFTbn(46bfI|Du$q{V%}w zN89IMxov$Zu1nYB*ZOgKX#cjhTPD_bU}J=T54OFmPj8O(QFo2VzXBWEw#4S)Z<4(( z#Qq*R*GLCgKTC7!P1%PtIZayIEt5lEurZRuWMH*iBl1p8V;NhVx>JD7C45S-vF@+0 ziK)PT|EZq&n;L9AZRYRqr_~Z;8gMyA)57)1I-d@#=K9Z^P7ilZwI$9BVD(&!GlGq+ z?zK2GrHA9JZ6;1N`zTJFS-`fRahE&4@wJpY&epX|-C4noNBTY+SS@Q;o0|1AcaC$O z6XyWunQe9YSKcSgiA{UjnhWe)WK8A;&q5vb_{;-V&u3CUu-e-ACC7Q;#@3ea2Id2+ zI|lx~UG8UCKjwBpaG9fR`CPdWy0$!5E(})lFh}7c{KwzJ+b?6w^)tqH*cSuao;H6c zFVA-ai-Y}c!1~(K&Jtj49@g{s_i7n~rNHI-S{m-S0FBwcLlIJdu zRkyx>u0SpRYkt#*wY2>CJpS8rC)?eMSu&o0w>-UGBOMUg!Ul*(%z8=^-!q*2I zH@_p?0IZLC*5ig?<7hJv|Ga}*Vr&F1^Vk@E0ePs$XA`h`_@>}8kAZOW$Zruh1M8!n zJT?a#N1J&Jpi~oYLFu0dP5dq4uDkrkXe+SVC9J9VY~AqL2Cn9x+s*f)+rq7@&9%5a zW!9p$?KpSfbe)Kk%T~m24e75Z!L6Sl!RW_>F>_qc-=+BkAvGu)52r zeIlotb1e4y_s@A90Jc3pn;aXzKR6Jsp0*AGtC^4MD{(CIJ^jI8+jjlvZyWZ{Yebv% zvc@dSb-F#=w(ZxCz}B%3hf;dj2lYcZ)$D^fb9p$}HInQ2NU+)woaS&CrCR)t2J8O_ zW580Xjs>e7!}(yX9S3#~*Jg~PDAf|&5-9xuQ|E~v^%C5A4|FSrjD0oVx5l7&%&(fGvI3Z9fCGB z>*s#gd(q~-wc~Ooezu?3XT!a3PJhmU+aK4Cw)mY3)@F>eDCM?y9;NwOdM|rE8AqHrmw@ee z#_v+FTJG<(sm1>?uv*&vIatko#dd!IPP^JJ=Tx&@vHebe)D!nguyMn$0+(ZdHQce+ zPh0%10c*?H%VWD1T#nIoaJ7ul4dCRe?Rri%a}}Gb^Cef8XACXV&ac4cp1t-au-Xxv z@%c5_T=>`4TKm$?V5X|tm{VK6*SZC)E%R|J*m&yp@kUCu#JwG?mV2N(z}_>f$LCJ4 zy7h0Pl*e`#*t~Mj^&7C7*J6C`u6?q{+yhtBH{-6Bd;WXD<^6QI_uq$2dt(0uJmV{DWY1+h2|T%5y*eD7b9jw!EKz3|(99vmXbmdE|cncko9z&Dq#; z{fy!L{F7kY)Aj^dp8NUVgT0@(zBYXxrc_UVp8_w(I?na;2e?|+33g$ru1FmUnm)x>aUcfRXs;(TlyvE zm%y$u`|~2DT$}sVE8yHay$qJ;-sx4a_fFQ)mUyp$%QgEqxc5%#@%cMgJ^XcWS?3M7 z_fG2Zc@wN2{uX#jI+eBc54b++@p&8E7oS{j@4)p_PrZMF)ysRQcj4x!E%#3Efz=(S z_bKha_ybC9xp(>y?7fpd+VpvWQax+<6Y#%V!~a6pmNon-Sj{7A<1_fjoLR$i{p_=A z_zQ5>@aJH;xvh=uOYk0)*4JikA5p4X-+SV(!E-QX+P(tIwfSdfz6CE*+r9zIGuGdO zm#A&uf#rVJ>pJp3)3H{JXFcs{uNO9T+p}GH+M5Kdzc$;Ir@aoaakSa4JolHKVB7Rs z=_BUwnp5x1spfSccHj50HR{fuD$y-V{o zFWWMve#w0%ur}-42YGUz8EkvypHr9xP2KC@3fh&Z{xT=%zJL|Ta?zdgmFJ=>J)SI)Qlq`5jr z3u1E*(su!{TpLBTF#pLuxDZ&LeQ*)5`=E8SCElXoa!*?f?mnmhp@E(6y`JwD5V`!a?ZujSzSsb?Qt9;{yOgDb$zQCs%G6~XF`lYjQ! z{)<nHozF*;gr_brq8OB>ehE3To0Uey)Ia;&3$kKaMtzuV0p%R066>LhG2R2 z!HvM~gT}L-_O!P#SlwJ~SDyAZ0qd{LcI9brQ?PNg*{(eM;6Si#x({wn>ES-8z8R;Q z`<}SGF4U9PR^W`omSB1E+6J6)*cvR){A>qq?t|N-shjJzlyYOa5AF!gKDYx|ZY=k~ zox#n0a2GUnW9>vK&px;-*nQA^&C9losb6y64XiC=Do^f%z^;dKAKU}39-qNr|E`9t zKe;~1c?j4T{ylZJ>E1mQt{$IZU}Lzyn}=MV*oT9S;cu`KV+341K6`?VVXnrI>l6E4 zU}N}a)DmMPTs=N}gNOHvf*xeZj^x=2%L3 z_Wb?8@8M%S>uEP1wuovx*qG&>KLJfWd)P#<+BnYC+aH{Iwkg-IT-)9go1c6BLD;hA z9|)Ffqo{twf3oKv43=llKLqTaZyjxkcPO~rzYl}E=c~u(aIkv#5#X}Uk#P5X_4phG zRu4ZKT<-bD!1Ym&&#~aXjA6#>IJkc5+4GMFtCxHJ32<}NmOcMOu)1S<5~ckYpG>JO zd;Te4_k4Y{>Eku6o<09`@W0#h&p_9fJ^xIwnn&jMEcj`hx#r~h*=P6sbHLg2&j!oQ z&A&(TT=3qM*4Jikr&6k0-*x_DaMt;GV7WHe`A@)E=jVgv8S9^dv*%v`mS@ku5bU0B zJnLysdp`rKn~Uwr)80j3{k7SyJndZ!HjXyim1obt1Z#&A8}KK@x4wSbll$FZ zW0d>lJ!tB={_h2=`MccIyAPgvwkg-ITo->5G>Dr z`7qf1(mL7_?-6jh_dN=Czf_OUV_^00$H8Tt-@)B4)#LL7SUvnnaJgUp9>BXB%8Tm6&vmvUbzVYKPwbb$ zYPRp%NF2+&S9t~8%7Iw=+lG74Ym^@5q0g(FuXCD*IOG2}@Uq4D{~fO8ca@Hd->a$F zzA>$DoQ%^O;D0wxZ=!2UU)}<%IZkQ&A8^~wZ%p2Xs~tgKu4J5m)jRyhy4vj5yNVq4 zOWQv=KjJhmabmv*Hui(W@fvy`ZlBcS^8t8yd>*QOK7_03Yu~l0*>~Hu4cp3R>c7DM z?lbjMbZz-e{S2(;kv-sZ_$Qpst!>HmGdDj|zXazq^$W1v_3Ph#{1tdCrS-L0|6@vZ z>-(Ad4LEz>*I>CeKU2R0XYczKEYFzummTLb^?P)AK2v*v{Y*8U^|YtGNwBG#i|xwO zUI$ozZMG{5s zjDvsQmpt<`HMseiIt`k-x%&5k$&KY_>U3bQ18x4@U~*&mnK}d5`N_5JGs%o->iYY4 zhspDqIuqE>RP!}2+cKtp$$e(9HtX95dHOsnIDPi-A(N-x?BMj-zmrVvy7K#=Il!M& zT1Q*PVotDi%6)JyH1+J;bA#3VEko+f15Z8klIiDZw;hbxm zk5kP#7bngF;PP5u5bm|E{%GCSLSXgOSs3i~o%b(`z}56KS8ZyptF&oZ?gj1%^6ZP2 ziN7em=9KYS4Bpbn%lO*V%602|g4AE4uAg^EOTt@PQC}_oOV$4QUE9)dHODg7(=u>% z$96f&jIH{zoNA7(IB}N;I~L(9z{{~-5$;&4XDnB;4yU&4|0{#l-T&=#+Oo`N*(zZ3 za~$+H4|83OGP&xrDyN#cip|YEZw+wPy??)+T$_8|THvgE|E@iG`m_!>d!B#qo;-VA zf3SO=@vNsk?X3$|cRp=bp7z!g*EZX2adL6pua9mV>)Eb6bF=~2HtqKSN)Ok+`i7ip z_FG)89rfh32{_}hF<73w27)sV{vCqyT>qPaU!y(iXv=lHIXKs`aqPnuXzE$tTY}Zf z>whbF>e*Mhe#vtiaPr(5EKi=>fs^O9V0rS~9{e>qSVvp(+yR{SlIMdA8_uv(et z&hXSTPq}`MZ`PS*Io`gzko#`Y`PdD6^4J~hXGghT4nk9RUWZV6d|gdismnXzdr-0* zs=<`Tl&2p)%kNmUsl|W4+CO|8+&|ZxJ#{=>ANBZ50GEHRc_Q3D*PJ^0!}U?mJ?#NtV`+1& z52DOiYderr&9N5S-?hj?t}b7@=9cm`YOY^$`w_k_sPIGJec`zl4u$KZ?mex4zt&;k z+|!oNfhodOd2W>}ks+qGmIUEi4&%T8p1D~?U=~%cv>dEOiaB|9b z0PaD@qiah}CxET5?wF)~%Y4S12sS^*Nq=)Q|C1?`zdk2%s+qsoeEp0$6`Vcm6tG;I zpE0L{vuB+KmS@j81N<~T*3o8uiEsIB)r)^OtXJ#bO0XUOjE;x@*4oPC$1>-4F5P&l z8dJ5Jn$tY}H#_|I6a4#L=VC6clX0F0ZvE_0{owlK`9rQx`Z;fH|BkrgZM-&M$0 z0jv4$AiqCa6|A1$lCK6fzPf$0ud9O{ADs>}VQtTIk2w!O zQ}>$Nn9{?~81;=f)m%qn<2dh|a60bB`JkTXO=~^#Y-!(Ip98_><=Pb+&$zK~Uf9K1 z+vYdB331HA>DsoRv)0%(I~%9*=AbnGRz-c+ML$lDIXP2*t{Pi^?gIO7su*it%3QbG z5--<`<@PlxE5nyeO-(Hj+j-UFT zoNDGQcD=YK$<@>DK49C;n2myG%(TUCG+3MY*)Mr)W58;*AitdMwPDab2Xw zj@_aKUJPu0OHg|KY2S}{`g@(Hy>Vdm@bO^dWdEE1*GJv&XU$_GSlt}O$r}({EC9>e@(&lzplY=D7gMN6^n)>~$LD$FOU^Vv< z*S~Y5mUVF?SS{=CD6pE>n?A0;W5CY6^_@ex*!k1nYu&bv1G^XLqfH;zfO^_H0bI6s zB3#XNnD$Nvr#;tz+*r=P{%P-2uS^!C;Ih4+z}3#LeQfWi;I!u&lpE_pO8wK`&%m~)k2ZZ=!|G}8 zVsP2srEs-NY9HJCIXLaPhULa`{pz3gegU>UeYEN0HK3mMt^}9uT@6>es`jzHYr$#H zYd~%+*T4R0?|QK9>7z{_uOaoc_e*fu-mln)M!Zd3^5xtHpjN*fkdWU0}7?e*<<+#C|teE%tlB&SC8Lg4MQU=k(u; zybo-f#gZxRbnm&p1d$4g*=P9sS>ihw$ zW*y&KuSA@u!N$^NzJH{AhSPi>;G=S^?$#?KPn5r#>~$`uZ#N*TvuUoVD;ASUsO# z&x6&n1|4&?_`e8N%jefiV6`PV^>KfA1>B#~HQ}7d#m0$jHDC0y;2+Q;_32KTSqb4|*P<$BaV?R^WjJ$42hUvF-JiwTp9f&ul+$A)&g{<{*LXw9 zO*q}32U2GLnT;6bel%CX-LLu;T)+7me1U@Nzi`3zU$ns&FS!0o6%U^b z^~QDV^_n#{2by|5FXjZRWsSK8)Z#xkSS_Cy^MKXL&x?8Ct}U+# z*N9x~`p`eu|NLODe|@y+;~G;>dkccg_7;Mxm7f=jz|)>9`~);_kk4LI$2jmwSY^{s!}+YW4d z`e@U~JwiS0?Eo&@+X=4bZxr>hy_fm_L$MD9tHnMH?3#~#I9M(A5n$J7?0bUM^7*?L*fx!8UG4e&9SI)9na^Lj z{)xRecva5C-UqCf&)-pC>*w=Vu21TZ2D`qpe~tm`pS@A8PyEM%T_0(0U$9!*+YfC0 zN9)gTxjv~s4(xoV{&=uj>Q4Y$f91NqT%Xk6AME_4{sCaM)IShx{fV5;wOk+T4`3Y} z1lHdAK9e%{4_jc{hSOt9&fGt2RpZSmx90TzVOz@FKO8~~KbOLfD7c?rM;BbbV;lVV zg6n@$!Sz3-!A~o={%00k|FawX+=ApusOJxc(OxT>nc8{&VoEjwx zPkTQDm+f5yS1Uh%FM+2$*O=T`t}p%5-eq9h(?^>=7gDOHyXX5NFcAp4mKCZ z$2R3++tb(av=2{!{T@>vZT3?w{TzU8IH$)D&h&F=jqT?!PWw56GW~pp80n|a+RxTJ z{d@{;y|SNwKvPdYp9ZU?pBv!+N4R?W`6sZs*f-mhi)~L|`)MEk4EFl~eYDw6we-{P zdiUn^*poB;+^fd+b0nwz+=nv#e2Ez8r|&plsd@VO9NcgnejV72sfGyLC#tEZp;0Go?_vrW0!_Vl%%_Te3{_hpx-Xn%SWk23WQ#VK7S$+VvuD?T)e+XBz zU+L>daO-LF8a1Df!RqEJPOkpDI{uqF9tUw|P7bcIdH;yhTo0vmPR##PVq{J%zbyF7 z@NWvP{kwuQ991t)_4NHS?6y<({c|++?2})B)pD<94r&>jufS>Ddj_dbe_j-M_>0=+&)7~WXK`njgsQYCf^q1?SzvJ9l+nob(=HMu7$8mZb z&6znkrpAX;9?R(*98Z}!n3NcfVfYjUcMhg5xPH?%`1A$Wf2M-#KTCtpR&f32EV%x2 z7rY;M-kN88`oir;ImeTssb`KS2dia{?W0=8d`hrd=6EWwS~7Vvy1>2rJ+VpV_)YIPV;Ih3r z;A-Xl_+0R`=N!n5<@oEL_T~ZGo<7?2aSqkf-h5#Docope!D{yu>wJOQCv&tQT+REH z#90V#UlM0wuv!^sk=iG57KN*|?|UX?PWf&SZVt9#41HWbi&HMa>6*&*CD%VWED5%M z`E#O6fz_;k0bLRRyrN+iSmDB6lJ(k=3;}I=XzTKZeHc}wj!E(=3phTTCO*9 zR?C>K0#?iQwklYyyxvxaI~J}l`y?0JAN{i)*95yB_0gt}W1^n+)&`gDtpispueWvK zY0oi{8_V&~Kkcm#wmp5c>EoEHr@alq=HQw+5#Iq|HOFux%8fZ4*Q`0YKB>P6*qA;u z`Fnv)!D`kYNVyrO^|R*W`dHs}wRvrK&55(-xcsW~IX%wd%$hs5#%EHVSL3rOf6VFn z`w3;%-&VxS`g8s4P;m8~8hq!1>p!&M`VVjL5jD?x+Zt|8<$Bu&O+D*vTd-Qzn>ngw zJhua@WxZ_=Rx8)rj&R4p`Ls`RvHj6M`RoFAT*`cQMN`+`{>t-r0PF_thrcn6r9FA= z4leT=1Xpui+Me$h_ki1;b8Wrl!GqD%T_4_83<0ZY_t|q8IDK)v<>uyC+lJV2wqEkw z6Wouw`s=5?{Ee+^=Q2)@i#W4(F0S!UDKDw<&nPeDbnX0{GHYiqVwP)XOu^OnZSegH z?%FxH;QAlZ;D^>cb2Sov34Y~V?Tx0MwX+XcEo;Xd)iOt;z-n1LqrqzB+8GOX92{5s zBp2Hs{gcl)u;WtZGagM{fBP%XoJ;`s!{3<3(w@8~g3G-2hpT1nxKA9wX?s~a?j;A- zde+WCaJ8(RAA!>s$6Ic0j>gKfX6;;E<6lr-Q{yWs zujO>@Tu+&`b2u@}wR3F2)sJuR6KbBZKLTzW<=7vIrk=HM6j&{5!8X-0=0}6ovKEd3 ztCefvIJo_CoXk-!Hb4E7&xv6BS>|&Rn!5hxFV7gC4DN@&F^#3YImWkQ_qd5OWBltH z|B~|N8vly&7EZ_bHp-0gDa0(t_^g7fpVQ#y7ToLeqJryxNrPWnaQ&|;xc=8P__YPs z|E7ZLe{+N1QuB=Qsc`#Tj`3+|>Y4M?!D^Xv$3QJ}eg;@AbABdRt(^0-;f}NO=9tOF zj+g$)=RC0EUgq;-GNG7 zdanPUfz@*TUkpxPoKLyAT|#LaV&~C%$@4O>dFrp9_T>38KH z1M8XR6=>?o^GdK<^1K?HJY7d}b8`*ZhS>FEz2tcv*gW;uPkZvb9$e;m16<8_2+8x8 zaP#!tU-G;WO+9)33apkqe+^Eau3x#ixpr+s?7FpH^1Kynp8D&jy?L$Qhuz~hoVnKT zuJIj|_tf|<%6mDz)_+TxYyCE2dJW0Fw(c&t`n?T)U%~Z%s^I!R-Qa($dDi~zaC0ix z{vBxQxfbsPtL0iWN43n+U0}6bi@yP@mDl1uaL2)MwNG-f{n0=9{1)uEl=<9`rmnyJ zm1j;K0QbY+n8wncydDIXc|8PIYhR0fTKDV^!)?!NG5M|te*|4y*3YA0wLJ4a23GT# z*FWPk5d1i2V%dgy80&YnEwP?}tL3-JPlDBaHqPH$^Lw~$Xv>=Y12|*oJj)#u=h(i9 zonPx_e4YXKBM1HU)81UOk7M_Em@{kkks3ch`Dl$FqI`_gHTyfttl2*ivs|;!7hL_t z27jsMne%7iwo%UcpV8E_2LA$9%Nn#zwaoQj!D?B9&w3?z7(Z3xCn^8H>A3!pGS~j=#L2i? zzS-b!)jWB>1~*Qb_utUeGmd`;t7T7jT;712i+!_Ax!Csf7u%L&nLhsm-j6!^>!-ch z=f7b0c!o25{!@+Z^RqSfx#Z8B_W!Sx>HoXLEc^d{gMUzP_r6aH?sL!Q1-FeaYM$|W z8*UEec)f$Bp7Hu8Sgjnd_u%%!aWO}^*!=Vto11$;#_L0PKkDeOpZ4VO5xAMh$7t$V z&!2$RE+MAhUtB<}f5FwWKYRvGUXF#_TpR~|^}CRIUxFQPZH|dNd42`vzN~=asa2@LmOXUMDTM--S$8aNC%?<{8hg ziEj?&ynchGo_YNitX9tJ_i+2+xR|3{Y<~KS&CPqHjMpUae$>%lKkdn*1KiA`6HPty z+8eBvc|C(Lec`Id)!O<#aCpMwz*smN?~HPT$}&)I4J{72G)G+)Rz8p1GL@tX9s=bZ~RAZ?-8H z+n)Yn+w#6JeV!5Ck2?D6r@h(dx3GKsoil6s^%~pfH)?z(<(r)L{~wg;|E$C;`#*bw z&rxvK^1KE2`Fj3>+r|Pl&v?xQH-~b(W=2!bc+CPa7fRytO$d z^33Zh;9Id--+J1c^ZE&Pj}JIAuOHUfdHtxycTs-K>Ae1nGV{6yG0S;ftHIYUxbwO} z!F_%kP;lGWsOA~ZRpI7P&g*Ju>Y3No!D{8at_imvj*B_U#pb8K*xa_{c_ZVs4!j?A z^w&>&^5_q4=CQ7J&dlq2V71Ka?ZjFiuAX__5S+Xm3%R*C4*KeMC-pW4JKowH6M6F7 z1biPh>swEIb6!8k?oodBdtgG=W@#i->T*rlTG2qDd%P&ntJAD zGq74YH(S8X#lG35Tx@&#i*0MiVs5sE_oI&f`e|?W`D^SRWuL!GKZ6 zDf_%rgYR7P^m!Y&amqe#i>97FZwFQ@`@AFET~pVPIBgsBKcvBj7ToK1WWgVR?^AHw7*+F( z*RF7LD939zH1&+v?qIcYyavPVhvQ<7aq~(#|Sj_ zT)%sQ)h=Otd?vV`SbM?MbN%iOPF{|M+*}+7ef4{gdZWRPw>HN_o;=5ZAIEHc>uGP! zYai-*bWl34&QWKLorm6F=Wi)h;z8_f4KLe2OYR19U9T(g58CYz4+8n>W z)ZHI!p4x0%?)cfi1HjWzTHkuweFv$pv>#PA|Ce&n&q9XV*My@?f|*l-I_O(9|7=KGZn`Y;O7c+YW{6 zqy9N#b2z1kf1i%F!#LIKi`a45iu!VO$J2fs#knf;G#z!10nbROuHAmCnWuAqELbhy zuRFHK!PU*-1WFHcP(Pki%{Im6aAM8X{eI7JJq2uB`(KX1sc71~1|P)lG_abvIp*?Q zk7t0LQ*DmFT-y`WI}7aiYI82++8)Gq4%k@QoELen&vU`^33u1V70P;KS5Ksf981sSk3z8D^LBOf}8a(L{rbc^)s-Vb9xkcx^6B8t6#)v zT-T6V)tyB{yN+k6v2!+Sjs4BcY&G`X#_XKV_neg3Q!b;uTt9wa zbbY}c%U>7Vc5W%S{;G)cokz#)=WzSyc$vH0 zYed`S;H4?m^Y``s0<7*H=5=r-*u(2U+ZCLTaXK#Itoy6Lj$L!zUyZIU|7P$tV70`* z4xISfuI0R!)A-`VzXAO3;{Os|n`>(z->=+Q`{lZJ&;Avfx_|eE-)-Flww}7<;vA}F z?cWS`t!Q&T<=R}=w}M?e+MHXtHsjt7HkLN$S)S|f4zS;K8qa##b3NPzHfDJ}{RT}v z*TdalwL3Xe?;ddK*`{1S|Bgw=)iHD|)34ux)35u$^7QKgaQfvMl>56WucZgUuTfe@ zTiSmJoc4|5ntB*bJ$XL@R`dQN^&W+%o_WjlEAuvgx$DnefYK8L2BF?}AaR<4B?;HhVua{bCNbS!gU_5T1c C(R@q* diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index 17bfd04821a365b1d94408450ff3b37ff214016d..d901a802b4806388b14f6739da46a28cf504fcf2 100644 GIT binary patch delta 3215 zcmbVPYgAKL7QXl9CR|9+JO~C5^579nAOr*z6`BM@h!i4CDJnBw@L3ls(rVRG^B_tf zI0OO=wNwMu8kJ(iM2*n-72GEN&UYi4dVx3gw`%#X>ReIDQ0 z``i0`=iC!_+HhJ}c8M%&*B6tI;eY&&xioF#^4n7Z*FgXPhMU47h!qF`=$q2NkIfAL z02Cksqu5k-NnSpHSt^DSW8<Q8qB)tNwe0a$0! zkh4ssFf+5R?tKpMB@C*U#OQjd!y{Cn?tDksu|SBrGne#`YwqL%^o{9DmPOEWn)GGc z5%h*T`%kcwOrUQ}Ubgh&1-J>k*iMOuOce+TNB{r^yqQYIpy8&w^P*MvBp_3HvtaSo@Q~CW><45|NG-c1 z)1(lwbX7>PhKWj_{FB|X_SKQakODf{05s6l9Ax5K7Kms$NYEyjgJ?a=hOweVlL7%P zDjo;7jewC>r(O!FIjsBxBiy&pfSuxUBfvpOrl|e5BL_7R4XhXd(K;^$oTyqIu-8M1 zCXukUs9t+G;J0S$zniB57Sm8b{E?Uel7PiQX=qBh+UKq~-r=MTI=9OJC2H!KzaUnf zi^R~3u!Iie0fa;#4~fHj6m>m3(Y<2|{;3=>0_ zm0U0 zW3{e^-6DG|tGp3hPBl6cZgjzEBw07AsQo=sEVk4nneI|KDrCh;D1j0|HHv{7Bo~9C zPW}$489(9n+$y4w7Y?HPeMTKC0+FhwlRDMYAfTA#qr#+@guT0hZ!o{78al6FS{XepEgR_#m`|t z=J*aH9kQ7Dm$WBvhm*X7pSdgp=FlctM3z9ATXe(*76&^zn!O9Qf^=|wgDMVo5asI> zbkCWG33#nPD={=9UzcNr6)A?GR&}BDO)#2-_>S0NQmG}Iv*4pZuu@YLYKN=O9wht8 z+(~^j5v#rC+aYpG`U~u_FNuELAF|25>9y7cc@mgm^V^Ud9Z_jCxcN zR^_c#c|Ol`v}tjN?S{F4T`-5IM)Yd^Zq>aad}7R?&fNubd53fLy20JUbhY#l)%<$=xhH4qaN#>!+E0(0dszIP_HXA}GpP z?}JWf)#1>L0Tu57&E~^)bj@&wj(5(`hQiANW-koPo7{9ba_|;qg|RUZuFmJUH$hR3 zTkuMCRTDJ3`JCu$SS8rbh)mw@yF)3;iR@KSde~%k!6cmXNHhxz z_A@g62wZ{FHMANMcI7#8PZgBLSQ*ZO8CSvf$zo`XHv#8R!ZDEmi(6M{nt(!hrIWGp zLQ_^CddJLQ`5nJLo1Ft?(4J-&K808J#HbTD7oaY)@2e$Nz5W#=JkXo%W@L^tBGXHN zDm&4iKEmh z0$UZ&nr^5+IgLlMme5)w19MifTGxiPiqpws^)INaeB3`^7EL#qmAaxutwmv`b&xpz zD$4Yq+;hdbTUJA_g<+@itf5Hbzxyj6Dyw61TQSj#+r_OBN{MhoZ>K<^Ig5+Fe?qX0 zZXfap&d&;V7F{Y6D!_A7pAfCF_MisV3(bd$dVAxXs&3?Bitr+xUmNNO9T)gTKV}<| ze*jXbm9k3y%8yEe*9!SX{~@`@m1h*ndM4#P z_itY&SWgY((b`0PDST%Sa;;Z*t(kw>fsd;`7G0C^!ShP{fY9E>ziGiIu88bKeChf8 z{!w9nFaIKpDkeIG3QZ55;NuD5JM_8wt8+u==j10s4z!b%4&lLf!HU4^IGvjY+f1HV zG+=AEW77?d6RoSfxc4PVlcRj;RpgWAl7h7HA)Ss zHj|ZV(ejrU%GhVtHD}<*DWb=tqQ|`|K`i?z#IzWS9eDiq>vr;4*fSvXKuMknL9B)B z-?5*QJk0{B{i)pUT4T4%gJ zA1z5^SKRx@-rg~A=Jv;N$9}C={rDjGY5b1nGkK@uI*wKB=(_sl_nF`PH|qZe9Y4Cd zM~yuyIAJO6TM$ArlJH6i3^TjQg-ZulbE0C|lK8h;d23#R|@BGR|;VT|M003v3w+~W^0npSI`}e>U zHvsei0k`O>#LP`8fd1k%Y8`D?CD#dbinb?@f+hfIPsZ8BI)F?@b{9@C;rxR{=h3If>;~lBTQL^r!Gy+|p!iJ6OnjB7E9&S|5QOAbChXoLK5Hc_%x0S|#2LYyrEk!jZy2+6w> zp@I5_TYbH36fab-9|n*D=Y?rUkA>VjcJnysb9$JuO zZOF;+g`FU02mR_*Q>xM-k37895jNBfN1>{IeT-WcmQHjHtufulrH1777S(E=58g_E zA#~W2M@BpX$EIpYFT4H}waF-8vw@OO+=WEhxdT%kqY!V8cUMqw3|4nRznodsp;2T# zR*}2TA2g04k_ZLBVfF>z%Bf|n9LZLU+`-ywWhqun>d{cpebZyl0K}hk#j(o!HW)II zOvgC?8Q!m-uB*h80m`ulLzED=1d=rT;khn~OI8N&UD&_=qZ+q&El7ncD)$+5V)@L} zW{hFcT3M$Bb(O9;RgJDy$g%|1(;=Xz=k{Aw{_fK?Ur^kUFst|#8oK;4nLX{(r@75C zRJmqz7iYugXMu#uo8INb-9rRnrS*t;k=2w@xTcRCbdkjkt!%iU56}-=U1w_RBsd4K zZG$llrWj6nus>rbPRBGHz%+vjPP?L^oY=E+x5lx?UJtp%of%6BAL4z(g(?F`bMki@ zH%yl%Xgu}ABry$3+VPpNW0w&bin+8+jcn;3iXW!2pnX`F3dJc9cAC_4C$yCbwl~-F z2i)lb=&bF?EI1ok!b;-d0Cw2(PMopVS>;KdEGMfsY}#%1_Mj)RPy*>XQXLN)@*+BM zAPz2|lI_2jo+Slrm_BdYOm|q~s+beBPzMwt?%P;#+BT#~BBe`@`lGn`fDqc%6a?Fm zv1RYTzlx|9gNR8Dx)(x4ytlt1w8}$;&?5QfTHVz|g}8?UF?4auTTAK1)a9^_~-KA zx3_3K%7Y~}TdVy4$CuYDHf1NXeYpwWjhq)xln$?WjQsF*q%K`Q?7wpDvN;Prym#0R zXpNluQPHAb7<$bPG~10at>1(>qBQ3)gI8>t40WRqdVJKa2tF5)!n!vo&&_X*-T6dS z#caTHf!`59(=~bnGJVNw9-)?&@WWDmAGx7?QG-7z_!HPfesar;Wd5DtDw5x4-|#{Y z-4jl*lXxp=C4j&2Un-id8;`#%YYg?A3WOlfE~R} zM26>YWYql&$-j#|OsH>}t>@n#co$s3|3wOpp|nWss>C>*%)-x&`!+wW@KJz%OOY$0 zD_}|DG&|b(3V+%Y_jD+_cspZnN_;AdX8qJUGZZ)TD0&a)i(*6F8%A>}=i{sg!YXI_hs!-gUcT0*Z7WJB&2_2om9_QE65u|e#aNT7@ zS1GGY{Hh7mJr%AGot~}_9yu`Rlc^Z3fBIvz`tLlXIQ*A-S zV&t;}4UO>rRadW{zRGFRZ34TnPJ6wQP4^nTfh7+U4maTP144OZnGKYL>|>JEG#a`F zk_q`>*Ny94pj|CX znSNI$Un_Jrkn&2SJhsfsCOX47)rKBUl~EsIRo(Kc&t;e3oDp^3ytoZ?GyLQv?KX3r zR`>F(Y1(7H&D;sC&XZAR={hAH!yiprjcNCbo8u|&o5Xh(1Z^^JpKBx2&{Tg>YKW<- zSxa$M>iywlIF+`jU1C?PIfA26D@18Z^Sao};7a{=YEiqTf!eVEjYw(Grm3}eACIx_ zMW!H&rE<%xjWv;4I6YUCUS#rg*Sm*^7KPoEu`_ThD?^|g(dwRTz35AZBU&a~&S7&D zN3SMDnUp!^<6ee^Er$|CbMkr{&6y3N*btPl6QvF2XpUib!Aa4MHgoKUfwQ$fDKf3= z+Wp>`>XAh)w&E|<>GuMxX4jkHu7&0?Ulw2Kil1yX?s93}*t~;*P9PbN1IFi}_Xim0 ziL8p=ZN_XDG?kxCaVyn}8+b%qI%P6Qb>o4$!9ZPN?>%;wm6daoYiIVs)Nx?WMC$y? z{KaJTbP_npl;=-3YQAMV@@PCDQGB{n857nl`U^d)Bf zQkKPB_aE%&Nag&sfPQq0ZJ1!7uf)GqnnAV;I?=NPZENVza=vvzxrvXF&{nbULYwN} zJ!4R(8;=xq9Qjkn5r3-v1Aw--L?7B#0w?sVwt5Bh&o^XDbh{|zc@tArBTER>@(K^mziz5?iW Nl74vuKtJTczX3NPTqXbj diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index d0bef52..0ca5843 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -41,16 +41,6 @@ struct FillImage int2 offset; }; -struct ClipRef -{ - uint offset; -}; - -struct Clip -{ - float4 bbox; -}; - struct ElementTag { uint tag; @@ -143,8 +133,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -153,14 +148,14 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -static const DrawMonoid _418 = { 0u, 0u }; -static const DrawMonoid _442 = { 1u, 0u }; -static const DrawMonoid _444 = { 1u, 1u }; +static const DrawMonoid _348 = { 0u, 0u }; +static const DrawMonoid _372 = { 1u, 0u }; +static const DrawMonoid _374 = { 1u, 1u }; -RWByteAddressBuffer _201 : register(u0, space0); -ByteAddressBuffer _225 : register(t2, space0); -ByteAddressBuffer _1004 : register(t3, space0); -ByteAddressBuffer _1038 : register(t1, space0); +RWByteAddressBuffer _187 : register(u0, space0); +ByteAddressBuffer _211 : register(t2, space0); +ByteAddressBuffer _934 : register(t3, space0); +ByteAddressBuffer _968 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -176,9 +171,9 @@ groupshared DrawMonoid sh_scratch[256]; ElementTag Element_tag(ElementRef ref) { - uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _375; + uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _321; } DrawMonoid map_tag(uint tag_word) @@ -189,24 +184,24 @@ DrawMonoid map_tag(uint tag_word) case 5u: case 6u: { - return _442; + return _372; } case 9u: case 10u: { - return _444; + return _374; } default: { - return _418; + return _348; } } } ElementRef Element_index(ElementRef ref, uint index) { - ElementRef _214 = { ref.offset + (index * 36u) }; - return _214; + ElementRef _200 = { ref.offset + (index * 36u) }; + return _200; } DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) @@ -219,13 +214,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid tag_monoid_identity() { - return _418; + return _348; } FillColor FillColor_read(FillColorRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _225.Load((ix + 0u) * 4 + 0); + uint raw0 = _211.Load((ix + 0u) * 4 + 0); FillColor s; s.rgba_color = raw0; return s; @@ -233,8 +228,8 @@ FillColor FillColor_read(FillColorRef ref) FillColor Element_FillColor_read(ElementRef ref) { - FillColorRef _381 = { ref.offset + 4u }; - FillColorRef param = _381; + FillColorRef _327 = { ref.offset + 4u }; + FillColorRef param = _327; return FillColor_read(param); } @@ -251,7 +246,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _201.Store(offset * 4 + 8, val); + _187.Store(offset * 4 + 8, val); } void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) @@ -289,9 +284,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 1u; write_mem(param, param_1, param_2); - AnnoColorRef _805 = { ref.offset + 4u }; + AnnoColorRef _735 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoColorRef param_4 = _805; + AnnoColorRef param_4 = _735; AnnoColor param_5 = s; AnnoColor_write(param_3, param_4, param_5); } @@ -299,11 +294,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) FillLinGradient FillLinGradient_read(FillLinGradientRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _225.Load((ix + 0u) * 4 + 0); - uint raw1 = _225.Load((ix + 1u) * 4 + 0); - uint raw2 = _225.Load((ix + 2u) * 4 + 0); - uint raw3 = _225.Load((ix + 3u) * 4 + 0); - uint raw4 = _225.Load((ix + 4u) * 4 + 0); + uint raw0 = _211.Load((ix + 0u) * 4 + 0); + uint raw1 = _211.Load((ix + 1u) * 4 + 0); + uint raw2 = _211.Load((ix + 2u) * 4 + 0); + uint raw3 = _211.Load((ix + 3u) * 4 + 0); + uint raw4 = _211.Load((ix + 4u) * 4 + 0); FillLinGradient s; s.index = raw0; s.p0 = float2(asfloat(raw1), asfloat(raw2)); @@ -313,8 +308,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref) FillLinGradient Element_FillLinGradient_read(ElementRef ref) { - FillLinGradientRef _389 = { ref.offset + 4u }; - FillLinGradientRef param = _389; + FillLinGradientRef _335 = { ref.offset + 4u }; + FillLinGradientRef param = _335; return FillLinGradient_read(param); } @@ -365,9 +360,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 2u; write_mem(param, param_1, param_2); - AnnoLinGradientRef _826 = { ref.offset + 4u }; + AnnoLinGradientRef _756 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoLinGradientRef param_4 = _826; + AnnoLinGradientRef param_4 = _756; AnnoLinGradient param_5 = s; AnnoLinGradient_write(param_3, param_4, param_5); } @@ -375,8 +370,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG FillImage FillImage_read(FillImageRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _225.Load((ix + 0u) * 4 + 0); - uint raw1 = _225.Load((ix + 1u) * 4 + 0); + uint raw0 = _211.Load((ix + 0u) * 4 + 0); + uint raw1 = _211.Load((ix + 1u) * 4 + 0); FillImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -385,8 +380,8 @@ FillImage FillImage_read(FillImageRef ref) FillImage Element_FillImage_read(ElementRef ref) { - FillImageRef _397 = { ref.offset + 4u }; - FillImageRef param = _397; + FillImageRef _343 = { ref.offset + 4u }; + FillImageRef param = _343; return FillImage_read(param); } @@ -429,32 +424,13 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 3u; write_mem(param, param_1, param_2); - AnnoImageRef _847 = { ref.offset + 4u }; + AnnoImageRef _777 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoImageRef param_4 = _847; + AnnoImageRef param_4 = _777; AnnoImage param_5 = s; AnnoImage_write(param_3, param_4, param_5); } -Clip Clip_read(ClipRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _225.Load((ix + 0u) * 4 + 0); - uint raw1 = _225.Load((ix + 1u) * 4 + 0); - uint raw2 = _225.Load((ix + 2u) * 4 + 0); - uint raw3 = _225.Load((ix + 3u) * 4 + 0); - Clip s; - s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - return s; -} - -Clip Element_BeginClip_read(ElementRef ref) -{ - ClipRef _405 = { ref.offset + 4u }; - ClipRef param = _405; - return Clip_read(param); -} - void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) { uint ix = ref.offset >> uint(2); @@ -486,20 +462,13 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 4u; write_mem(param, param_1, param_2); - AnnoBeginClipRef _868 = { ref.offset + 4u }; + AnnoBeginClipRef _798 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoBeginClipRef param_4 = _868; + AnnoBeginClipRef param_4 = _798; AnnoBeginClip param_5 = s; AnnoBeginClip_write(param_3, param_4, param_5); } -Clip Element_EndClip_read(ElementRef ref) -{ - ClipRef _413 = { ref.offset + 4u }; - ClipRef param = _413; - return Clip_read(param); -} - void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) { uint ix = ref.offset >> uint(2); @@ -527,9 +496,9 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - AnnoEndClipRef _886 = { ref.offset + 4u }; + AnnoEndClipRef _816 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoEndClipRef param_4 = _886; + AnnoEndClipRef param_4 = _816; AnnoEndClip param_5 = s; AnnoEndClip_write(param_3, param_4, param_5); } @@ -537,8 +506,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _904 = { ix * 36u }; - ElementRef ref = _904; + ElementRef _834 = { ix * 36u }; + ElementRef ref = _834; ElementRef param = ref; uint tag_word = Element_tag(param).tag; uint param_1 = tag_word; @@ -575,11 +544,11 @@ void comp_main() DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _1010; - _1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0); - _1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4); - row.path_ix = _1010.path_ix; - row.clip_ix = _1010.clip_ix; + DrawMonoid _940; + _940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _940.path_ix; + row.clip_ix = _940.clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -588,9 +557,10 @@ void comp_main() row = combine_tag_monoid(param_10, param_11); } uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u); - AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) }; - AnnotatedRef out_ref = _1054; + uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u); + uint clip_out_base = _968.Load(48) >> uint(2); + AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) }; + AnnotatedRef out_ref = _989; float4 mat; float2 translate; AnnoColor anno_fill; @@ -600,39 +570,43 @@ void comp_main() AnnoImage anno_img; Alloc param_28; AnnoBeginClip anno_begin_clip; - Alloc param_33; + Alloc param_32; AnnoEndClip anno_end_clip; - Alloc param_38; + Alloc param_36; for (uint i_2 = 0u; i_2 < 8u; i_2++) { - DrawMonoid param_12 = row; - DrawMonoid param_13 = local[i_2]; - DrawMonoid m = combine_tag_monoid(param_12, param_13); - _201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); - _201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + DrawMonoid m = row; + if (i_2 > 0u) + { + DrawMonoid param_12 = m; + DrawMonoid param_13 = local[i_2 - 1u]; + m = combine_tag_monoid(param_12, param_13); + } + _187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); ElementRef param_14 = ref; uint param_15 = i_2; ElementRef this_ref = Element_index(param_14, param_15); ElementRef param_16 = this_ref; tag_word = Element_tag(param_16).tag; - if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) + if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) { - uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u)); - float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); if ((linewidth >= 0.0f) || (tag_word == 5u)) { - uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8))); + uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8))); if (tag_word == 5u) { - translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) @@ -649,9 +623,9 @@ void comp_main() anno_fill.bbox = bbox; anno_fill.linewidth = linewidth; anno_fill.rgba_color = fill.rgba_color; - Alloc _1257; - _1257.offset = _1038.Load(32); - param_18.offset = _1257.offset; + Alloc _1203; + _1203.offset = _968.Load(32); + param_18.offset = _1203.offset; AnnotatedRef param_19 = out_ref; uint param_20 = fill_mode; AnnoColor param_21 = anno_fill; @@ -674,9 +648,9 @@ void comp_main() anno_lin.line_x = line_x; anno_lin.line_y = line_y; anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - Alloc _1353; - _1353.offset = _1038.Load(32); - param_23.offset = _1353.offset; + Alloc _1299; + _1299.offset = _968.Load(32); + param_23.offset = _1299.offset; AnnotatedRef param_24 = out_ref; uint param_25 = fill_mode; AnnoLinGradient param_26 = anno_lin; @@ -691,48 +665,51 @@ void comp_main() anno_img.linewidth = linewidth; anno_img.index = fill_img.index; anno_img.offset = fill_img.offset; - Alloc _1381; - _1381.offset = _1038.Load(32); - param_28.offset = _1381.offset; + Alloc _1327; + _1327.offset = _968.Load(32); + param_28.offset = _1327.offset; AnnotatedRef param_29 = out_ref; uint param_30 = fill_mode; AnnoImage param_31 = anno_img; Annotated_Image_write(param_28, param_29, param_30, param_31); break; } + case 9u: + { + anno_begin_clip.bbox = bbox; + anno_begin_clip.linewidth = 0.0f; + Alloc _1344; + _1344.offset = _968.Load(32); + param_32.offset = _1344.offset; + AnnotatedRef param_33 = out_ref; + uint param_34 = 0u; + AnnoBeginClip param_35 = anno_begin_clip; + Annotated_BeginClip_write(param_32, param_33, param_34, param_35); + break; + } } } else { + if (tag_word == 10u) + { + anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); + Alloc _1368; + _1368.offset = _968.Load(32); + param_36.offset = _1368.offset; + AnnotatedRef param_37 = out_ref; + AnnoEndClip param_38 = anno_end_clip; + Annotated_EndClip_write(param_36, param_37, param_38); + } + } + if ((tag_word == 9u) || (tag_word == 10u)) + { + uint path_ix = ~(out_ix + i_2); if (tag_word == 9u) { - ElementRef param_32 = this_ref; - Clip begin_clip = Element_BeginClip_read(param_32); - anno_begin_clip.bbox = begin_clip.bbox; - anno_begin_clip.linewidth = 0.0f; - Alloc _1410; - _1410.offset = _1038.Load(32); - param_33.offset = _1410.offset; - AnnotatedRef param_34 = out_ref; - uint param_35 = 0u; - AnnoBeginClip param_36 = anno_begin_clip; - Annotated_BeginClip_write(param_33, param_34, param_35, param_36); - } - else - { - if (tag_word == 10u) - { - ElementRef param_37 = this_ref; - Clip end_clip = Element_EndClip_read(param_37); - anno_end_clip.bbox = end_clip.bbox; - Alloc _1435; - _1435.offset = _1038.Load(32); - param_38.offset = _1435.offset; - AnnotatedRef param_39 = out_ref; - AnnoEndClip param_40 = anno_end_clip; - Annotated_EndClip_write(param_38, param_39, param_40); - } + path_ix = m.path_ix; } + _187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } out_ref.offset += 40u; } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index 06a4e23..8de5379 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -87,16 +87,6 @@ struct FillImage int2 offset; }; -struct ClipRef -{ - uint offset; -}; - -struct Clip -{ - float4 bbox; -}; - struct ElementTag { uint tag; @@ -217,8 +207,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -233,9 +228,9 @@ struct ConfigBuf constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) -ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211) { - uint tag_and_flags = v_225.scene[ref.offset >> uint(2)]; + uint tag_and_flags = v_211.scene[ref.offset >> uint(2)]; return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } @@ -284,20 +279,20 @@ DrawMonoid tag_monoid_identity() } static inline __attribute__((always_inline)) -FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225) +FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211) { uint ix = ref.offset >> uint(2); - uint raw0 = v_225.scene[ix + 0u]; + uint raw0 = v_211.scene[ix + 0u]; FillColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225) +FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211) { FillColorRef param = FillColorRef{ ref.offset + 4u }; - return FillColor_read(param, v_225); + return FillColor_read(param, v_211); } static inline __attribute__((always_inline)) @@ -307,7 +302,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187) { Alloc param = alloc; uint param_1 = offset; @@ -315,61 +310,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_201.memory[offset] = val; + v_187.memory[offset] = val; } static inline __attribute__((always_inline)) -void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201) +void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_201); + write_mem(param_3, param_4, param_5, v_187); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_201); + write_mem(param_6, param_7, param_8, v_187); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_201); + write_mem(param_9, param_10, param_11, v_187); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_201); + write_mem(param_12, param_13, param_14, v_187); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.rgba_color; - write_mem(param_15, param_16, param_17, v_201); + write_mem(param_15, param_16, param_17, v_187); } static inline __attribute__((always_inline)) -void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201) +void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u }; AnnoColor param_5 = s; - AnnoColor_write(param_3, param_4, param_5, v_201); + AnnoColor_write(param_3, param_4, param_5, v_187); } static inline __attribute__((always_inline)) -FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225) +FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211) { uint ix = ref.offset >> uint(2); - uint raw0 = v_225.scene[ix + 0u]; - uint raw1 = v_225.scene[ix + 1u]; - uint raw2 = v_225.scene[ix + 2u]; - uint raw3 = v_225.scene[ix + 3u]; - uint raw4 = v_225.scene[ix + 4u]; + uint raw0 = v_211.scene[ix + 0u]; + uint raw1 = v_211.scene[ix + 1u]; + uint raw2 = v_211.scene[ix + 2u]; + uint raw3 = v_211.scene[ix + 3u]; + uint raw4 = v_211.scene[ix + 4u]; FillLinGradient s; s.index = raw0; s.p0 = float2(as_type(raw1), as_type(raw2)); @@ -378,73 +373,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const } static inline __attribute__((always_inline)) -FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225) +FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211) { FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u }; - return FillLinGradient_read(param, v_225); + return FillLinGradient_read(param, v_211); } static inline __attribute__((always_inline)) -void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201) +void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_201); + write_mem(param_3, param_4, param_5, v_187); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_201); + write_mem(param_6, param_7, param_8, v_187); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_201); + write_mem(param_9, param_10, param_11, v_187); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_201); + write_mem(param_12, param_13, param_14, v_187); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_201); + write_mem(param_15, param_16, param_17, v_187); Alloc param_18 = a; uint param_19 = ix + 6u; uint param_20 = as_type(s.line_x); - write_mem(param_18, param_19, param_20, v_201); + write_mem(param_18, param_19, param_20, v_187); Alloc param_21 = a; uint param_22 = ix + 7u; uint param_23 = as_type(s.line_y); - write_mem(param_21, param_22, param_23, v_201); + write_mem(param_21, param_22, param_23, v_187); Alloc param_24 = a; uint param_25 = ix + 8u; uint param_26 = as_type(s.line_c); - write_mem(param_24, param_25, param_26, v_201); + write_mem(param_24, param_25, param_26, v_187); } static inline __attribute__((always_inline)) -void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201) +void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 2u; - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u }; AnnoLinGradient param_5 = s; - AnnoLinGradient_write(param_3, param_4, param_5, v_201); + AnnoLinGradient_write(param_3, param_4, param_5, v_187); } static inline __attribute__((always_inline)) -FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225) +FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211) { uint ix = ref.offset >> uint(2); - uint raw0 = v_225.scene[ix + 0u]; - uint raw1 = v_225.scene[ix + 1u]; + uint raw0 = v_211.scene[ix + 0u]; + uint raw1 = v_211.scene[ix + 1u]; FillImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -452,167 +447,140 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& } static inline __attribute__((always_inline)) -FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225) +FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211) { FillImageRef param = FillImageRef{ ref.offset + 4u }; - return FillImage_read(param, v_225); + return FillImage_read(param, v_211); } static inline __attribute__((always_inline)) -void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201) +void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_201); + write_mem(param_3, param_4, param_5, v_187); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_201); + write_mem(param_6, param_7, param_8, v_187); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_201); + write_mem(param_9, param_10, param_11, v_187); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_201); + write_mem(param_12, param_13, param_14, v_187); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_201); + write_mem(param_15, param_16, param_17, v_187); Alloc param_18 = a; uint param_19 = ix + 6u; uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_18, param_19, param_20, v_201); + write_mem(param_18, param_19, param_20, v_187); } static inline __attribute__((always_inline)) -void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201) +void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 3u; - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u }; AnnoImage param_5 = s; - AnnoImage_write(param_3, param_4, param_5, v_201); + AnnoImage_write(param_3, param_4, param_5, v_187); } static inline __attribute__((always_inline)) -Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_225.scene[ix + 0u]; - uint raw1 = v_225.scene[ix + 1u]; - uint raw2 = v_225.scene[ix + 2u]; - uint raw3 = v_225.scene[ix + 3u]; - Clip s; - s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - return s; -} - -static inline __attribute__((always_inline)) -Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225) -{ - ClipRef param = ClipRef{ ref.offset + 4u }; - return Clip_read(param, v_225); -} - -static inline __attribute__((always_inline)) -void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201) +void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_201); + write_mem(param_3, param_4, param_5, v_187); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_201); + write_mem(param_6, param_7, param_8, v_187); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_201); + write_mem(param_9, param_10, param_11, v_187); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_201); + write_mem(param_12, param_13, param_14, v_187); } static inline __attribute__((always_inline)) -void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201) +void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 4u; - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u }; AnnoBeginClip param_5 = s; - AnnoBeginClip_write(param_3, param_4, param_5, v_201); + AnnoBeginClip_write(param_3, param_4, param_5, v_187); } static inline __attribute__((always_inline)) -Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225) -{ - ClipRef param = ClipRef{ ref.offset + 4u }; - return Clip_read(param, v_225); -} - -static inline __attribute__((always_inline)) -void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201) +void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_201); + write_mem(param_3, param_4, param_5, v_187); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_201); + write_mem(param_6, param_7, param_8, v_187); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_201); + write_mem(param_9, param_10, param_11, v_187); } static inline __attribute__((always_inline)) -void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201) +void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_201); + write_mem(param, param_1, param_2, v_187); Alloc param_3 = a; AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u }; AnnoEndClip param_5 = s; - AnnoEndClip_write(param_3, param_4, param_5, v_201); + AnnoEndClip_write(param_3, param_4, param_5, v_187); } -kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; ElementRef ref = ElementRef{ ix * 36u }; ElementRef param = ref; - uint tag_word = Element_tag(param, v_225).tag; + uint tag_word = Element_tag(param, v_211).tag; uint param_1 = tag_word; DrawMonoid agg = map_tag(param_1); spvUnsafeArray local; @@ -622,7 +590,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 ElementRef param_2 = ref; uint param_3 = i; ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4, v_225).tag; + tag_word = Element_tag(param_4, v_211).tag; uint param_5 = tag_word; DrawMonoid param_6 = agg; DrawMonoid param_7 = map_tag(param_5); @@ -647,9 +615,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _1007 = gl_WorkGroupID.x - 1u; - row.path_ix = _1004.parent[_1007].path_ix; - row.clip_ix = _1004.parent[_1007].clip_ix; + uint _937 = gl_WorkGroupID.x - 1u; + row.path_ix = _934.parent[_937].path_ix; + row.clip_ix = _934.parent[_937].clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -658,8 +626,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 row = combine_tag_monoid(param_10, param_11); } uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); - AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) }; + uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); + uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2); + AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) }; float4 mat; float2 translate; AnnoColor anno_fill; @@ -669,39 +638,43 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 AnnoImage anno_img; Alloc param_28; AnnoBeginClip anno_begin_clip; - Alloc param_33; + Alloc param_32; AnnoEndClip anno_end_clip; - Alloc param_38; + Alloc param_36; for (uint i_2 = 0u; i_2 < 8u; i_2++) { - DrawMonoid param_12 = row; - DrawMonoid param_13 = local[i_2]; - DrawMonoid m = combine_tag_monoid(param_12, param_13); - v_201.memory[out_base + (i_2 * 2u)] = m.path_ix; - v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + DrawMonoid m = row; + if (i_2 > 0u) + { + DrawMonoid param_12 = m; + DrawMonoid param_13 = local[i_2 - 1u]; + m = combine_tag_monoid(param_12, param_13); + } + v_187.memory[out_base + (i_2 * 2u)] = m.path_ix; + v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; ElementRef param_14 = ref; uint param_15 = i_2; ElementRef this_ref = Element_index(param_14, param_15); ElementRef param_16 = this_ref; - tag_word = Element_tag(param_16, v_225).tag; - if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) + tag_word = Element_tag(param_16, v_211).tag; + if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) { - uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u)); - float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0; - float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0; + uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0; + float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(v_201.memory[bbox_offset + 4u]); + float linewidth = as_type(v_187.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); if ((linewidth >= 0.0) || (tag_word == 5u)) { - uint trans_ix = v_201.memory[bbox_offset + 5u]; - uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u])); + uint trans_ix = v_187.memory[bbox_offset + 5u]; + uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u])); if (tag_word == 5u) { - translate = as_type(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u])); + translate = as_type(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u])); } } if (linewidth >= 0.0) @@ -714,21 +687,21 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 case 4u: { ElementRef param_17 = this_ref; - FillColor fill = Element_FillColor_read(param_17, v_225); + FillColor fill = Element_FillColor_read(param_17, v_211); anno_fill.bbox = bbox; anno_fill.linewidth = linewidth; anno_fill.rgba_color = fill.rgba_color; - param_18.offset = _1038.conf.anno_alloc.offset; + param_18.offset = _968.conf.anno_alloc.offset; AnnotatedRef param_19 = out_ref; uint param_20 = fill_mode; AnnoColor param_21 = anno_fill; - Annotated_Color_write(param_18, param_19, param_20, param_21, v_201); + Annotated_Color_write(param_18, param_19, param_20, param_21, v_187); break; } case 5u: { ElementRef param_22 = this_ref; - FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225); + FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211); anno_lin.bbox = bbox; anno_lin.linewidth = linewidth; anno_lin.index = lin.index; @@ -741,57 +714,60 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1 anno_lin.line_x = line_x; anno_lin.line_y = line_y; anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - param_23.offset = _1038.conf.anno_alloc.offset; + param_23.offset = _968.conf.anno_alloc.offset; AnnotatedRef param_24 = out_ref; uint param_25 = fill_mode; AnnoLinGradient param_26 = anno_lin; - Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201); + Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187); break; } case 6u: { ElementRef param_27 = this_ref; - FillImage fill_img = Element_FillImage_read(param_27, v_225); + FillImage fill_img = Element_FillImage_read(param_27, v_211); anno_img.bbox = bbox; anno_img.linewidth = linewidth; anno_img.index = fill_img.index; anno_img.offset = fill_img.offset; - param_28.offset = _1038.conf.anno_alloc.offset; + param_28.offset = _968.conf.anno_alloc.offset; AnnotatedRef param_29 = out_ref; uint param_30 = fill_mode; AnnoImage param_31 = anno_img; - Annotated_Image_write(param_28, param_29, param_30, param_31, v_201); + Annotated_Image_write(param_28, param_29, param_30, param_31, v_187); + break; + } + case 9u: + { + anno_begin_clip.bbox = bbox; + anno_begin_clip.linewidth = 0.0; + param_32.offset = _968.conf.anno_alloc.offset; + AnnotatedRef param_33 = out_ref; + uint param_34 = 0u; + AnnoBeginClip param_35 = anno_begin_clip; + Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187); break; } } } else { + if (tag_word == 10u) + { + anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); + param_36.offset = _968.conf.anno_alloc.offset; + AnnotatedRef param_37 = out_ref; + AnnoEndClip param_38 = anno_end_clip; + Annotated_EndClip_write(param_36, param_37, param_38, v_187); + } + } + if ((tag_word == 9u) || (tag_word == 10u)) + { + uint path_ix = ~(out_ix + i_2); if (tag_word == 9u) { - ElementRef param_32 = this_ref; - Clip begin_clip = Element_BeginClip_read(param_32, v_225); - anno_begin_clip.bbox = begin_clip.bbox; - anno_begin_clip.linewidth = 0.0; - param_33.offset = _1038.conf.anno_alloc.offset; - AnnotatedRef param_34 = out_ref; - uint param_35 = 0u; - AnnoBeginClip param_36 = anno_begin_clip; - Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201); - } - else - { - if (tag_word == 10u) - { - ElementRef param_37 = this_ref; - Clip end_clip = Element_EndClip_read(param_37, v_225); - anno_end_clip.bbox = end_clip.bbox; - param_38.offset = _1038.conf.anno_alloc.offset; - AnnotatedRef param_39 = out_ref; - AnnoEndClip param_40 = anno_end_clip; - Annotated_EndClip_write(param_38, param_39, param_40, v_201); - } + path_ix = m.path_ix; } + v_187.memory[clip_out_base + m.clip_ix] = path_ix; } out_ref.offset += 40u; } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index 7e92cbb34da93d54a2f20df565bbe42b9642d93d..d5e9136221b42dda90f449a54107ec5ace5246d5 100644 GIT binary patch literal 37720 zcmbuI2b^A2wZ6YFGogjvJE4Y70O>8Xz@_&p!z7s`1CvZhCJ9w~Qv^Xps?r54fLJIB zhN7rcu>dwi!~%k-G!gkf&-a~oX3rt_{=L6@?%j94@4MDsd+l=eIdf*XX`5&Dc^i#k zjRhL>HBRejG_S=P^Pn^u3pR!~>#=(sxYvd=`X_9-*+(|kVc|ww(@&p8XlJu_%9O62 zsT#J%d_3i9%A&;3wwMMQ*v8Vv|I9=EAf%n%$L`fRcHe!+?zL;@_-WHRXY`-g+uhmQ zGqt;OLeKQ>@%=rsy7il{@avg6zIWz?(xC&7G7mX}YmDjb?Hf-$oJ{3jeG@0n=7FubM*sAgfp_jV0i-x_a^LXejuDL& z;FVuz&(sOsCuA&xjRywQ!Tc|W|8xR6mgT|CHq;%=)JoL5_4M}c+}GPTz1eH4Uv;m+ zemfegzz2N6!}%XIuj!M3wjXH^sk|V@G3iaJ6L)>YCJ=4>hlT1oM#F=fvKwNi(QBI4!oK&i=zM z3tQWL`nEN;#JAPo!j0{Ub_2U$NX{P$(TuU}K#WBiV`?#qwfG++nahp?aTaduT8s1H z`I~z#y9~tWXp9B#GQDf|-hEU1dM32iP0edsSO4VBo)emLsOB7w@9mkU*1`F<2b{U< zoYHLD*)xGn-qU|#bIW#c!tASm-;{A4je~xf>cL)F+2-O_r`d-M#BuC(c`xxb^{6TaBDBOH9iV%-4DYX$AFDfKSR~M%DHjg z2YkWKjha_EH)!VACuV044M*VnqTII@9z=Pcr9UC#z*T<3r@uJfxL`n&qOClqU{?lm~B z*}P-_gPaq&nK4IwaWKy1rguF)K|jg+LU1eZ_Qof{gYN@vjf>!|ajep~ z3_O@yvGf0L85V)&ehZoJrSzSAKMhX4p8=Qoeiq)!cZJ3^;K6)bZvp@RC&~XXm(37G zj@84&{H~y{(-)2{A#kt)-U!;%ZbqlzaS6k!Ofjrt9w}Yp3P4AjQ?aw)P*7!Z} zy}PIMO>dt4!y9rheiH1=z2!TJ`mjpxo<7|lXPW1Ry3ZN>n=zcP``}*UA5rsa*X;XA zFt3HppP_x9dK!M<`0lCQJIx&UIY7;8hB~z|o@pvA{JgeGe}Ou&Ujh%tZg2dl#Ya;$ z`+mK}Hwk~U#WxLquf_ebq1op=3^x5N4NgDHgUfzaY4NI`w#J$R?S?lt2m8L*HFZMg z;67<@?2cZp$$m9{*bv^<=!EkaZsv6|IJulN#IL<^UX7n$;}_TXB{hC&jbBmYSJn8{ zHGV^l-&Er_*Z5az{Pr5ZqsG5cvnM_!>37UX5>1;~UoaM{0bF8sDM zRO1KN_|Y|fOpSNd_^~zKTjNuP@U})DJZpEt5L{7iK?0|3U{ja03GkEBCs`kb%wb;AEyCzL)z8mc^ z#Ma)}2hMK{`1;;@=hz?K)5PtK!)v}r*7(si-c{ohYJB1l-qDx_pE0>}#`x)7{p0y~ z<2ONnm%`ZhGNszko9{%ku_wRtz9D!*r{@fa??XLdAhMBA@xI$tqcFS-6quI5Ev z=&d=vxzfz}9&nlSeKr07yl-Z|pG=$iJyh{=T{C=TMr&_8jMn_v2e(hTSKjIMc?50e zzNr&?_*msTzT9hSXLswjExZ6ncU)6D9qo*6zikcYH%a<)OE;R|DEW;`XMay`ce72) zX1K}SJ(DJrYK)23j!pN@>X$&#JJ9*K8+oAr;po%)$M?pN{^wP1WfjvvoB6PH@jW1h z^f5nr|Mae@Gb%&$1HfE-%dVolANFh)yTys6pCVB96pT4T>n z(AFB~lGu9l&3WLbzrnUkVYdg*mbA_~EM3{y9RoJ+o$rxvYmW`?e>Jc1>^*CL#(T8V zybqrOH|NGpUTtdk>rc^@@^kL5;jR0=t?@EAKjXdzF7Kbe4&n61=Ifu_Gou-(*+0KG zMJwA3ukrbZ@Q%g`@aED~6JdU9MC0I;uNU6D$J-m5gmXA)-zt1ui;t;zYfkt*3VdKa znt66K_C}k?@3r^^$%NL=#~qEs(OPTjiO`zQqtz-r5j>@0fAI{jC^YFkBTSwzTc(Xqp zsxkafAHE#8SDqE~6Sw$f@PrdiY{vOBcm~gijO!h^^XIp2bACUZZyoZ*IBW*T+1^;X z#+R+}<%jT&#>(*KTJ%iuNNmPjrRK95yg8P_XM-U=?28)Ttj0GV!iP7uhBsr5^J8~s z^9FZ3+Yj+;Z|q#-yVm$_L-_E<{_tkpZa!GFj*aG+4jJOt-Z-+xKU(9Hn!NRUzOm$W zH*@3p&Ga{RCt|)atq%8!<}*gE^z-?m=2_> zIa1rN(0m4~9aLyOd({qWYVFNuE^uHz^Ssr^%l)Fp|2FrF&)#ys$lWilz5DTFESmGT z9Ayc1zw1Ae*0vWfg^z2vD#dGMEUsTm^RJd#>#4RXzOGs9OA^8L8p$5C53&CGj3k0{ zY9G~*NsjbrQ$s1tibu(8#g&yA>8r3|AO-#x0UrfwsFU!>7&iK6Q~o{_S%hmEOp`@2eysc%Iwx&>N2k5 zNwE*#RrS};K77$hAI8#dAN|znVhxjE*_B=0m?uytrfszw)87dv=4oJK zs%bl&+L*>WgIe3^6yu#qop`p>ZkzL|6Ym1BZPZ4BK2DuB+FXCqk{HA84>#*zE}siyt53a72_qQ+dAw+r9<(no6T z|C^F_bF0nO_a6H(m+w=XmzufCefLqz9{2%#!Gb>xA5rk1!Lujq^LcpgANkTOfM;If ztqAullCMR*jtj)no6nA5*WB|r_I+FSf%ctnul9|AZQ!SaALQHl6nR`+d6PexEM+B{lB%>C*1E;gb7pINbjHHeB+D zYTR#Yv0s3{-_*iw?>Dt@^L@F-{kB%x{k9hFeEDsyMZhwAHE4km&O73^GaO3$cE!^j!-_yc<9{VjV z-0}G>EqpxOZ)xG48-7O%zX0xcv~ctDJ6gEy{f-vy^V;ud;r8dZv~b_0{FWAOzJ5n5 zx!=-C?)S8C=gV(uCHI?J$^E7l?s)vB7VdcbrWWpa{H7LeetuI6_kF@|YT=H@?`h$O z!~LEXZhwAD3%5VNrG?v{-_pWu@3*w@*>Jz5g&WUrXyL~58(O&W{Du~8d%vNDp8@w9 zTDbB2b{1|tznz8a@3*sXj)pXZCiLel|Yx?cr*^J8aJ> z>zUfrD*qkfYVjWfR(piD`{A=R^-geg+j|Df?N8gTU^Q)?zw)i&X4EQeqhh|Ibi+e`iMR2 zUqOGDXU+j&pFP%Z5$ho8gDGCx52@_d$@5UKG47x*pOJ@w)tu*l5<`9<#aPA`XU>iQ zXU=@a$P@P{uyM`N=Z$4ndFFgP*g03%-+Ar^JI}`Q87CL}eAC~2^_v9Vms%ff`q)lAeNP5E-o)+!muq+| z+%?qC_Hun(7uU$SwNAX_z{Weh+HbvJHGfvIy*9OS-Cd*HQ`5o6Q_6d423$Qk_Jfn- zOzU%qA=gJdo7!0RcLLaat=(rQQJ+ll(tb*1x6XJ@1sfy#>|@)d^6k{1% z`~tE4+0JqrJYzc@Tp!yRXzJHp*4vj0!O?O*-ODyIv7q(^9nJ&GWBU|X zO`Gpo^4Kl~yS~~kp_XS)e;Vwbwv9G@^i_8aGj8kuZXG^@tv(N*MN>}>mw}C&@qG^L z`1G?+xjy#k9&)a&ZST2%IoNs3eQ*U>E%$*owQ^s&hw3?RMLh52tFY}sZQoadXl zOO%@_p8eXLm+Ps&OwpFJ@hg>PF81kj_N!=)G0zRTera<%*f<<118wer+a~iX*U$OQ zxU4g-JHhom@HI5`%-LOFwanQ!z+TRowy#svoHKD^e-m7f{Vg>0%-P*wHRmjQ?q0ZK zaE>^|vYRKikFsJC&~gePDU+ll#HmC&tm1eWUK2Bya2F{ax_ZlyV=GP~9@4;%WoqJgR0>xOy7TebI<`3ZP#h1bI#Qh`KxVcaM1Xjy^@(S3?KD7Or zqGliB#Ca8LoZJ(C0jrhw#OrYTcRpUDmb+i~B>o%VeJHj|-hTtD>;G43dG3k7gS{t= zqfH-Ut2>9u(K2DaCdUBix zoE(i|j%xbZ#`ok7@D7!&9V}10dBNsuyXN->xa(uP`M|zk+eVu{ezQ;=GT+WbBt z&%5`6P2Bo!k!QsSG<7fA>`dQk$!8(3+HI_pKSwMKSM%F@d={zsEDBfioA%9ByT#zP z)s}XPgUi@Uz}5WropwvYZL2NqmI9aUmVvAJy)x~V1>07e&-3M}y}akNEl1gtpI5!- z#5vDa0_QyQduO>HSB7iP*|-W=&Hb2tyehoDk4K`Z=bl&%>=@Ov-&O}3Pn+xDw@#igTN|ux8sl7xyhef5&SD+p>w?w%c56RtP^+c=`e6G>UK@bbTK7Tg zKHLzVzO|?AMqq8}dtoCCmR zoCD!%#z|fWfy=YtV7OX&_8bDYtv1hs!>GMH3$z_dQS&SiC-xEGa=wm)s~J1I~ecKJTtu}Kw znp!RSO#-WB9FxHrhqn0jfVG7m3$|VQIS#B=o)5k7#L}L&Q^4B7r-I8^eQ>q%exC+U zEbVD~JXl-!bZ{AK23)PYfBWHyr9EwDg0*=+&7z)7aopx>Kib{*-fJg-wL7O1spW}t zBDma(C&AU+FYdcjz+Ubm~mOlbAMSI-*fK*aCv`! z9Iod5?R;HG?d5!F`vgVJ`4aog^*jG3!LCJqE;IH;aP|0HT=}>k(*LK>)X%A&jhBFJ zr=E6~f{pF|PPd*!%W>4cPuiQM9=hiKESS!?ArH z>|S@BZUB3|$8Vd}hZZ9Y>fPzEZ4{L^-k)qQF7+{bFy(BroOAP<$e6?a5b;Q{s#OGO2#AC&lvuE z{4KCQ7dzH(g5}Bc9`N22ZFf`4wT+|QePDCeb}zMjEB+#YAAcL)2475Vzs9t!KJFve z*Y&%W+E}jNcfnb|2f%XI&wVrpJce4`zV4@1v#n?9L)tCX`aKMOkdpDr_0iuw`UtpO zzwg2I&-%&r$^3mE-0YLK#&9n_il!c)AApUK^^@z99Df9!NZFVB#B=X4u$skd7{#^r zJbE0>{>-O5lb%4+b`j$=#*<*b-vZmGw(~3PZSX4;ZI1QN)N1ML zU2uJ@|3p)F{*3z`*mmjrUtoRIjrk6>T4MeiTp!~HXzGbcdP8CkgX^Pi%=f`+@t+55 zKk1_l?pfjAQg@NuA{B7eBP~!t*wgX-+9P=-i^jK61*$5x_$X~ zA!@eudAB;aeBP}AUyYLS%JtFTdtgoQSo+G`uLajX&pWw3nZLEc_Gvt0I4`5%>hW0z zY>Yhb&;nY2?`hTY|5_$9|1z zTYbtsaDBB0T*qy&mFu`I+`q|5&hn-YbL!c?U1iU@$@gupYiquCfX{8dc0|{f`5FUO z^U8ef1o!WM(zjf{%-1g9%-7CfxpT0qI&t)yMLibo7_{w1EzkVy4!#i|`!R-X^(p7) z2F4>_p4xXrzp-Xr_5znbAMXQK+neIg5&Kbl`SYH(eJN_j5j)l$iF+`3e@ezGKd5PM z&Euiqxy|EY=-M)mhlAC;GLJ{V51}NsT))iYQQ*wukzl#D(}{63*u0!O|29g#74M6_ zqjZ9Aqc*nfv^!53pLIEJ&R^#37;NRdb-~q^E_}w-e8$7ooZEa8p8&S4Hs^aHwU_g) zt(&6ee2dfnWbgyk&n%uLJ#c?VaXRZEKNhZ!`lG~BI}WVwTuh>tCtfeO9M2TEn&UCA zKCqX0X`4z>GcR%0Y8tryt^Ig3_4F%uUen$>c}xfU`^?0f0XJUGJjct{dy z{?j?LPRvuVm1}n@TrFpkHZ|M-rfTo+Ki}tVeoKmf7v20lkPF1MAY}{sosZhTEf@O? zUx;Ggi%|cY+W&qf?Y$rT?*qO3?=8}Pu?pMX|IRS6{O=2mwG_2~Yi(b*Q|7I>{&!Z~ zajM@zF`jdxuRL?`HL&ru*-q~7*c{hg6#tt^+i1ImcJT9)D*EknIrg`Erkn_c@~kvjY2 zL9jmM{(1nzSX`|V~mir1X8$KmSvcK!rdE$=w?p_cLf1gw^^{S2(; z^Dyi3bMP}1`_Z1Z&w{nNPUb3)?H6FRJO zoWC`x*QGdr>r-d`UI3T#_fn1jsm5Qg@i%Myy@C%L#?Lnt<7Z621)GOs%D7(yt7ray z2Ug4czW`Q?|L?(Sng5r;YUTX@0iOBSp0BQ_;(h!P>fFcv|LdH$;Qs&YEcq%0Ujw{m#j_U6z>PbHlCds}rk?w9Ij~yBVV`R8 zUjeL^eX}B1t-LQ+f@j}oPurEj+T1thE{|SA!?6_Ox9ctj)O2fjqXg z!1k3nSR3rT%wZ1nm+KS%QD9>{T)n@p3s&>L|8zfWM7#CC>JQ-~UmvWdPvUF&#F~kAi*l6;vvSmZ(p6b_M$kZJ*oFD@IGMM??>(Uw09Dt z9N#f;+svV4d|hbjxzEOdjaB-LM^n!nOaR+XJ?*-|_BV&N&VyXP*e8PRyMsD?PXepQ zXENBnOP?Mz_4Iu#*mmk^cO2NhZD-$d{p`m%?geXiUd847ABf#+e~R-uKlK3==kg%x zo1>Gar^tA zG5GWBJj6Ph;&lYYyyv4nvciW@A64PQsXt0FZYOol*OQ6q-jLh&oPwM4c{P51!S%nm z;QC)uq~WT>sl^{EmX_@Be@6j9>q6)%e{7 z*Z+qF*Z;8^f4t(^lc&I4gE^G!p;OV+^UU}dSnb>RxNlt(wfLU~R?FT$9juo3nlr#> zQe59T6m{FKMSWIf^I7j+JR7d2-Ep4_cE8xKYcChO?)D>g&25)&f){|7q>cXiX-}>n z2bZ~i0nAH)a=i$ymRvstPOk1dx%s*0>__Z=vt4riG}v7A*H3$L z{S3Iw^|NrbH}Lnp!CWt+BvKCD$v#$<;kCH@~Z>?MLihw_S3*25heS z>!&@rejZ%r`USXJe%3PAYbnW9J-J?2*^=w^aJA%mBRIKwzsb$dd(D2tH&fd#xqcCB zuKMezJ-OZjE_1yVuJ%6u-WTS28^v5ZsFUZHDm{6A8LpN*zY0#C-sf_2^B%VkvG=#_ zlINXZ^VDBI?aA|N;4;sz!`1Gpe7rxt2~MA$6>|IWoX|h_`8{CA<2{`C_oAum?>Qn* zUiX2`%b3Q}o_5~`mwDX}SNjgdefeGLITZVQfI9cP@B0r{w#?x}aJ9U{J`7gdj*?i9 zfDr`*9)%yTLstuof?0);QBMXf$``+PmQ+~Tz~)lfs9{& z|NViI`|l6Pv&WyJKj(A~C42R0H1#~oehgO2v&=bGi~loVwLHsy0#+-ZWj}>as`|;^ z_F48b{V3UwKL@L2pFIb5{oPNlrCjVf+K<>Zv|aYuFTw6J{q@tHTz>^FbNw}3t$ddK z2A*7V@0jZg=-QI&Z^3HG^>^Up>OPX2pL@uD#O@#4CD)h1=BmGb+LP-az-6v~gsYX$ zvOmF-YxykuGrG3q`U+Srx&8&5T;0QR^K<{&kN9wDlb*MGs) z-p4=BviITU+CgnQ*Z<%8P?F~dV725qjC(72df&>;&3o29#NMyAOP=j;^VDBI?a8wP zT;@3(u69?^=X~(=>Af$vAMbnpbDu8=c0A>?Yy_IR{+h8C#Btg|D@zw7F>V-{gcu@rp9+Fxc<8p-2DCbPs;ZG`zPW0 z`|qEG>+ipRQgZ+OlW_h0_fNw0Kc>dJ3a-EZ{z>WITjT!wC)%^87J<9Qb12!fi=wIL z*|8W{Ezb_uTrK{KgVplvSOTn8K0B6#yMOGbe0D5_t}XZD(qOgRkIRDHkM3{xmR#(< zvLCT~%68fJD}dei`s=4XxvmHR!gqygOjWG zl-&HhpX^7xA+_z2>qcO6)n7mD$#r9Jnd@k{TKVkQ1fE>WXUC@K+LG&LV727BIXJm` zKg-R}d)a=(-p95}u3Lf4Re$}oC)cgPWv<)6)yikbws3PTpB>wwsVC3v!D`8KM{x4= z9Fd!wXNY}>JwI%hJa-10r~dkBPoBGg%RG05tCi1=vGDZiStPd~&msMDpZ^!w@s!Vw zJ<-(l_neX^uf4$LWlUpfPhNY2%e?l1t9^%OTAsc8!tL*Y>N)40+z(AX^S3`(E%SFE zIAb{gEH}4|1FtzQH=OJM8)L%dC$@5TfndjkfwZkaM^GL9H9zkt8^E?Wyo;*Ja zR!g3p;N*EUSZ;2|P}_&Ni`sU{a~#+__18~(@*EE?^X!JJO`s&tNnrDwNS!<wkHT zUr})VuPwO#*Vp(B70&M_~IRORi^t)spL3;Nv`ZZ*Yn|OZ{VM2 z?gjAVs%~4)jgO;iORk>)t0mV@f|ILzQf_|kNBa?9Ol`a5`YEuv>aU;nZ zTKUZVG(5SM&)m{lvW_=S~TXOvtSS`8U15U24v)ue#WBU=izP3xQ-v*nj{`zT8uHONdx!w;~ zE6=P4;K{W-v%ZV2ExFDCt0mWmz{%A;CO1F#m;Hzzp|)Le{T|p{_18~(a{WHI%=J;Y zT6t#u0G?dSGwX-w+LG&!z-r0$ad2{Vf6L9!y=^~Y_qFYk>r-HJ)n7mD^|Rn&>|P(I zWe6z3!kF)T=2g8Iq`EY=>J@eKVNYDUnqDN{KbOX$L}hh z`}AqJ`IPtRkI~d~pFRUtD}NUF3EbTCJA|LY)z%p{_&bE3!PT?Z9mlh9=fb&lY;v*V zvLCTyvTf%47hvaHfBm#4|6hXZ`Tq({J?F-+!D>D`{M(shsDA@j&tCW~IC(jTa&vM1 z^wn=1?Op=APTHJHdGh={crrHI+fI9ZZ(N4m>k>-##-$Z@Z+yDKuJvas?6oLZKu7yH?G0%bvY$_ zg&oJkE|ZjcY6H-ng#9?v3j!Je&FkihJWG>g-@%hvdE z1$S?(Qt+AZkp;Jp)heF3T@-FU<=$8fO+9;Kaj;ssHsxa+FFe%h1A2H<)g8=|RaT{i-&WnDi;tc~I7S=UX#$;-Kr hn~U?HuYRY~?jvC5TbpwtPoA5D{h87Bw$q+<{eN{%DT4q2 literal 38560 zcmbuI2b^D3wYCq;Oz6FXgwT6$BE5zxp(AaWB$H%tQb;BcdMKeuQF@mqO+Z8t#9k5X z1-n2%1r-$)K}6ttp8x;6GkXqPuix*U>%RMa-?jGIYnQXnnKJ`)%)9!0RW(nwU^Rbr zPgm8tmZ;`Msj7vl5v_Xkz6b5Q(Tsud8*Q=WCORxyb+r8SS&Viz>!eKX>6@ZqHs(ht z&rud9hPEX%(7-m94*qXm>aRdL>3#IR-J|#4fAqe)caNJkt$W76N&UUu{e4qZd{rywNQI8-~x!2SQ6K3=dR45BFo~jy{ zI&<8l?#aEA*O@Ueedf4<4O@*ioVn?y+s@o{Tm3q#h4JgDW62j87H6SqY50lL`v!Xd z$2hG}tys=S8EeU5u{x@y;3xF- z@x8M$mZ8Rj1L|V_SHOQd0UgVV;8q*zE@oOK1U`*)q%KXrPm*LJ_^UPJwMRja@U zeZeF6KQ*uE6UX*+k28pI7p_)A8}#j--rF-iNe-&?)=Crqj%rn7v~rxUTBB~eZ{L*B z(|gAEmGhy^YbdV~)jDva7XGb0pzf9Nx z>(RD7h7r|_e|`?3);#_-D@cSu8OW(eYSkT?q4;pq1@Y=xev8nxY`14zOA{ISF6u-0D2Ck2~K60(^0nd)0k+RXc#|?Rs#}#P(XKc?}@gm)!AA=-~ef+Yi8auz;-%j|p z$GT{>d(m!i2M$a49U)pd?=~1?v1+eIjA9@B$4HKS&%rp0R{J&LymRf|J(qop7zY$F zcIlniH{~4*@a{3j48~ZbI=G0j`;_tjYXrw}&|v$n>QL}*(|b)hWxVJ zL%p)UT3f6>&EIJtj(HzN-BaVEsmHc(SF4`fGtEBM@znMl*3UjqoI0IR*?vSN*m=s7 zskx)n-J>lB@X$TeRe7@y`qqA}0|1=+#>q9JqZybyOb& zx9`ajl`jLvX+A?*FA3Pnx$z(w^aVSIt-}Re&W+kT=VnC9*SQ(~yji|F0c|kue=!%y zwK*3peP}KQ=fSaN9(bJ?oQIj<_B_m2c_5lYaV~hYw#L{xEU^#G#fa+Uclx({)xFC3 zPEP_OGr*bSv+Dc?&%D+Qx5uN+tDNI=;Erd|AMSjpdzEvn)|}%m z-k2^#9~R@kn32K$$gepcgSL7$oB{8(^XMnzKOda&UjWX%c2S*6eO|Xl?|9U`hQ{7i zT>>}n+P5`sHLr}jt6ILg0$%w27o!{My*akp;>ZT?osSSdztx!D*9?GxY{r|t&6 zCR6)!-WMDEarl0{lc!E^o&6&!xfeePcjw;nokV?}TJN1c-5=*#=Z3n^8T?x@oUbR~ zUgDoq^J>@X``2Jz3pZ)ox~gXY2aW5U(!0yd)@zXGm73QKb!ubGZ7FU1qPAN9Gj(GB z4LlUPvwE$~TR$#2?l;?fGpbfD9SjEl&BGUHbAN1V^|?4a{j32_KkKyp>M?Xy8@74f zPe-+R(Qdb*-JXqh`weT?SsmEsgZUlS=5>A@)zR>FJ|n7ru?Z3|Ibo6qq z&u#Ea8vLpTziAlns6Gbg@!y({FM~4&-x}uES$(I$zuVwXHTX{({AUgROoPvD@aG!* z_YMBX27jr+|K8yLXz*7X{EY^Gv%%kL@Wr_y%5_+>!Iv7wJE~>incI=WY@OAp24A(o z*J<$e8hrf*->kv6Xz(o?e7gqUp}}`-@I4xQ&j#PC!S`$M{TqDDFy2ue49{F0Jv92bb~L`;43%yDh)oW!Pjc=bsBu#2H&i~w`lM!8+_Xa-@d_jXzin(WXf~9tKSRe;M;!h?5ZvR5C1;aSzXwOeF?m0;>6ZF*rmg4 zoz)d^epAD5J=*UaSHu0zqOZ z{1)l&uo(NkrqmdE>z(NH*puIPz*7e%_4;0HzE8o+cYvP`;~mwr@aa=e#CJsN_hj^Q z*wpDC96f#N%xO-p-y2`VXXwLa`+Zx@tNf0t#w$qi9kr|a1KOCL>3ltHzg+unt>#5u z==RZ3y;N)F{5H7Ed7jSUybC^c=767STlpMO1Gn((Th9!d@X;D#H#gel;3o95A^l-w%W99 z4rx+v-^59z8e`&_j_%#{FW;j3y^D{^kq7%9fj(_uTz?Gde?IkgRxu5>nIBsZ-yvd1 z9}A!lOz)X8qc%ie5Z&)CL;mx(#<&prcz#H!zlG=5h4jO3JQ&Er*cqP@2en07nwbsF z)uL$aezOYN+I=jBtv}zH2Y=!lYP&dgqj_Jab>?7++Q!NZ+T6Db53cXf-c$1$$GTg) z_ZF=+_r$Vb_f~#}T?O90w>qk|!TI@hU2wTa*B{3D-4J|WQs0bLpjJPf)sDk_I;))< ze79k|t2z+gT910b=66&y4k!7z@Ya3QSsfS70igZ(@Ud+^tLE*ocU7mu2j{(&C*K~? zCh$8ue)%!J{j+aZ^+%rI*UDZSJ?dM}z%idZ20?nB1ww={4 z;Y0J+RXq)F?bX`J+t)1e*%j;-Ij zw)TXWpQgp{0gs<`QY+3V@C?r3-g;ac!JR+9W1I8l;e5xCkA@G8v$NW_!QbEDV}|jr z>R@|uVL)tm-DuffkB#=EMIzRzzs~BW z2EV!Bqw)U+bL!d3cHnoQU4!49R)>2<^ZZvU{XF~CJZDFdv!dGq6 z7JnK%)NkkD_on61?K9t>l85ot5`QkZj4wC7IoaQA96h(+Q*+-T z&sT?gMVkk#=D3afd)k?|W8M>sy3KJ%Pj9qy&(Gsdf_ zmAS>w@$1*luie(zYCfmpXGk@lE78~*Ep6#SbFBJheCFaaSIxOiTl-TRRcJnE)z&C9 zpRsD|6q@Uxwn3qptC}D9(udDfwJi$GXR6wJ3(aS#+IEHJIMsG4G@qMlyR|g_uEno@p3KII$F51_1rAi(&wma>AJXA?q)(A+X|GW*qN@)s`N2Dxm?x#K`qu_pH+$Ac)ayccpf|!bgK0WB5K`^9tV|Y<#)#52YT8 z-%%X~S96~nL2cf0*Y(I6t6S{<*jmdvkB1x2_Px}$R~v~sfjVQ?$GOvQBDH?n-E)(u zv*(PV-8uCIBeAD~ZKLLTPopkl8~6CyN1Ji|SuSyHuidz_s1x@juyNH!vg=Q#F5|8R zPqB|v;QH%lAE#2M4`XS!kF%)L$Jt=psJRB`P^S-L&Z+I{#ypogF>R~em={qe=EY!R zs%g7~+L*?>lv%%t&KUs7A zzf)sl#Qr_7mt6fvlr<>!|1`ClbN39j+&Irt+pn7a%I8wl66blam;NtM&qwh-j{SA; zJOzInT(~CVt-EYZ%2HM*sy#Z_E`Ee`UF?}5)>bKBhuw?EJB(+O-}iGK!wbXvM=EuUR$ z_J3xLZSx-ZWpL-iKD1vBH$V0C|0%FR^?#`LSJVGtYI9Y$Ke^w!)Y8B2sOF^qXKR~% zCjQsa=EouazQgM8S>*exxmw)gzNf0W$9?~l8~2aY456hNNA5eQn)a7#oVLD?8gnFn z^XI!?`be$)Kb813YxDA5%9!@M2(>w=nU~!6DYdNkVsPJe!k2?*y^X&Tyu5b~f_v`h zzZP{5oNILWxotie?<%{esirx&K`pD2cPPK@O@VfUTfcLBfqhHLlRZ@Bgg3+}Vu@4m6? z@3-G@_q5-B!?j;saO3&?x3u3Q#hlT6!_pors<2SLA`(3Q$>lfVl@w-^;a=(d% z8-M45Yu~Nl+WjULf6rLIhn3v#VI@DV!TlZc@53pZbHwvsQ};C>5>-T4~T;C>G)?S2mncRYR%3wJzz4-0oZeh&*bKfi~C z`@HviSh(ZyTUhu+xZlFU?a%LE;r8cuuyFhHJ6QNcxZlCTeP8f9Sh)T94J>>P+;3pv z_BXrW_UE^+*zM16U*U7$e)|fyKfis2+n?XP!Y_sU-7DPw{N5FAe}3-@*Z=04?_71Y zp6O?C9?YgZk8OL-P|xYJ;p*q$BR>bM=CghWPGQenwfLV4ZfUjud2lt~F~<;dS?crQ z>bCccmfK$2hrnvuJfG#;5X+wrF9PpOt$siq|6;JZmu)=b)soML!D`w(&*jGR-QgqP zvDE6?J^$5gtNlu_n)Bm%b`{uX!1uuV%k>faENDNg=B=&)`)sm)l~~tNUr+IR9sP#d zZk;@D1RLW4`tmt=6Iji8{ueRi*HVmSY;or7W^m@rXOKK`Zv`9I9DN?iw_zon>)XI9 zQQJnFK0c$=Gv^-zJLl?|^E<%mUbb<5)zar(;LN$tICSAMWS@Nvtd@Ny|02a$#uh(IY=2g>d>x*#eFNMa z+v8~J>F=9h`}Dqq_z}f5(3W-jajm-!-vP^G`w3W0o9|)r*nS3feYO3R zTAn@qbFh2bHrn*jSKT$txUK)Eb@&Cg<~;loO+7jM3T)hr@7G|*r=NYw^|4R)kaKNq zd-wg*VCOOS!82gB+y~m!%6<7Icr)j1i08dL7u(^~_Wdkao^^d5{2b+Z?CJkEa>~8+ zp7|}frQzeAc>(U_p3(L@ikjmPC-$Gf&DeiNQ_p_*3s}v4vNNN98T?nuOBBz3?dJ6& z_1`GkayI_G*388|eXhTP<{0zbkn5K={{S0@BW1A7t8m+7e&zZ(zZsWx#`PMwxd;A< zrk*)_9jx{|{WxcDfW4eEZU3UEIcMU;{x`T8`%N_U%-LIDHRmjQj15I6je-@GFKIsH|pBP75_Kmu8lDw^x_XupW;m_AOxsT^VQ_uU%{9v`b z&-gQs+RoK{t#4+H!T;M%{!n2fz7Sk3Qm@mUIP%=EW3Sk1FE_tG+O z+iG)M{+y+j9F_+s2Y;?Fb65ecJ^if+F8g~AT+N^V)8EQq+iCNi>?(2@30Aj%f3A~@ z{h7}BbB;%W-5b_EZ&ssTo#Lf^joNOVeAWaTBhQ<)z-oEk$XBHp%h=+yT?cGixo5_D z;AdG+^YUj&xohpa*#_V%u-PX58-msK_h(Ic_R~gS_mgq7>Eq9<>gj)Duxk;%3D|tH zmYagr%C+1KZaZz}=Ka4p*x2Q|Z2?#JeDNFYmf$C--8;rGA2t2b|5o5KpRM6#KJSIA zmHBJ~x1F}E|NFq|?endjXbf{y)6X``lKW2JIrSRt2$m<_ zE@1Pu-Oki<*TZ(Zf_)dYjW&IDpjJ=p-PLQ`Zq)L;r|tpvJykvD>S(aKm+>wjFSX>e zCs^$s*2#D7z2ItocaG2A4WE7BYJRW1qi(k^+_u`%Za;7t`~7e=zw4&m{&3rB^E^I) z+RO7;+Zf96{EX&(Acy-rSFeqp9aS{{UFc zJ(B%?1l;_!WiK5GwhwL2@qyH8$>(UWTK3Q};Orr7@#_X_o5tAJqR(T&YG*UY^08pG z9*X@OMXi?h7 z-iI0YWO(}4p0-oK+S2z_uv+?_23AYo$AQ(-emc19dj?$1-=(DQ0eJe>p0+c=+S2z4 zV72spB3Lbb9}iZuy=!t3*mVd$8SLDIp8_`D@KeFY7{jyG->aMkHiqr=)$ZEPqSj_R zpQ~qp%Q4P|t9hShjAz0#M(t^P7FgSIfNht4ZUw8A=fiFA z#L}L&w}Z8Xe+*p4x&y9O-tTw96H9yA-UZecemA&`bq`#vynjCqPb}?edoNg<_tSmU z_fs6V`Pz?m_r3SpC&1dB(;KPfiSq!s+>4)ttGQp?cb@`#x$m?+NKtd&i4*5xa5e}Q6rb?0Mk#`48l zcRn5h%YBEUl1F7i^!}7Uonmhwp(&krDI#rO>}$>#Nj0>E{RN&SCobAy`d+*F>9|Ymm9KPWvB$ z^;wv`Z~s3A+y6QgZLUS)XtUi2Y)^sR>#oz!!Cr6hmaZ;W`U(G!*RJZP)VBEV6zoL^GQeHV3{zXtD4t*-qS)M~c%UVH{Ti{d)?d!c8+Pg6Ws^_T0DvCaiM z)~wreVEyxLK(3GW1&HxH*gngjH+}&A)g0E7+WE z_cFD78-B~pBc%Ep_(p2`HKuL#asRlkuAl2^EZ6T<@D3JY{exQW`ni8z1MfwxuKg8i zHQTy=uY=3=`xpG5l#ExdkN)n>H^8S;vVQ*t>!0yx#46KtR5`n?5LkI&m+V`Tm0 z`XtAB*uXc!_vilbyqg!UX7Tb~ajiXvI?(LTe9E(^6J6US%!@I)z{XKeKO?|pKl8!W zEUpW;+F(EPqotqnTv`BKTl!fLY#jA7>%JEP+rKvZHlKyT50Q6#7J=)h?z+*mQcJFj zfy-PMhpSn<$~jpA&0L+|a!!^+*OpwD0vks?{VWYG`&kCAW@*mJvS{h2oRj6ywWXis z!NyU~cvk@1zqZWDir{iiR)XuN?wt5LTebMF3|0#t33lE*>z(6O!1}1$-rw=6xewht ztAW+ZIa(dA9-lSd#b-@4^_>4}fo-Rrc58!;UCzroaP|1C`z}7~p{XbK`e56spGn@{ zlN*5TQ``BqwjtQRQ_<#FM^US#uZ_XYv2KEVQ5!`)adrY5r(EBi;p&OA3)tKeXIHR3>Sxl}Io}Ozd~Ijc z>##dmJ$>x~c1-@=n{y!7C(pY*!SD9G+Y4P=o_BkL)x3=D^L8KjXiCn1xqgnp=iPqb zJn!}e%e9Tbwm&$}yZ3|T+D^uH0N9+(cMMpb=iPze+wrkqW7<}q@_F|&+8WE}-J#g> zygLLe_j%{vM;->=k6PWn4yIPKti zcXE9)e@BDuvwYqi16PkvH`o|?-pTdR?%#4A3pR#*n1_GY*#lRP&seZAav#X`$sQOF zez!f)i>@tuU;9=TK7rcHpJTMmqPc4<8wOrl-kD_a(%S> z3_7E>XMf3k|IfOf34gbBJqul1*7a<#npf8K9QbTX)>W>bV|QK81!rC7faT_R9(B&g zbMU(W?ijS4Pc6?nUI=~=ANw(eZS`^8Tm$FZdC$6C1TNR@61dvM6xZ#;)LyQewo55$ z#t~=TE(d4bE(6Q6ZdZU0CWg8>e}q~sYjPD>tz5UO;p*|Z27G1hV+^@IS(od;@3t=2 zqif5$+yGYd%DUVLzm}49k?WUr`6xK+auZmt&3|6t7O;7l)6LZKZFo=i-RxHIqtwQ> zop#qD^JiVIi|bR~U*2cQ@iuJckU8>Rza33|cGdo!;m5#gD;4eTfTx`~%Js9K)$wy3 zt+OBQ#I_1NG4Fz_<$lnnX8TcK+pk9bCU3m&rTDkC3zFNS6fggdcx(FgZ^-;-&BQ+E z7NOX7G3vLdAEQot=gxmN&da~CPx~cmZ2Kh(ycF12%TW9Gy7u*X%6v7~|BE$uoa$eo z7|%J;SDrce64?0KY$x}3OOETy6vt*8ZFdn*{VNo0-ZNjL_VS)l|0+ez9L3I+SS@{i z3v8d^-v&D__qV@q_%7uMiq9;4%}q_ea_&7t{oR+>@)YOJzwcY2#?Hlx6#Ms|E6o1- zK3Jb}fBgWio}7LNwr#$-{ur$0-{WP!JO#F$wlR#~e11Yvcb&yqXTNW(M)4X&G1rmQ zt5VEwb?W>*_RqoPdj7iLFMwaHdB*b#xN*wy{1UF7@5;Xdt2w96r+uhpyw8EvGPd7< z)jW%{F24oOrPz=5wEZ1en{mxm9^3E1YRUBvVDmSI{&IcvcaHu@(eC_-Gk@!V*Qa=` zOL6|zq+XBW{B1y;`TH}toWH*{_-hURW`q0hk1N~z?~e=j?|=RG$H_CMzkto7od1{M z>Y4w)g4Ld+IBs)P%RK!Ztd{wI1*}%i|3Bb!DfXj1ZC?dzGp=Km$M#Qf8TVgswbvfJz{9FD!Xo;&mZRZ7RGp=(lkF672#vK7yE5GZ{ z2cJtj=Rteg`p>awGp=hWk8MG48Fyj0TKSEA5%}CXuJ*KD6s*m-?ge>lOMsp8?1d%a z?hkX*U#?I5mj*kR`MugQaJAT%1v@9PF9%oioBZ17%Y$vBZl5bquSl`aJh$ZfCFV+C z=OZ!S16GTDWw7IleI!^deXatwje7bV1-8%ZXSsgH^j=-HwtF9ob02StZ3~LmW)$z^ zji@)Lcpq;`o%?tV+U36UEMKR=H!Qe+$G&;ZvleTr(ayEVSl2>R&waTzSS{nQPqp~3 z3s%d0xgJ=pyf4>>&#n8>p0*o+wYd+>T^`#;;4<#UaJBNj+yp+ij;lRwHw9}mu5%!d zZ40n{We&CkJ1^Ofa(&{z71$VGtKX~M3s&==`Ex&PO#j<})xV05{C!|GeG+F|uyN96 zJFr^XY!6nmjr(d7;_Lu6mNxVC=h2-g=9|4N*Ux_3J3H5Q_mw#NY8!0ZQM}$qabIms zy)DIkwS5b>|9Pj~Xjks5JsW(#20yUD4=eal;G=7vIo};_zvaH$15G{qZZude`_7!y zGM{^a)w1vQ2CJ3(ZXfvEx*zRnyDwOq`@^xxV|zcijJrQvt=xBG;B)J^+SB#`ur}j5 zNAlPX0+(?QhO50zoIGa^fzPeuYERok!P<=LT+3rS99+iz09>t$lDJ2J=hktxr|pqo zZN_!Y0q_T8LvOv4S>~VP~tNatWWwr z0j!qu!?B(Sx2-nkLY}$sU2=Dd*RGVz#cp8dVh?KPVkGtHv@7T0WVn9iT%3ZYp8N4s zu-ZZt-~Cs{|1_|=V>YJT?*is>26(l)FLRYASD%-AQM^V|lIxyebKRTTTz%g?mv&{Y zXTkL=b3GePJ-MC(R!gpH;6DehZm!0Zo2$8;2licC-CX6#)ic;{o?iP(h zlIs|-xgJPuu0AKOqFtHmWpMqc#3;vi2i!L0`0hkg&olQf zu(3*?yV2A$2ls$&r=E5n2iu?RoCmpnvEK`}@1>}7Hr)qSkI(&J`!0Pxfu^3m9{}4< zJ?%aTwr|_nw_HE_agHAZYjW@-7;h(1Ns%y_tAEy5uAlrdYPtUM&(+-i-mZqemCZ}6eu~#5ih0jZ z-B;sY>f>rWk@|xa<4&e_Zu+xA-p*Z!Rb|8Bwcf2!d6|Fpq>R&f2FDY*V~ z8~nL~>;L;K0Ff2rX5|GnV)|D(ZQEx7(~6kPu|8~m+;>+ip>JoBZ$|Gx5)`|m54 zXHR~G_O3yBAAJ=~J!&@reji-s`UALH-iOWg zhm_>1o?L%a+mh>#;cChCC*b7jzLT4ud(M8u?l;>d*PnyURe$}oC)Zzq%Upj6S9=?O z-^a}LSCr(co?L%j+mh?kaJA(6EI7Hk=jG-%m)d^B&r#bhxjqjzSN-+Vo?L$eE_3}Y zT&;_eTz>~nuIkD41+cc{`XX2@x&8s1T)p4q=I6a;KVt7Q+a=dOfz4Ha{j?|7KZDC$ z{{mNAh~jZf@S=_96EEwq5dk4Q!tJ>!&?= z{u5m0`7gNI>$Q*f$G^ep)3ZWuKb{l%=RSWM?0CF~6Q85DQrF*eM4r6n0h^aGjio*9 z=EYX#)d^SgZjY1_=t(qQLDfBm$V zzpH?@{(EIqt$+7eo_#am;;GaFH9nqtCdFspiPU-a`QMVr+3ndrvfvBAM>Y7W1=oL_ zg6qFtgRft3{WmMP{#!J-|2>H^AOCw2;rj2;;Qsd{VmJOC1=oMi2H&gT`upFLNPGSL z?@5$=OwF^$mxVj0K4Y?1mqSy}vut^=TApRjv0D6B0ITI$wjx-qe3q>QcfIT=vGyV0`o?KT4m$|M1S1X@oYr>Oj z?j3Vo3td}sT^p>HT-OCBSNDPYiS+)&4 zxt7ne_n~V`uG@mulI!;1!17l0I=gJpJfN4sq62#AWvQgfz8X9#?szAt7l{PI+c>M`m`FKM16XV zPoX}8;#qwrbv+L-BdvJYeVTs}JvMN`kS<1nyVo*k~aTK4$iV6{9uJ^)rLpB+cQ-9OD|$C2pT zaz7pgR?Gc(4A}kX{&sK4#qKNn5xb{smwn#@cHir-pZ4TB7F^~!4z5-{JI2G4Yx(Tx zMc0;GCxF$G>m+b;bzjTP&pmBFV)wJ{lIsV-=BmGb+LLQPxXg7jT+L@hezuqbPp;~= z^*J{cU0ZUU23AY1)4|ErdrEG8-cR-;9-y{ea-9h_SN-+Vo?K4=m${w@S1X?#v*5|K ze0H3Kt}VHq3|337r-GBK_p{vmyqE1q?0syz^CSZ;3DQQL?3dTQGx&l|wzslR^Oljn`#GS82~)o!9B z&s)Iec{6qLycMjTJZ}T5CC`t6ljrSVxw+jzZ6D%0scn}$?*f~r{`zT8o_B-GJU4)vpO0Vp%zXq+JAX{%~#NV%Nof#IA|$ zvTwcucHii)pZ4VXRdAW>*Wha9GxzK8mnX_-h5%|E+@S&oBn#$>%A!{__`H`+^O=P{H+Ivf%nJ-QdgAJZIKRN&Ii3|7mT!&@rz78&P{TE!VJhR?_C)e`K`Zv0^)7OC$7Mfa$7I{g`Kn;&Tz~zvC;!#J&HPtKQ_s1v z23XB!hkuhag;;CC)w37Y1}87)P;M^HpT7D{quqL7*GZdmDNmm3gHOO_d)sMm?v2~A zdwrCWy>WAm-5a;m*tNd3#(qn?jpE+;7X2dV|#ug2}Wx?GW+ZOyJ`1S?2j~!~B zx!nM6KIPun5KTRMVb3n|t=gCUCWKZ)^%z&)MQQHizF#?c6#xx!7^pkJvHU zHgmoe*u9~@e%h1&*5GFT??qG3-q;4Lmc22HSnq?YXK!o=PF~KT++3VLef2w;b~}P^ zqt@nJ%9H0#;Mv%0Z#(VHz43AEUUyKkH}0&ld*iMeyEpEx@kHu-DDI7Wsk1jm6TjRW zdo}pp1$S?ZDR?jZz=GSyK{e0Z?hH4da&PQ{rk=gAD_E`E8@s{HJ$qw!xLUb4_JFJB zEO#7x!tbPZZXKIk?6~Yl?3iquIo}8D-q2q^?a6;%a5Mk?(A2Xx-VavG-WX4;{o(4_ z8wY@smvbmL7w1o3{U*@vV6f|?&AF5(&qKie{g3T!r@gs19>nf-A0>O^{u;YCK2c-$ z#sf8;L;XpLd*f5o*&9a@-#L}*e@ugS7u>zkTkx~t6ANx1lWLy1Jrr&}<=!|9O+9<# zaIjjrH$DJ2_w0=$;A-XGI1;Yznq&=*hC3I|tz(mm9hd!x9g}S{=f{HG8~W>~J^A;5 zoB5ALQ_tQQ2Ug48IEz^0;p)k20yud&hjMdq{`A%F9NP7PT_RH#BV7094MZ`J*uAX(B1x{Yhh1^`62YvOsgm$NZoo{W-zry DZ0CAZ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl index a6ccde9..0553c9b 100644 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -36,8 +36,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl index 8a87f15..064c515 100644 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -66,8 +66,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv index aabdf6b29f9fa76cb0dff6601013851140091547..a45627d8f6604795c29314078a096e7e9edfd50e 100644 GIT binary patch delta 353 zcmX?Mdc=G~8(Tda!wm)o1|A?z&dDr@Pt3{5PiA0XV_*l1^TNcFGLsQ<9AG& z3>F580k!58$LFV|6{nT}xj^+0M~O_n$R^JyI{71;G^5yNUiMJtc!WX9K!qM)3=snv w1QPQ>5)*-n1t5uuLd8Om#KfRt5u0PVt}-)9PB!K@%d?K#i=C-b%L8GvWGHn_Ts+4%q+riV{#(D@MHsig~qfvQpdFxT9fFQd)r_2aO6im2k>)e&6rdJpIn=b@)R*U-o`J_xJnx&Lb(B zb?&igG2(lH!Mn@$um0d_v8IXCz3veKK@g39t-CwrIEo|P=hKE2%J*i68AR1_(f>c%>k2tSb1 zET>1rpx81E&9$%7)kV1WN}(&Iuwp|1>*`>}Eala-3G7+tvXCOJp1^60N*-f47^!3? zm4xL9=@}Seq*gM_2QoMeaF)KoI*{dXFH&O+J8+28pV4I8>@WeKj4TK>;i+aUYCgq+ zgKjWmxy>7C6S@<|$dICuAq9w$_c#^Cs9lJ$3qDw>S%?7+102Cyks!wRt6D(}wddy` z#yy88Fh)LvnuZoam=Cew07B3o5Ih^BREB8$40whww|NUxMV--^OR+JR@&+lA;-rs(tdNK zp@m51Lo7UG$`LL~^+>oO_8KLvR{4|lg<`}>iVa|>Aw+>C$*HGpa}kxlj*x2bN}%jV zT#pzxGxd0gh@VtQ-$Y*;7TY8264LK(P$UQ`|7f*r!d3DbZOoA4XW`bw1#C=8B#?si5Be;xeg0W!g~n& zH}-MSK5F*#Ap@)ORn{>3@2@zu4L$GcqIk7iJ;{7!oT2y=a`c{*Uz%&_=ydr5;%XCc! z$yalLFfuVjlNy|i)L6`sgeA3tCjDJ6T}uf*={o&9KerV#c8Yc8{P@=V&Qem8VvRHG zS$wiyNg&F(th~Gq=SPfb)6N*=$gGoPTKBij;pA21HT+`?nig|SA;~FXn}>}pz?8J1n~-Mp(ipyF)#H{zQf86uSvKsoybry%h=8Z9WW>Ppr?SGZz`%$G+kH+6Q7|k6>r_c--HKJ`i%Km?jyg`GpWR^DP?GRN zDV z#{0*GlwAbvHiJ%_2FE`LuI&f5=)Eob)?72(^D6mwxNdPK8#Bf5azmrp26(;kU^)_Bvd=f=pZyV0HOU zjcQtg@t{l1bC=m0d7m;zdSB>HmraG@-RYTAo`7OPk}E0ng?VhV-2Wv}(@y(>a@V%L zZ>7VwlC46I>5lG*hcW`wyQq3DZAfp~$ir*c&L;(yVI!_#%qf{=*r_6C!Wzp5jD~RL ztSmAHJ^p&LaqraH@BwZdkXHH-yTd|f=IZ8B6#ZPesZZ`?iz zJRxd8Sl-9&pgt55fIiS@66XX0s{ltorvQOCAHXfdx|Iq@-jLZ-OAVG%e(MY-G=rpm zV9=483ciF+2ke)gJG$G}(p~-0S8cxoTN!vjiYPfil%!CU>~86kFpI|AQYLW&z%3w( zOcG8=RZJ38yR_f4(#?H^{QvXpE9I-e0)OtqLSk%WDi#aM#=~X5J6W2@44ZTs?b8uD z91MtN6-#E)LqTfP^c|I2w5e|EQR#ZH>bp^@e8j-A$fbtwo>4UH6 ze+IeGK|sU$9ogASrF+2B2SGAOP&jG%B3(3a-2)P3xHdO6*^PLDuseVzXlQqO2Psny zKu2KQs%gPMEC^MG7S^nJi~7INd7ITQ%RLJjs|kJvfG4N z#U_LRdLD79sVUudXLh&`5|qjgHRR}sjdy+1slkQpP*V0Z{q#lb33~c4*002C)l4qW zn-?PyNR+D5YF_zX_N_6cBE9uc4a0X%QJ6sln2jH)yw zfjyUYC2Y1sngyt4-lwF;SJqip>(Jxt_7-#C@r~P3((T@!>SMKP`v_i(Ww*A(7gSr! zw4CSW%LfXoA6JXczvn?ccdUSQCbyC6(Z<4eIvlFVYc}3&PZ28kA=tRA+{fF4KCfjl zX17jA!#avWiZJ%qqQ)rg;MC#)PIC2b7hjdAlCQ+5%x>NBwGU+?AMarDd>X&EhZZA{ z#7^^l%hgO~sY(fX7i&VmDh!$Lj6AwtM;IfMq$6{B>2;q`PfVMIb1$`)bnR&+-n&aH zvym(eKFRw5vzfaG489fpM!_VJ0b@o+a`yE5kDj<&iL_PhmM)Cr+S3`&E4F1hU>u3H zo-Do)F7>*y&YXPW40|0ak5=969Pv<{@Ef59QGfg5PR9GvvWvvIyY90#!EG&{7}}h1 z)uqv#QJKQmn`fd5+`?x&6bdzkXcd;UlTU49tQP>cV=dC+5X^Jl!qCo`$Ih={LrM_| zs77pPtl&)j8pF{w^Yv@ymxFi1Y11sd`8z*%??x^>`33h`6CH z&69guliDZ=3Af~zDG9ZjZ%USiC$FS$T=6ZI@MW=)yLo`~<^k1r(k5K|^ARQ|q^Q?( z`N4`ThmVRRZ^BD{51d*BAASP?uUJ63Vv@8-Bf-Fh%7g;FydE0ZvC9WYL~dR-8r0_F zxzPHb-YG@*mwUT|*3$|mL-Z%gZ-9{w0yeF8ke;uDy4^9E(mR z;Do95bW;NlqT2u z%xS~K)BLJx&_nyao@)0xz=_3HPGs*=TN}5t=DC@B4p&n>ObLPrRc&b=muGEaT~e#H zbElF{TFpyIr~bQ%8_}bK(k3zO(dX=tq6DAqF`$P_t|aaHUp>qnN`gJivjsgAEYtcS zc!lDikBeTutm5@168+gm-5{Y_zcy_G5s!t36v=(w@HG>S+sTS$$(oJy6EDX6m8PtW z-khci*3~aA4~kR|Y+s(>N`7$(En@IW>i+Yb@kwBfno?ET&KQ?qqeLND zalqEkzJC$GP{M$vmQu?FAcw&$vF>O+svd#}gWIC}g8>_Z)P=Nc+kR4J>?XA z;g-@G7TV(U_Icv1I(HBTS%20w6;si^NlZdR@%GURSJ00-X%71AF&2c{QTm^()o1(l&Cyix7lLWrFXvYKvlA79A$>-4~~Z~3W0gY-~2|}RFQV`s7FWTRYmXRnH__Jm^3>r1Fx-{>D^0_o_Uk@1vYmzX_U&$Sp#S*=FPIIhUeUFkX02j76 zD0~_Au>G4l;Vd>`NCeG~3VZ)4=)Hm7&ke+he9-qNE#r1U%4HT6%VcG=a~7z8>lcVyfIin;IqGvnn6{kGiSH%L{qittZpgUPSazoQ4IU4DP; zS}XZ}Ig?}q-pJaYL%K$j(B8Zab)>E-`UIV7v?nwx3?X9oR*@WRaW42D$ZgdcbXYoo zIVs%p*onTW6`{tp43_uw?N6m{|Q&KPrOU?%slXgvZgFqS1f=u68f6`jWQL*EL*Qx~BFb9hvVo zi>v5iApYBs@ng_M_b2E=G@A0}*uL=R-(SA^+w#CO+c$27)Z>{(GM9|t%m%94}2{09RfbidW=CI1vVdAyARtY4s3uA>V2GVYqSEl zyaRlMoA0tqJcf{7>7biYyO$j{FBZEOuo-5_2Q0pKfo+Cfy)3kZquwh9PIEXjUP4dB z_H0~i+1NI592e-xeeCq?)@Utm#Z*t2OL%8xCdiHS8v;FL+0fM8_Izv`6l{iBX3LzH zz!B48ToGC#QOoqeQK42oV;XweXV1y*$+n5p+B|vuHVVkn<5p@xPEq5Pr64DlKee`; z?4AzTbFzB^n*b6SQAu>==zHTrzR8#$gHR(;KOCu>s-}&2lDpA-?olmCpO0rEUK2`r zy2VALjNa^p8}XdgjrI$Ak$~7zYC6?K^Yzl8>4F){?5%Mu>5Xw7`X~1&4e`>dJtEP? z(4>-1EphL`1=ai_z8gdmw=0E_ekB4@VG)V2g^(lI#;sE_u1UEh5hh@j3cb^X?s3#o z_xjX!1LJbOg!D5e*9fP|g(r62+_f@eI(_s`lqpomOX84PbZEMM#ztY1z6ys|aInJ2 zEKeQnxCx{)%g@uI#Po=jA`C9gHY>;vvydy%+MbjNlMY9}YD5Z45{W1E z2eSzNdn<9~F|F&0(=&~vIAjJvcO%P7F2e^`xL0xJkR-FXR;p&^KJ98yyn#7I{!x)AgPkh3`A#+mBT8jaE|> zx(vp*YXH??9E7Yg-1mMj%R~6SW-cn2$HmDVpPkCGSmwke?DIPUM)?71%eQSJwDmE6}P1^f>U(4nt%woQCp>pRE-!f3ZbED zLc}ZoD2iLibemwXzbueHH}Ex2SYJm`$&$&ICD{@w6p^wl@T2H5{ej!K~j#H*?voT{ChY#|% zeYoDG01%a31dS(rtc%yre8fuoG;*I5RJ07IP32bj3T5$B4+J$>66@}h4`iDxh=4}* z__gW~C-h46)v~>seo$~SL2y<=m8zyR7z?sJI_DC-zPHMN13kq0Ig6>>P*~D-@E5EO z^aG8g$dDT_Z{ll)J##64M?hi?-A>84G*b12c}y|$Wy2iMFB~fJt%^6|`fQ0kNS6Z> z@fL4K^j4R>Xi?XV)4|WAD?m31FytsvYW6m5vjYpV94`?_w7hW-}2(r)=7Q7UGj&XE5WcRyg<8M4T_ zLiL?_Oz+L$bOUx0K--sK$7!e#i6qJQnb9n&pO9iU&Tf8xF6$nNFm9cNse?b^fMlTSWxn}n2 z20$zqiYZfHbpVLTzysR+-9KKE11eb$1ZbHZ_KXb(t*3&>mdHiC%f!#3z1~!2So|Hb zVb&f`EJQ=XfNWN2$PU?D378+lG*m;`_y*t|X-p^9JdU^cL)EOQa%Pc*5BQpH8UR_O ztKXSCGLrZgG#r{F9&(!8JvRa`pJO=mSo(5l-_z`uL48kw#Bkvvp80OmjCI5hi@n+o z-k+E&HXMEI9)Cj8OuUR^e^eO2_olt(ZL6vyx;r}(@d~5Alsbwnnm2Ql?td!J6AxTVA0AriPrT=#TrFO71y zsElQ;ns84TQP$Faq{JTQmQjS{Mcfg+$>W*Sz|=d#-1Tt_C)ts_!?#(fRv19>*LO`faOf&noL3<$v%I3V`n@h(yghC%U$E&j>VkxJI$ zZ#^#$g6ZO6K8zFj+{dnyD3wk)$=PQNR_XKXCIeIgKv_!j=3>(EomYc zT#}Zf1i>qbTtg?o2`m;&0Mh0Hhkxee<&WX_RJvDb;W#**wuD!r{05jVf`O)f#W?zv{t0lK2xda; zIqS&CX3HxOQYdtrzWa!F#wD|_E(f{X0s)I#PHi~i_ShGk!TCrY zrM8FOs&vhWPoUBu=L?ueU$gx{T{7&V11P3hd536iC;+u2CICwkd(#U8|L4sco7Kf?HQd$=E`y+vT?^%;w zG4*v`YT0>?RT_t~1N^5jx}m>{40IP4N7d_aICprLeXj=rt(3w=DjMW7s1;1rQ=EDO z$=|Jd{Q;Nxdwnk;GKszL5x)+e=%SQv1BI{d^u>4rct5zXe{X-G>M3I_S+T6ZCEi{R z^{F3MA46oY9L*1ubOH)`+B}vZGtb3N-6=z^8qEtd-x|ytRNJ-Ll9m5tU2z@%`}`{Q z1-VeJ!~gLWQ?twv;&y<<_YMJ-Dzr$20c<8VCFVE>i-))Bl%UihvV6baDT)26?;CD8 z6uPj3Zy*v5Vd3z0V~u&fZrSfMWxuzt1TSzlaF24(c8Z#NR6sS-%YLH%ScJY_W{hMB z^*1Nv5AH0V>@x1LO8b8v=x_c}SvSu@V6W?O;YXC!$qH%gX)~AatS3<;%6HT=NR#GV zuF|CE>qhrmrG&R&xK6zbD(_y5(GlM^>$_$Cn;2uMk@La#?&|clkyLrw7bC(wL*wqP z(c`rYgCWf2NpyRZcuc!IKhOm{PV9tL<6*WZkX~3dVI&d(?n>&>3W)~HOXVY#Cv&!H zgQ~r+qC$&j-gQuK8Lzw>jI;ylJCVwpD%Oy|eOYri`c%*>sTHnw+e+M7>UFi@evv#u zY+!Biy-x6=iK>1U5Wy|gMq=Qa=(3eN!qFYpc<}l^B$C|$iM$v{V*Txj2lUv(GYbFG>5|8RJQbRBYN zk+%X8M}JXmz;m%qECxq?8#dJ%ZK6dHhgsPn$IEg(c!4R#uu9A)SY;EMzeZTb1EZC; zEa-p9lwKigjn*_QUop(;8$Y4V^^l_P-wCVaRRJ1fKM6I2uJOPF@2skB%niwvy z%R;;CT;@Y|=Ekx5gutNCl7aR{pVnw=!)p34YjV89)fRXamqhQwYB_};g5g)S$>+L! zl#D-Iam!OPm++rcqWn5j1U{NOD&7`Pt+G zMzPZA zrFs6DGRN}anln|7<>P`g)By8pe*EPonWiXs8hf4Mj3M`;q}mZf`NeGK^0SLM&Sme# zyy~dzwt$Lg`0L#13l~+{@|uZr3;fLU54>1IzVb?Pv15{>DEIigX}tj)QZ#tkJ@I?N zjF5MCfiut7JMRMLe#kJaJY;{&IK6SFS6TYM3|m*1#~?|*j}Kl}fisUol7OR)*yy9k zBH(8w79ik^UCB9z1CNz&6&)mfyy3E3!hKA8>xGSkX_OfHYeFaS*Mv`r-DpC2n%EF3 zWPm5}aAv(>ZL`B``d@cts5X2c+L#uLxcN>Fet~zbNL6n6kn(xCm!-wKYrR;x^+SfN zd>gi_JT(E^RhAlmzD1^yESSdbWK8mUE?Aea=x^_Vn;ccLRA!{hSBc$E`Rxp1ud}Y< z_l>m33Xd$97IW86@XlVeN`+et{hxl*SV!DLJmo0BvjtyPJC@d@FP&u>`{+v&Q;-CP zW-Az)3#T0yxZ=#~OC3v9=6B9zW2R+kawIiv=@V|E{0BG;hgtWBuA3!Cj2oY)z0J;|jb~EE(uWWBCyn$fy4O=R*1Fe+mKVmLU_~ zRg;y-VaKA=SlB>6Er#E>Q78)q*J;&UHZy#>&6hUG^Ha3$mYnb`09I+D z0#;>N7AO&U5r%9cGS!Bn*okZjcUVHor8Ot@Vk4fcU(*6eUR74J<+xSB(A zQScKi)`Ve%p*tuMFvoy3zdEF|F-4B%X$Gh>k`@YmU9`}sB zTkic$s&=0aVl&@>io855D>N^jGV3!Zx=w#*SprD6aSjilg(dR*zpnTrgd1P3Jh_Fq z=gd-)w!&-T!4E}SMXHYtRZ0T5|0Z{yk^0R=cU=h?Y>@6Ib&$Z^)ZPMP)6e}jN3XyT zWC!?CaTBS5d`~hg@KpK~GLrp8>V>)Y6#A6fZ)ZR&~iKkk0e5{+=l|tMF9}KCJ07-6{2Z}A$rn=&V=ZRB#1r$ z!AKYc$IOM`K?n{x0>L3Oq3K)*)+Ix5s|~yag0pSl8xY)S14luyavOfB9Ha=*BppO^ zA|QGIqSX%|T5t@SE`aE)9kvwLLetkESRV<&f7-y~wm42ea3=(h9)RGs51?h5+e2VE b8-mk9As@BG?SQ;5Ah?_A-3h;Nw*UM$yUA^5 diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 8b0699a..9457d14 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -117,8 +117,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -457,7 +462,6 @@ void comp_main() TileSegRef tile_seg_ref; float area[8]; uint blend_stack[128][8]; - float blend_alpha_stack[128][8]; while (mem_ok) { Alloc param_3 = cmd_alloc; @@ -640,7 +644,6 @@ void comp_main() float4 param_34 = float4(rgba[k_11]); uint _1390 = packsRGB(param_34); blend_stack[d_2][k_11] = _1390; - blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f); rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -655,7 +658,7 @@ void comp_main() uint d_3 = min(clip_depth, 127u); uint param_35 = blend_stack[d_3][k_12]; float4 bg = unpacksRGB(param_35); - float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + float4 fg_1 = rgba[k_12] * area[k_12]; rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; } cmd_ref.offset += 4u; @@ -665,8 +668,8 @@ void comp_main() { Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1469; + CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; + cmd_ref = _1453; cmd_alloc.offset = cmd_ref.offset; break; } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index 9318cc8..3dc7517 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -175,8 +175,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 TileSegRef tile_seg_ref; spvUnsafeArray area; spvUnsafeArray, 128> blend_stack; - spvUnsafeArray, 128> blend_alpha_stack; while (mem_ok) { Alloc param_3 = cmd_alloc; @@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 float4 param_34 = float4(rgba[k_11]); uint _1390 = packsRGB(param_34); blend_stack[d_2][k_11] = _1390; - blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0); rgba[k_11] = float4(0.0); } clip_depth++; @@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 uint d_3 = min(clip_depth, 127u); uint param_35 = blend_stack[d_3][k_12]; float4 bg = unpacksRGB(param_35); - float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + float4 fg_1 = rgba[k_12] * area[k_12]; rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; } cmd_ref.offset += 4u; diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 0eb1e5aba458f4e344cc68f4bbaf4f26ee236928..31f11c96e4c01d55b9a933412f8efc21da13943d 100644 GIT binary patch delta 2806 zcmY+F>1$k77{*UBOM*?hm_+Kl%Lby>I4b;BcPjd6)B^bKi63 z$LZ3ksnW_sX=9mlCGHYe?tYlAJXGza-At)-70$VAcV}N~rn|d0>s;EE1$w3FZJpVo ztvs+TG}}NflkGBVF?NyJ+uC|JM&`6j1+j}|Ti=@N&-g!AFSvS1!1^+|hlvh7E)6tl zDrT(mXRGT=s{NXprB~F*PDC!#(c0VIKCmuV^sCLUsre)2CEVteb6s*wxrBR0_@M|t zC*0P7o^&q=_htGsJ(2BY@%G3zG0&ECAI{?mmly90;*#zlxF^%GF5-t5&vQ*(C5(1gOskHx)iI5&<9BaNtBth#^9>CXr9Etr-yPcc&KPEMrVq#P zNDRLg!*9g!XbkU);obg{rpvsy=9KU#t_&Aa1tD(`SEB_Laa3%U>cDjoF3jafxZ^Q= zB8E@K@aZDH-km8DcQ%I4#qjw#oOBoRtCnTTOX~cI<{SK(<_e9Z|7-K*`9GVFmek#% zvL{t9v1LMpznsMxW3*+1Xn$7rHahlCI zGtCz9+5j9b3T{{qh5Xo!KV8?*C;1+Ym1=L+*??O@O|JP?o%jZSRr}Iq{gKz8ywdU- z5J&q<7%wsm_&eI`y`0cO#hd4&;%MfH4UP4v?=j)ubX0la6*?QTKQ0a3a%>bv+h0IfJPjbzMwT{)o)J;{~OK!sFp$TEgJl=Z;kBC zR_iula!hHha``=-a-KunEApR`NN}D8$w&^i;O)W`TA`z00wb(zye$hQdL|}A;^^cdA3WO9&2FEFm1cXIl!k8mo)V@=9t)DdC;gGGn>?Pof>ciIQ(;b= znQMGN^Jg0Tuzw!enQf&ngvnvo_)B3lyT$PR8ss83nAm-)<}0rJ!AKa_B8P;3m%)-B z5hpn%^9;keb@(0?p3sa=JD?pCCI_}nTI64gcxw*I`X%O zez%^cA@LK!Xvh8OwN)N}peU4r^`!8KW^`NTlyJCAI=EZkiL(lN{Cb@h$HwPxQjRmi zA-2Qd$rxDW(3me3Tme zL$rLNwt^s{pjBvDh*p7WK(wNWS||kyiZ@>91;!XPG4XwNzf-4fGMV>%&zUpl%y(wC zzdt@Q5HC)|uT47_bCsbNQ1 zd2CwPQ&thXEF2Ur$(<o7Oy2~es?IzM?CzfhadAW37daUI9_>qn5oJHTRrs+55MK%w>`Ym!|!^y z&%^I|_GjQYr{D#q}z9P4-dVj2DhCX?VyyhmHtJi$B_zKNx$h>}w z_-@rf3H)ZO2ZzNk3rwu~+3YJIVzGOkz3X4Xe_h@pAaTN z65?Rmh0}_Gj(>ajPh&+@N946yUX-;;9POeoUPM?G)-{#i7zj<(dh=W@j%J>Dn>hP% zfSuu|4W%_}q;zW7sxD!)eX7qqpA`RDeUOXxgm;BwP36H_A>yuyvU^G#&EFG^OnHzF zE5cv;X3q%!8O?v_c%Ktrr@;^V^O2p|&TG9eG5WPvk^Ejr=@ugPiTte~65O*I1S2@u z7IzDiXhx-Tj2p!Hg4bzayH|rG>1J%U?j>O~;~RxZf)AbM6?jEtp$3i^&GA)XYy{;B zz-yFwuQu!v-mDoNJ68tH*1jfO*rd=Io5bl1bbN@hS$Gql*A~4lY>Q~YE#mYyIzII6 zR$=t@Q6fB?1VXns+l2j!-xRlsiMw6ARRg~OqXp;X#c08Kec1q=D!KS*w$Tn@0>i0j zr!XgB7xW!r*4(D=WP9*(qj#wQ6%@EW5iTAE+n8sZVAw1;&pE+5qsn>G@k3`rF5qrq zj+w+r7>*weNpsRu%vL~uYHpzdyE_9i_G?)8QyrS$@ieF1t3CM}n3J+c!=mgJM)Pmr zK5_pB!uN((EiDf|6gsWDNy0p(a1y*v1$`vUDr|$o@ESGl-LicwyjC-7u+7&Ya544^ zW8*h3igQ35ojAmUM>WLVL5ipvl63Gt*?l5RTJ~hXhr)|X=LEdx_6!^r=2V$^299X{ zT!SBn_C;i8wo@M#CWbu&UkanyD**pYgIL4{vv!xV`HCmwXe4acCSMExB7+70Mx5YO zdQ3B%TZ`{;;T@XM`BrG(3KIicyEeI9+IPa}7V~>yViwv=PsRxm`V0e3H2+Cq^t<%5 zb&HP*qcQmLRw>VXVk6S9lNi3Igg0tNw@t@{3!4`HkWP!U=NM0hf1}QbW90i+D#%%3 z0`fC$Jkm(Qv~!V0QuFi_Qp9;_tj8Zau^UvNjy-=W*ze@K@{Rd6%$D&F!engM;74IJ TyG+>7SZ{rEB)6h@==y&FMm3SG diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 6ec6d57058b870d1a8d3a1264b1ffd429577a045..f3bd028fbd66f475e8d8e6d61b968fddfa1dd9b9 100644 GIT binary patch delta 5575 zcmZ`+2~<@L7nHWtsgz6*qehuE zPFE1M)S?E5UcI$#LLvmEy}?pTebx&~En1!h>T5%9`}+0{iMQ*n^;iqoJLfz9@csYy z@16b?j$C$zTq2zmtcWU)Q~ae^+1jmc4% zzUNK(n5m7pIKFb|P8;x1_7sOc8fJWKfiKq7&-efvAMU5`h=7l<^wYq{MeZQ*@x9*< z&__9h+8i@3T$>EA0WRnd!nr|d26TzC1H2>9?|BDgMVT_YQt@)7@+N$@hT+#WKEiPuI4r37;<6j3TEk61=a5=DX3qBilIjpD#Tkxle9!u}ujaq+v9 zO%wYqteOq1LH76Seo-n~RtAP1koNn}zDz30_s$WK|J53Jg=Apy5UR$qJs@(Drp^ZG zh?wE1r#nf@?UH(b5R+-ZGP_a|t@G8ZxD>qII&Yk`lFng>)9GfJ*Q#Of~mF zpcPvDt&|LG77@#71tfiM# zDmUw)LgF75K$cAE2{;zHffWLKXfpIFv-zgX>qu(dBVR3GA0{#1Y4`cJcrNt{{gqw~@--sb^OICYuQ2aCb%s z?1enSI0_pEV*x%=(E_73)LApH#6ZJEu>B{8MOayG&l9xSegBZI6fuQcrsT-yK? zz<`2UZ_3&a__aXQOq^Qw%0`f;9w?)JPU58qpp1Mdv`=<6%w_{(6-Y|jIN31T3-Svg zng-NKMI;%hB*F#)d@r2;JB+Rm>Gxkk43xf+fAm#Uxmih0B)DZgqTg$lRYXA9)aX63 zNpFm}%sKkZQ9+M4w4HGhq4^7s; zFj?{n|1;*0{iU7)#dw0Ur=Vz@3nV5Y25J*tn#NYwN4=&#ve|c=Ca|;bTYS)4wpPTM z=;}#%s6d%vZ|j_LhuPhce);nP+Jx?6Qv>HS#})2kzbpYK zoDUT!m=83XEFcSIZBz;e~3@nBOkf)x49~zDcSJeh|B*S2ol!dHgWGL?)E! z4OKe6W*7U@5xu%Vw=MNl$KjFrr#Fr&_Vp?4y~d`7x`b$cm`@%>`1rJ;Dn5(tlwV1n z>cWeG)Li|v|A}dZyR~jVx#H_y`aHe%IZpiC-m5#>Q0JS%Kk4o*dGuDDd-;0ii{-kK zt-2>V@wu}c$_GccvQC#C;&D1y$}+$G+Ol@*C#F1+S`d$oDm39?9Qw48#h6+nn2u*ZREWdF;z-kSfxVX|5y~2ZZ@GKBqGk5HsjaA$GDiWSjoH8y7Rjot%R=!Xl5_jOyOS* zyU#qv+W`vSO1-CM5=lTYL-$H{+=?1LF;IhaXt(1t1#feDOFS2Vpra@8%6N=B?N;a=$&-L}m_sinf~k>_m?1pf zwA6BV>2%Z5>G|MnX!n+2M=@%EDcw<)F1jLeYuO8bfV`?R%w*R`^@ z;3dBoR!u8_-$1|-3q)5*#NTd}Vc`mRGt~PYtfIcB z2@3|))3%Pqo7?Dv_0q*T!@jHNk6D#hhgUN)UsH3o`0l6)>>v1QBt0KhqP0%HRU)&S z#8WlWU%r${9kTL4S^0whB(cT;NsL$kNu>Iod9s>y-sy0hhDi1gut_)1mVn-MOd@w!(iLMb>!NrPR6L>CNC_GI?ZBQdi7K&RwYpDKR|t%@H?l- z3d>&S`Z^duSEj-s@`}})#`nxJykWId$H`kb(+Gd&K9Pq znySjE(bGeWyooOH@nMBE)l%~J!uoTbF&HeslyG7cpD&2z%$$g|=kew4!|}}dqJ8e= z&GQ>?VT8YqDl(Sz^-^~=)-iv25yRuBXY}rAJ4QR}Sw4+@u^eB1R?pWhE;73Q1mAla z8}pI=uYdQ~obP`Bg0cqx!SC_%*jWVmQ#`LK5T2V*b{V1LU&3PSG_8b{xo;8r0vQ&1 z{c8d8kykZYJE9xDV8#y>(rvtsbV5Q$`kgqS#@MGFonR|JuplhLWsN%J$F7)E4LR!#QGIOyx^vkL9h3-3! z^PFdS$a(2};h(Dqntjc)n-m|e?dd1?@HVa7a+nie$@tK@oA~b&3S3my{JhL-Hp@zo z3Zqz4U6%Sfrmm{&qQ)ub_4SgJpiQ!-1dDi}wuCUgzmwHq?$wio;KH^H4!%NPNdKyC zSc_E{l0u83K<+;S+*|0kcri+v0Pz0wEQ)`AIotbc)s2 zvR`e9mj<_rRiG^pq!j$Vp{u9f#cyN&mf$k>@(8&$!J19_i>WwkFYfyhJ&xIFIn)-Z zSvZt@Bf~^_L+5aTgCSVX2ViL#1lJ#c;J-1I0F5q!=;tzkru@@|4uxpD3%whn^Xnk` zM`#)qLhxWF1pf z=t&oP14NJKKy)=kPb5LKC<>zO5Y0OR!O;QGbTI^X=0flx7dRP$aTmBBf?HkSMG#zA z0>RHASa%YF87UCl6D2POgnR?hsmGw{Ef6i<3ekfrpy?YBZCV7;zd_T|F;^TXAozU< ewp2rK^J-}N588cTTA}8Yu_Xy|BB+)7=l=lzJKZG! delta 5708 zcmZ`-2~<sYD~N4vt8D^-fTb;1UvFBif{!Y-wgrmsf!EvKy)*dkTW>uU3-%`8`G@cS zzklz;VA{ZHcA`R-(D+7GH0$msU$NISLsx%ZNJS9@892)3kQ@{QnL4`cuh2Fx1W_U+ z)hNz)X^JKCg9slCiZ z;%NawZ5JB}-j4LOp{Cc#;PKA)u^22K3!MK3_j%aHqkFGdN#vzH(7_fOgTnyR+o1e#-0p48O&jR-(5HY3s9)SbUE@lg6E2id0qj zX2+Fx%e+$N+Vam3m-_yvutdTr5F*`V8=p)B8;Wz22sY+HR8X~BF{W)cEQ?TE21e$x z;+2oIzTIR@lkddRx4{R?mCg+YHZ*RWnh$(zif;lw(zye`$H%^pppRmw50lG>bCU=* zzz5wC#AU^6rOPjXM}(zSdlX{``F9I+Gve~H)9Gc{4Z_wsprxsQJXV zl)!1BaMrKT({5KbUe0Wsn?$}B^yD*kW;QF{BzmXj})v`wckaj1t8WY9;YNA8=G+*35baJ?(epET$SH2T zN)2-I`pe|Z$>r&wD<_vHun~|ch+b(vguOhb6Wh9rWC%SL3r5MEbOU3+kJ5=12~U{F zmLdWN2^`mHvWLAgbjvmcq62;botRzH!xto|>DhD}gBw_nWlLtoyISK;>Qwgv>`&nd z1`=p8_{L&FXi~?dzv0t`hSvO{xV4n5Xs?jQ2B{_F9i?RAW@4dayRc5jzOE9IGjPdV zwUm`D^+}|wed;sYt?a8sO7icx#weYxke=N2^_~?uGub0UakdDlD3woczKUgAX65Qq zE!8MN%g0ON^8L(=HXBGMAjr?tpy_9A5!gZzh>62Z&kujIz@er5kdMT~n+R*BY-W^s zts5y(Cs&;6JCsih-dBY>#!TMg4*za)A~K6;9?B2Y91ISx^r`00BP-j9i?l0K=p2Ub zWx7RPl|!MK@fg1TEnIuV_cEWgSX3gxri4f*c2m+*>Pv6Qw)o}h)_CG8%J&~;+;zq0 z?};yW)E0-u!}#{}VMd&fQ1sS^te5kBrE82~m}G$v)ws*m)3fGm0>Wyiq8VSk$XOl4 zdcrXFhoym0HM+nj zJwGaMQ|0Vni=Bar1!O8{hkCPI@p3K0h=+g>q_^f2psx(7WRlZNje&?rg>lOECHe+j z6o)V{gCOR&;5aI6{!FW+-(DUf4hxwON$cyV`m!|2a8!X34vH^#I98g6&hQEn+AX+% z>;+tuj#n08-DXsT5tW;FSd`a-N_~W_Ta_)!XdPWUQaK!BQMR1P)c-+LEoRWB2YGmR zIbRI&cRt)yRSbwKD22wW-ZINA{v2*rS8SOIRJ0s5WeO{~(lR;S7s0H<8FhcB94xR6 zBN7I^_G$GIend?CHQT;=J197XD0xpwSLvsX_*}^L#JsD_`krbl3VLYncs@tBp+wzE zo#NMleqfNbIT|a@B2BPenH$8r1<5r`7bTBXvHA}kV_N@T&&&h;!lxrQ^r8Xp2T`sd zlMYVHhgt6E$GrAor5!_O!bc6SfNqlD$O)3l!Ln_41?l@2UZ-5*j()mslJdBq-JrPZ zwU5rxFmyshP3sj3DIUxAYEzPSw%c-j^?q!B$IwW`v}%RR@((1Hf{l4XCa(NJFy;w4 zYxEWU7mhItE2zsAvSft#wk5)vIWx%1K;JGR{fWBU?M(Fnt-O@iPSv0p8sV5M3pmTNXG~l`|gf zmd(c6w&W6Qek731N+ZQBn_j^}8OM4U%7!Z-xYPJ&a?M@&a4=L&O0_480)pUErmYWT zk!^Tk^7ugV>sT~2Njl;&Ss4~X(9E+QxvP4zwD(@YlhEFKKw@ZghG_U!6g%~q2M42S?QAe5WofY?tUc&Usy4#g{b>4~2^m0d60 z@VnX%ri-r?sgJ`|HVagL%iC`qQ@UwCfF48AL09gkkhOhQFqFvf7#yp)j=5Qy`tlm~ z258l`1R+xB+lz~YVA^R6=kgwhx``zg4uqN;K?8I3C|@n?t=7S@5y3aSWOb7$Xj~#a z1-kkd1|7L*5XtFuFp!k&;|HkjoQ3Q+snejj1JHPE>d zZ-mNR%LWSEy*pA61r+#RBv9HzY-`FkptOvwK(vFaZW;%GXwTjR;=JJBeEb;5Eu$HT zP#M0cJran}tXT*IJ(8|?xCIJ&9oZxwEmXD)&-b&=zA(H+Z9`B%bIfs+y^bo|799kP zP8}NP8z7DQ0%;3@!#~Tn6+a~GtMaKf!Etb=AS&_=^g1wI3Iolw9zXF3au2vo1~Vby z;u!s_O~~&cqzLFXd*69Y;S%j$5bp^PRQ*Enw^6e#%Rw&RfPfW&<_*Vb!(4C%7a`lI zwOz~>op+8riOztWM{!O}G@G``{Rt+0^N^o~yr_u1n6S;PKQkMf*t@+}_?Xge zMA$PpGjdWSD;!nX`ueuV1)GkIaBA_#Tc;}p-kP`B;)FE0_kgqorz?EJqn6ucRgGu3g=f0^Z>L`w94RbG3hKfWaRWug$ftjMJVeY2nCy_% z*998NF7ZcIi5O21JdHE0_G${yU1B2LVn$IPc$a;-3jwW^E<`GuHM8iI9KwCnuz?&* zJG|cFwQwKz1Adv(Rrrsd_Mhyas<(r}AHKlFMG|;F`0Btudx`!Ydo@KHU+g7!m80hL z{Z;QEWw0Dg-|Lt}4D_^PEUC;9hMyiPN3L}{78!3?9UJsJOa-MYex3V~_v1$k>v_TT&LP4{(m3x48;~#v(CZuVIFfO05yhOWhs4A7{3H zgTB4DveHE0+;UU@V65s^IMNQNzkpPIt)F{S;uGK0iQSX*XlteGX^i{*`wlz%4?a#*46r?cWO3@$kdI1GFc8Vxv<-Do2JKOCMXe*rl} zG{*pmBY)^O5QO*zT!zvmQPZq=TSgp-#w|#0E3fbsMW=Vms_@`&y;EqxMrpYSjMj|! z(ElY`WUs|r{%B&!lNgdh5!=LTvUrWF}|c5kUThd$Y%udmZE znPtZ6VYgaAeVHQgB8)}DvQJQ8+2gDTj+X~OnI0e*HdG1`gK50#BpA{l;B*;1En4D| zg)-%0-VeOI+_Czk(9j5VpP+FLD}Hon6_duDq@44123{>BGkft`e$lI7_*Li5`CKzX z>1wI`#&=^w9rxo9-ty8_o=G-kxN?SdKR(FNk^4P*yS=I^!5j^;DH~DW%w5 z$BVNWZB&&I*Ka59YN+#hfblrJRB=qbf7y8s`QnM48uuJ@pMnnW^HXQIKQd!N)0-kABl$K0C?Rp7VWld%YDLQVe+6J^8$N z*8CTDf%l%S_uK{Ef0|=mam4kS(UH3=usr*pXx^#{8B)W&d-19soOyh*5*%%$hB?}N zA|W8T7y)PO3jReDyjDKfo+Vr4BUd#_;a!H7g%3m;bvX7k=_2WAQah;=ORC6HSRCPQl&xueg z{`P#gp~CmIb9t{z%0D=muv*J*tij}tx9%4JN7f^~IFYMC(^XpV3?UqgH#fK?i= zgjI>p2PL8`!I6y>9D}tq0Z}Q1UnrMVU)uNn@Xw8u@ z47|yRGDK-bCcDl&IPzgxgvo+0opP)*sd9T-^GPe-TzZz(q|Fr;48!-^<`WZt-$%6` z;o5gS(HsliPKJYM0L-BvpFzGwi|+*g2HZ$KO8HLtrNqxVr-YprtS~GlreLp`w`Lqt z`g}x>U|zDeSVQ$o9g&tqGkGI<$O)bU!MOqeHk3ec>k$aP2EjqG5Iijh;L7_>@H_~< zsd1wBL$sm}qQ{+RF+@9(Ao?E=ZQ((5RtPZNeHem^jzaKDe$ccWf(Mczcn%eSO9c>| z5(j|uS2vY7NY-j wqQ{(ZoP_8L5Iu4bqFdKN(+t{OV7dT;vmziLwWOVZybmF`lXVIF7ufyvFS3Ul*#H0l diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 7426758..5ff97fb 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -117,8 +117,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -457,7 +462,6 @@ void comp_main() TileSegRef tile_seg_ref; float area[8]; uint blend_stack[128][8]; - float blend_alpha_stack[128][8]; while (mem_ok) { Alloc param_3 = cmd_alloc; @@ -640,7 +644,6 @@ void comp_main() float4 param_34 = float4(rgba[k_11]); uint _1390 = packsRGB(param_34); blend_stack[d_2][k_11] = _1390; - blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f); rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -655,7 +658,7 @@ void comp_main() uint d_3 = min(clip_depth, 127u); uint param_35 = blend_stack[d_3][k_12]; float4 bg = unpacksRGB(param_35); - float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + float4 fg_1 = rgba[k_12] * area[k_12]; rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; } cmd_ref.offset += 4u; @@ -665,8 +668,8 @@ void comp_main() { Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1469; + CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; + cmd_ref = _1453; cmd_alloc.offset = cmd_ref.offset; break; } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index e672020..15351a0 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -175,8 +175,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 TileSegRef tile_seg_ref; spvUnsafeArray area; spvUnsafeArray, 128> blend_stack; - spvUnsafeArray, 128> blend_alpha_stack; while (mem_ok) { Alloc param_3 = cmd_alloc; @@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 float4 param_34 = float4(rgba[k_11]); uint _1390 = packsRGB(param_34); blend_stack[d_2][k_11] = _1390; - blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0); rgba[k_11] = float4(0.0); } clip_depth++; @@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 uint d_3 = min(clip_depth, 127u); uint param_35 = blend_stack[d_3][k_12]; float4 bg = unpacksRGB(param_35); - float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + float4 fg_1 = rgba[k_12] * area[k_12]; rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; } cmd_ref.offset += 4u; diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 61e5b1cc0b7161e3317f50b0f55fdf0ac03c307e..42964c881f8a62fc5fc30cc5cfc7897ca088318c 100644 GIT binary patch delta 2525 zcmY+ETWnNS6o&WAblR#E)OIjUF{BoJFjii?q%qZy7%zzr#Y=!hoXm_CYg-uFsx?wi z0Y$u_aJ*opqEJ&6Z868?jC)-8o&@g@F0MzfaZd}E z3j>Az#P^(dcjB8o%U9#xK8t5uB;FIp)wrYJ{=)iu6FwLH($bmDSNm?0muPo*+Tui86E(L^rf=aq{BF`YcY2t!n7-S?FM9YT5AO;8ZM!IV)l>F* zc-+IU{f{drzob$~9aMO$xCSZih@0WCbO&xu@Ch%+DdF>^%NPB+Y;}FAIlR98s<7N% z>O>3nFki47Nm(du? zUZ+di6sEeD1_Q~iQSGYLu0e6M)57c`1KQuZ=-XxglD$*5Q>SD9knlofwfG0b(JUSx z8eP!bgTlY*!Xjbi2!G0FJ06mTZaId9(PmQBxQE3pj=tc-;g8)d!6QOc*_xbSvpAaH z0UA*wvh%9UX&x2kG}u3Nno-#;TKw>DP5f9p?KWX@98#@%?RzZi0*81|%Tcx2Ou=H9;gaFO^IAK$uI#@=_1WeyGI{|3`@*YnT49Fgfg{ej{{nC?i`nQKkctSJmaRE8?SP%&N_TRyQ(i?%j2XU>fdH8f_X$3HjxnG0)Och0%R9i`5t#h{~Wc~I;r z2TSf!P|juDW3n0ED7-Y)BVv}}Q|(?9?h3ktm8q>?ygjwOcb2W%4W7lbZlp=ViX^bw z9RsfnmX%UI9^cs1mWzL%wXG_oNPah{sQW$qfQKLSFa?_bD;}me(>Hl|i-)&*_>Bq< zlZD^(47)u1wug6n_#F=qczDklT-o$Rm726hh0hY#7f;sYP|kDBlVoOUnDCOEjDtdu z&&-HM=3EiwTQ7)PT8A=)CZ+3=*L7LvikdGJzels$E751VDQ+zK_43(;FVb9Nny--; zoQ3&artUeaYEH&;=1!{#Nx;V-39ig^iO=a2Z(y(S$L7h?@jr9_3dA>-zVO$!$BveMHMuVm1LSO2&T3&2xl{nfdVZ6w&D*CU~7=}Vq6>pxa#nH?&v5B)D2f9Z1H+|b! z*lJoU4c&6A6Gl6z-CEou;uc2*aQTJqwEA|g3iwBJt7c2OUJfk2Hr>Lb;%I(hXe6VX zMpY2G9v3DT^WXXgPilTjgCF*%Q#*5;3oD;}g9u6XX|cQqdEY2lB$A{+L zB8JsXpygk|4^Bh z{A+QNQ}VcGI9D3qZ-jShM(5L^eJe~3Y`t3KdTHMYqg&4Jg~?g@cTuDNAVL#jSfCdQ z#})-nh+|u*SBKm3qcEC3mdIDTNEX4)DRE-#l-U+NF3h6VV<&{sPBkepyTgd+e}*G* JU&rtz{{t;qdJ_Nu diff --git a/piet-gpu/shader/gen/path_coarse.hlsl b/piet-gpu/shader/gen/path_coarse.hlsl index 6025bde..59cd7a6 100644 --- a/piet-gpu/shader/gen/path_coarse.hlsl +++ b/piet-gpu/shader/gen/path_coarse.hlsl @@ -86,8 +86,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/path_coarse.msl b/piet-gpu/shader/gen/path_coarse.msl index d263f31..f3cead8 100644 --- a/piet-gpu/shader/gen/path_coarse.msl +++ b/piet-gpu/shader/gen/path_coarse.msl @@ -146,8 +146,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/path_coarse.spv b/piet-gpu/shader/gen/path_coarse.spv index 0da044f27840585e1c819014b0fa359181e78b65..7c452bfb03af5063b9654b8caef5e9b1e8c36102 100644 GIT binary patch delta 356 zcmbQUnQ6vWrVUbp^=u4>nHU&&7#J9mb21C!6LWI%lNlJ;7}&w$yfE>k%w&Wd2Uv~| zCRbdNn4ArhMd;;+$tNY{S3srN7+Ar21%PsS@g+rxlQjjoH3h-Eg2a*xkZyEr`SEJ#>p^9`jVWdP>}M}`0Z delta 86 zcmbQSm1)jqrVUbp?5qrjnHU&&CQlR;7UgANV91LvNG!=?v#1eR0|649>| wtVX*lytd=V_Orx95L0dA846&C?5vl$g%5WN5;?EnA( diff --git a/piet-gpu/shader/gen/pathseg.hlsl b/piet-gpu/shader/gen/pathseg.hlsl index f7c9e2d..0501f6f 100644 --- a/piet-gpu/shader/gen/pathseg.hlsl +++ b/piet-gpu/shader/gen/pathseg.hlsl @@ -64,8 +64,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -80,7 +85,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u }; RWByteAddressBuffer _111 : register(u0, space0); ByteAddressBuffer _574 : register(t2, space0); ByteAddressBuffer _639 : register(t1, space0); -ByteAddressBuffer _709 : register(t3, space0); +ByteAddressBuffer _710 : register(t3, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -356,7 +361,7 @@ uint round_up(float x) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0); + uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint param = tag_word; TagMonoid local_tm = reduce_tag(param); sh_tag[gl_LocalInvocationID.x] = local_tm; @@ -377,17 +382,17 @@ void comp_main() TagMonoid tm = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - TagMonoid _715; - _715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0); - _715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4); - _715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8); - _715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12); - _715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16); - tm.trans_ix = _715.trans_ix; - tm.linewidth_ix = _715.linewidth_ix; - tm.pathseg_ix = _715.pathseg_ix; - tm.path_ix = _715.path_ix; - tm.pathseg_offset = _715.pathseg_offset; + TagMonoid _716; + _716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0); + _716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4); + _716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8); + _716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12); + _716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16); + tm.trans_ix = _716.trans_ix; + tm.linewidth_ix = _716.linewidth_ix; + tm.pathseg_ix = _716.pathseg_ix; + tm.path_ix = _716.path_ix; + tm.pathseg_offset = _716.pathseg_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -395,14 +400,14 @@ void comp_main() TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; tm = combine_tag_monoid(param_3, param_4); } - uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix; + uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset; + uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; uint trans_ix = tm.trans_ix; - TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) }; - TransformSegRef trans_ref = _770; - PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _780; + TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) }; + TransformSegRef trans_ref = _771; + PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; + PathSegRef ps_ref = _781; float linewidth[4]; uint save_trans_ix[4]; float2 p0; @@ -455,9 +460,9 @@ void comp_main() } } } - Alloc _876; - _876.offset = _639.Load(36); - param_13.offset = _876.offset; + Alloc _877; + _877.offset = _639.Load(36); + param_13.offset = _877.offset; TransformSegRef param_14 = trans_ref; TransformSeg transform = TransformSeg_read(param_13, param_14); p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; @@ -466,25 +471,25 @@ void comp_main() if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; + float4 _947 = bbox; + float2 _950 = min(_947.xy, p2); + bbox.x = _950.x; + bbox.y = _950.y; + float4 _955 = bbox; + float2 _958 = max(_955.zw, p2); + bbox.z = _958.x; + bbox.w = _958.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; + float4 _983 = bbox; + float2 _986 = min(_983.xy, p3); + bbox.x = _986.x; + bbox.y = _986.y; + float4 _991 = bbox; + float2 _994 = max(_991.zw, p3); + bbox.z = _994.x; + bbox.w = _994.y; } else { @@ -515,9 +520,9 @@ void comp_main() cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.stroke = stroke; uint fill_mode = uint(linewidth[i_1] >= 0.0f); - Alloc _1088; - _1088.offset = _639.Load(28); - param_15.offset = _1088.offset; + Alloc _1089; + _1089.offset = _639.Load(28); + param_15.offset = _1089.offset; PathSegRef param_16 = ps_ref; uint param_17 = fill_mode; PathCubic param_18 = cubic; @@ -574,17 +579,17 @@ void comp_main() Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) + bool _1264 = i_4 == 3u; + bool _1270; + if (_1264) { - _1269 = gl_LocalInvocationID.x == 255u; + _1270 = gl_LocalInvocationID.x == 255u; } else { - _1269 = _1263; + _1270 = _1264; } - if (_1269) + if (_1270) { do_atomic = true; } @@ -612,30 +617,30 @@ void comp_main() } if (do_atomic) { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) + bool _1335 = m.bbox.z > m.bbox.x; + bool _1344; + if (!_1335) { - _1343 = m.bbox.w > m.bbox.y; + _1344 = m.bbox.w > m.bbox.y; } else { - _1343 = _1334; + _1344 = _1335; } - if (_1343) + if (_1344) { float param_29 = m.bbox.x; - uint _1352; - _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352); + uint _1353; + _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353); float param_30 = m.bbox.y; - uint _1360; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360); + uint _1361; + _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361); float param_31 = m.bbox.z; - uint _1368; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368); + uint _1369; + _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369); float param_32 = m.bbox.w; - uint _1376; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376); + uint _1377; + _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377); } bbox_out_ix += 6u; } diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl index 9708585..0e97d68 100644 --- a/piet-gpu/shader/gen/pathseg.msl +++ b/piet-gpu/shader/gen/pathseg.msl @@ -129,8 +129,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -430,7 +435,7 @@ uint round_up(thread const float& x) return uint(fast::min(65535.0, ceil(x) + 32768.0)); } -kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup TagMonoid sh_tag[256]; threadgroup Monoid sh_scratch[256]; @@ -456,12 +461,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 TagMonoid tm = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _712 = gl_WorkGroupID.x - 1u; - tm.trans_ix = _709.parent[_712].trans_ix; - tm.linewidth_ix = _709.parent[_712].linewidth_ix; - tm.pathseg_ix = _709.parent[_712].pathseg_ix; - tm.path_ix = _709.parent[_712].path_ix; - tm.pathseg_offset = _709.parent[_712].pathseg_offset; + uint _713 = gl_WorkGroupID.x - 1u; + tm.trans_ix = _710.parent[_713].trans_ix; + tm.linewidth_ix = _710.parent[_713].linewidth_ix; + tm.pathseg_ix = _710.parent[_713].pathseg_ix; + tm.path_ix = _710.parent[_713].path_ix; + tm.pathseg_offset = _710.parent[_713].pathseg_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -536,25 +541,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = fast::min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = fast::max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; + float4 _947 = bbox; + float2 _950 = fast::min(_947.xy, p2); + bbox.x = _950.x; + bbox.y = _950.y; + float4 _955 = bbox; + float2 _958 = fast::max(_955.zw, p2); + bbox.z = _958.x; + bbox.w = _958.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = fast::min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = fast::max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; + float4 _983 = bbox; + float2 _986 = fast::min(_983.xy, p3); + bbox.x = _986.x; + bbox.y = _986.y; + float4 _991 = bbox; + float2 _994 = fast::max(_991.zw, p3); + bbox.z = _994.x; + bbox.w = _994.y; } else { @@ -642,17 +647,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) + bool _1264 = i_4 == 3u; + bool _1270; + if (_1264) { - _1269 = gl_LocalInvocationID.x == 255u; + _1270 = gl_LocalInvocationID.x == 255u; } else { - _1269 = _1263; + _1270 = _1264; } - if (_1269) + if (_1270) { do_atomic = true; } @@ -680,26 +685,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } if (do_atomic) { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) + bool _1335 = m.bbox.z > m.bbox.x; + bool _1344; + if (!_1335) { - _1343 = m.bbox.w > m.bbox.y; + _1344 = m.bbox.w > m.bbox.y; } else { - _1343 = _1334; + _1344 = _1335; } - if (_1343) + if (_1344) { float param_29 = m.bbox.x; - uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); + uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); float param_30 = m.bbox.y; - uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); + uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); float param_31 = m.bbox.z; - uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); + uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); float param_32 = m.bbox.w; - uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); + uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); } bbox_out_ix += 6u; } diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv index 37c98470f4f2f05b53b865e6e1eff5a7d53a8792..a1f223c7c9a3786d8e566f32d49258779373159a 100644 GIT binary patch literal 35000 zcmb822b5k_^}autNul@N5^Cr@^b!Ju5=iKsVKSM7fyqovCIJ-b*bo#DEJziRCL$_G zRq3MGyC`76f?!3E`hTAL-FI?w*J7>z@4{K%ZIq_uc! zbZh2_R&_1cS`?+#TB0?wswYi8Z1NU!ho*0_-S%7Quym`V^3!J-+S#m=GON39K*PL6 zTdhke4^Wn+4{gh7pn+{H9sGy6^)RH9=#wUQO`0-g(&T-*dS=h=nmcqxe{WZR-#~BI z^u9U0Jwtu-d-WS#`1K9+^v|1KI*h=h%tOxLS`+&F2YaYTlBwKlaK?V4OKs;;WOoR6}v<8`U0Rd{4;BkDQ5)93Y6VaL_$KcclEx_O*7ILGND z$C0g#sSkJVW(>}mby)ArLwjeSj;M5f7L0dfYm0`jcJB^#uUXwg@V1|GU#&@h_Yhm3 z9+z)zLtVz|a#LhF7VOsyf8ngEt!zIXEpOtCdu#9v14ljz!&T z!B|GL#>0nw!Hz@CYc>JG9mi~fgjf59frMB4Wm|c>&+L`<{ux>I>G`to{%re>L}##r zr!qk!T5Gm;g%9V{Ro#j2Iy=LDjI)}Wde(;Z&F9b9W}(vFWvCtp#OrA7M4fru8Qh-7 zj+XYhmCuOQ?$q{KZ;fi4YF-QGbFJ1s@Y=7dn*aZg{M&t2*|SIgZv!y@3B*kPdx6{e zceW;iXY_Z^Y>%O%wKu$tJ6n^$IY3-9x!1I5gZ0_b)?D`meJt0S49~sXzTdMn?cSK` z?aFm%sMUNQKp)BXKx0xo(Wg+i`)r;eqgsbytIp0D+iue}d$4a{DED(m>riZ-c=mB* z{Q9;Y-}F0T*l%R(IQ<6a4NUKvK6u)|xEbxwhK|;;=uUw5(TT%8^Jcd_+WX~%VV}-c z4|sO>obFjwo6eT^c*)fd>YX(>=M3+SkuA9wpHN-dH_@GI_1-yi2IqiX7yZ50(5pUN z??JeipB3||%XsF#fOoXcf$L+tk5HTM2>!d1x|+tW9%f;vvVXO(e;+*1o-D^!e z5zG--=?8*&H~4!C`vjIO?i%WeY`aq6{n@2Vg4qmMx6K{@hp ze4mkK5B2oNkocq2+gZgl+-5Yk?ty{97!qSK^!9UDJ9zZP(f#~i;6J(=;}Yo8S+M%E zxz2dO-1vQ}r@wFZf_ClkFBP4~&;_=3pMF=X+RPp5?m0Eq1@pR0>_tMZqjSD2Hs;gk zV#eCmma8;p!amyC@|9+k1^upo)*gFyg0^-}D`H#lJ*l*}$FmZ4Be>UM%{r`H+x#9C zn|t&9?`YizHxr+U9sm>PsKGg> zPU87#wl|?UKa75O<2$LE*TT>IqbgtTrv-EEo$fk}tZX*Rp4Q@=*_Zc|dcKpYc^}2M zocBkuP3@k;O}t><)x4NTbo1_LJzi_h!?WOWj-PMvKg#Pb*ZRK%??0{j+Du;~Td%<9 zcF*svj)}I6KBDy+dN~JbZwzZbt+&^qlL69yC79dN@b;SVEgC#LUzPvJ)>yPEUeURI zZ-DL|opwg`m9xrW6Mfm6LubtPct-1JjYso4o@3cTJp3HXc`xS)P1|k_+eA4}W7_s@ z*e1)jZQG_SWb14l+TceucvpkZT!@co^}(y5`W?NVvr`uG>1@qz@XO)(PGi1r!WVqk z>};*W2CDA0y1xzJJ$`Sl_=wg{@ELvm{k+_m-u_(cZ0&)zV6Hk_dpG#L4Zhz(d_-#s zeBo#7&eoBY<{OE*)x7q?`{q_V3Et7V5Z*m=W`#ebt%k@v*7-P+IO}dZ1Be`-o6LjYfr+fBYJQipK_;H{r>=M&S3lV{TXmO z*N)a7z*)B!z-8WlUWkuqy#$}mC)d#6ta>dvTPyP6ShiWC!Pjl@jT?NQ20y65k7)1{ z3OeN`dHsxElVX2)R!Vl%S2d2v;r^DYHNPw-hR-tk?O13&zo_{xD}IhsZO=mUxkYX7Li4#r zZNEbExkYVqp*crtQwz;!6t%+(&1Vy}qbsd*`1vFAndcFGyxc!(9To1Zo=w)REbaZ` zvx@u2wRgYzGXm$&_r~%EYQ8$$vFAA;{YPu=9sT&f$~Ai@pZm7W-pS{_nsaK*M`-9i zY44lPRzB-9#_;6e7}YxJ_C5!!3-+=2c`VM~uvc!ztpMJD+UWqowA4Bn4 z8;g6it@#s=_MTN6gRlEC_7&;PJvo-SGKN@xea6y*V>Dj0wmzcey4rvAwdv6{P-_-* z^Kfuf+02i?P(!W>&^i+qNtjubSiAf!a3uI)~nynM3>2ZXDm? zCe9vU+o+8N?MaJchCeWz(9!2W%VL z57pQ{wOgM>@!E`4a?gB_qSh?w?;>no;g^BSc-O+ccVfQ@?0!kSyR>tK-wkiiPdlHl z>(3SYL-0Mh!XJe%%N70WcvcJ3FOVoDv==KhdYcn!6ukmoTvTR0#N!WHk*X~|%|K@qd z`)^ltx#QZ6n*ZBc&3#r;GcV&#mvEbfpCW(f>`9(8bHV29Jlo$8oU|+5`#|oqj+*xQ zHMWhp_}tURt5wfC;<9M}zm&3V6C2OZSf5+WifT-V>v89#%YW0JeR zeimzYjDE(-&FL{}{;xFql>0fWrv0fJ+csl<0qo~o_{(rV?7di zxv@Ch&k^TrWw3k7d(rr-)V}K4*P*u0*vHg%b^TAJCVaIgjAvgjqTP1ut-HJTy+7ZA z=bng9r;E^P?O}|yz}_Q?u{PX%)wO@Nwr5?hhVy@=JBM<=!ABS zD_noyWtH4_Q{npieyZfYpDMZUr%LYosgnC1r{uoJ2{*p)aZ3JDgTK|QbaQD#Ig6r@5n%L!=6x_4k_cW#5cQxVKeOD81d*9cDYxjLk zxOU&!glqSmO}KX7+mzh*HYNAHP04+4Q*z(ig!@_Mdz)}SlTInP^XWUA*!`^YolUs; z`OYTX`@?rO;XaS~&L-UaeP3Ez+Q zzN-nhf8W=HYxkW^$$e)NuD|bR!u9tZP04*nQ}PE3ZhPO+#4h(8O}PE}ekNSI?`Xoc z`<|xczN;y@?`pz5PkdJs?(?eeX-e*Unv(mTrsTe-DY@@yO744_lKY;fd)s~na1pB;WOzr9KL*U9ro}R7p*e(LAIR?*F z`A)>}bNr*=ji}YNdk(8*o-PCX3{cL~<#2WTw~tSNZD$*w5w4{89HF0m%JmU@j@#G& z_;|I*iIsIQ`UY5#0(x6W9u2HVG-#PW0L8nBwrvai#J{L>WsvTyNo=ss&( zd|t`et^+s6c0HPU;@to?-lN2GPkavSXP){Ubq+UzZKv&3#&{F8m-%S>JVniX#EE}1 zxEcQzH1)*46)_|7JhnT*YTEoPmB)4$ z*!9)s=c;@sX2Z4r3V1zg+i25AUv<|o4YuEm?;fz@)6bZ4 zeT?ZIa;~jy@49^*>^$b)x)-eGcTU@DQ!DqSd#IW7PW0!v@5kovG}`t)usrMfAlT>N z@}7AJu6}#HXTAYe)5ksYEwGn+M%yhTGQV>D(&mR? z+wlKzn;$`J;~8iixqfN$W3X-bs2^_g6S!@zt=q`;OPim9ZNtaUaGRgOZF5uIMy{W0 zpE{59Ci_0;w&ikj;wPT$Xfn|=QVO+D-RTd^PhGv!qszs{tawfb@TZnwOZo;9h~?tgVWDHz}gc3 zpJ25z{=eYHcWu4WdT3!{b>LY&&gJ*^?V<2CEzY z4Q%qn9|=x;zaN+JN5QpcY@@-B%{fj^i=n9}rzOB@ez(pXFA26UZRWH)?Uw?po0H#} zy!5Dg3Gw;!CmLf z`}**)6#Z;3*C*{a1iu&m@QvX1mo*s&R`b~@_shm`+i9~;zjvyok4?d9Pt>0mn}OAQ zcd5VMOV#4PIauvB{C!^A0<4yEax1WxYoKjQ%Cye*8i>=+*5ESkHgGj_Pv6_Z)zkO( z;PkC+JIWyx*H@grcLaOBn7`kf62zn80}zdgXtSDr2R1gqJf&k1{hy__#?6DVqqNt}N62A6s516Rv;nSH@( z$!ikWywuHWBDGrj+YfABx7O>pKUmFmH?IT1Ugo9k0E(Jz-8`7 z!qwiUjsCv#P)mPD%dx3zcdgaZ-!b5Ft&fGPWv!0~dpW1tj-#kKCUN>X0bJ&FB3vzd z(04Ct$?GJrd8wP1Yps_4y20gIPlKyvt*3*%%u8DjMa{g#>8BT5<~0MZma}3eSS@*- z3^p%y^Kz}#T+8%tEzdkymuu~sdoG_!-A~D9n4h8M^*-uZwJpzx18_C3oUMcKQz)M4 z<|5b6KK$%_57^I4ZL`60pQrqJ%pCAG)V9~APan0q{rmoEE?6y}tNwgs2(CVbX8_Nh zd0;h*7tN~nes;`9bANi)nUkL{r@_@Psn4#{!Sxkp1AJ=t63N#*)NED zCd9bz`NVxcTs?8m0^5iBxpgjQgUv^qx%kfR9I(0MIm|Ji3s+CO^T4)McOH$cmhqeq zZjR>yH1&+^}tDMIVpzTa4=kbGZ^^E63VEa%{+z*4BaX*5lp12o*)hx|<{3x1n z%Xz#QuAaD;fbBy)^YJmT`Dim2=keoUb1CQXQn-5BT?V$Ty7OpkHP_I2{{+~%4Zi}c z&yw|Cyb|oZZBYAakMAeJW!q1|%eJ4c+m55HzS_O_+_Rqn+qYM=t7^@7(LRf2KVH$U zt~LFplBf63HE8x}AGXoQ@m@}?&76H7a4mQf${pn4bIEmJwVb^-fW189v|Ug67sWG9 zoPF>)aC0Brh^C%>@OiMB#mjtc?|pm|+C)mZ|GofM&px;rY#-{0dkeT3_f|CZ#Jvry zW@+xf+tG|$?!Pa>)f4v)uzjd!AAJdIKHAL1{dXtWT+03TWw?6U-37L-y6a(VHP^#+ z`wG}KO733;t6999v+TF8q1irr^KP(q_onT%o161{57>U4-|U;O!)@bbF8ZjY&AnjT zI5%l?A3SkvqmNqhzaMOyPIvHe_o z0^Hn}Pok-3ZoUmxvv@i7|2upv&Af!eYE zi#qZD4Yp0jE|2X$VCN(J6>yo`f8ovCUPVi8wzrLb8N2rM_Zry#!v6<0UdH}9xY_?3 zXzAbfw$U$R*Itg*F>8C1`YnpFJQLprFHOlldI!8JMO`1CU#ME@53$xhM=SzY+n!DC z_u>vYm*=LoMJZ~|u{g0tfXi4T;l@%=tWjX~3yEdy(cn&sx;c9OE(UI%gNvi7=PX$Q ztY-1@eq53f+vk#K&fQ)V{nGDJ;AX!|qp7FgWx#3{`yl<{ewVE^`_(V&w;b4*+H=k> z4_0@4Sr;|u!Fy)~uxoX8%~yo0#lBK)&u8DtaDBX@tx{{|8EsWG+j>P?t=9C*Gv?}O z_Gurs(Z|?6$E*SNtT~tUkgo|>vyEfXrsn?3KCrgEdu|NaeUSdf!foSsTGw}7xWC(= z-$nS_r<#80V?A(tFO95zezQK@HtK#R*+wm&vCip+aPKqM+t09#;2Tg9L#~f8^dATI zOo;!+aQ)N2T%Y)F0`^>s|E6&L^RphgKJniS?0p{p&Efjz=R9(KwEHuvEx^8mF5ky* ziKZT(t-$tiQ#}WAeYDS~?bhIV)R`C8V;eN}yffVvthO?9l=FXP<-?4H)&`QH()zrSOtzg(Zh+!wQuf1$T@yfU-!u{Qv>?65; z_OUyD`+z&?)3|$s)I>=i+qu zIu!NnHMQ~dlQuKpz2Z4^D%d>q%UP(V zU)uD8+ilus*DQGR>>5B*&-@O8)oh#nI2)XH<|@}Oaoz(i$2$kE<{6an&V{Qx-n1Kn z`*Wtmo(EUc*ErhL;y)i;#yJhHR>nCUu3qk~GvMlUIXtHK!hNq=-ka}3Q%^o;g4JxF zxp+S~?aWcGUvfMPT;_N-T+Q<}b8!w_y_}14;r^^IvCo66>1!NqYUMt4zsi@d_pNpE zI3M5gv*H4{TK0-IHQO&!w_lFhar%3AAHe4R%KS`1`ygE1d@iJxi$6r|SnTV=VDBMo ze`b0S^+zdQ+Apr{*2(n}uzlP~Y@cmE23GU4&*xeBM=17X-{Q3WIM}vwbG!^}Zsv6< zwLD|`1lTdzCjM7|)%CxeTJHOG`??Z5nc9A|W!&nC|4DEe|5I?|C+4TY#?;UDa($c& z=f&|@r@zmD?JxV_DzIACO`BRd*UpRae73k2-26R{>(JCQ=Ig=9@dohcDA!Q*Ge@~T z;v1>YVh)V=d9e9fyS87TzM0~s{g&Epo$=fXwvVjsZD6(h9-aIqihbF)_&NIad$;9U z#+0$$j;%SiFQTa@-W_1$J<5FetnekUdsF?6dVk#sww*S=JKaU?Kearz zhrq6{wg;)@+0)+uyQgiVO&@*LUBisq`rX#yo7kH3@GUg;ZHpW6Je0 zrhCY_wzj?N_9)nS%zf|}SS|N~HnnnJx`&!Mr@tq#`MWa4ejF^%@1Q&d&U5Ft!8!BH z<2!Ked7k+$SgrgWl&9hLnP-{rfz|w3bMpT_+_u`x;Yn&Wf6vde=7-=Z)Y^UkmS^An z2;7|SAET+~_riVxR`XtUPyY<;<(}5|Q;M4NEKc7)2RHlv1)6&H>o37-?&GZIui&ny zwygQD!Ri@<-1SLZYi-X^o2RzlfX&I;Gv!(8=O|vji4LWArodFW_}3wu%2=!Rq?|nOdHE>qW5lmi=hc=MU8C z;+LpjrX-iYgUu!H!v6tQ%e(M@fxTQqZU3aGIVN$|@ZaF(8vX}OJ$=6dR(p~DoY(YY z{Yq<8^|||BuyLF({f*&XdX3u4JoI^$qGlfAjO`?R>^nAVbM$%je_-dr`#0l%9j>0| zqBp>5?z8-^&6{xb?CZC|Ugo0hEsC1Ch|~8wVDGWqf27e;E5A=Kg6902qd&u06z+Fh zb^F;Atplv??^bypbb{4<4)=HSM}YnFteywPa4yvJOD-e9WiF%OYW_LP_>6`p7w2m+ z_*imLkI&*@^~7ERoY?khY&HEd?@NN4^S%_Cdggs;u$rH@*)Pk$)idwQfxVn}ZOc-Y zr#SE8<{r>rn?A{Z1+epA{?7MGaC7s#HI9DH`O4_p?9)HLA@_GToc~q8$5E?i9##cs z9*ki-HGLC%HE_8GtHagYYsqI#c=AzCK5LS_UoTtktd%Gz$cQAdh*#2oP3O9J2ici&qm-fpK)+C z&!XhB2|W4aU4!ejDY`cM_0P}9lh0;g^HEPen}d^&acrliZ}Qm!T;{VSTrI!9rA^JY zIafVf$*WwGUD370Z@0p)+}pdO zYfD~xfNiVp{3WJ!J}34Bo2T=mzxkN+Uew81p9vH-a~5Z9CxTr+bMp+{8=n23EjjNC z&e+PioP@5;wcm$Yp8d8T*!xlaB|g_YGxi4?!*Sb>KFQ?(aJkP8gsbVB{3gSbpZ4T; zFgW??pS7NXuFd=oqLwGWsbKR{Pkx7hjgkEHNq&cd%lr<5tL2<<{tkyHKkdoyNO1Bi z_v}&V+RX0=YI*WI8hj#ss{g&7zhl70NPhYxzhl8=e#gPpa&DO4@$lrQJ^7soPJZRy z?LyaPekV}Nlix|&=uZvcAhhO;cEGwqD{^AdDiv0^eujJwIjtp)9iV$G{tKP$_|WsH1(1-_IY3_im{iW zew+Fv>a^!;P4(x{sj9yhBJG!hi*3JrfmZ)%^*$EiM@Vt=lQ zzVgh$46yxcvz^>$V#hU;;@E7XEoa@yU~Qfor%-!&)~WYV)XY)rT#3~Za}L;;;d8-` zEBEX?aQ2+O=BB1!Irp9q{yjQgD^oIetJK)JSe0UY{|=$d*L-j}U#BWO_L*mmk^ z_j$0fZD(w`ezAW6Z0wsT8TZX#_4wQZHg@TAD_lLXZv)#-J?(A>8{2lqmg{F6_w^Sk z+TF|IaxZTL_u7EsUiSRhkmCLwN1eZy;!d#pH~aak1-H%j8~kTA&se_{GyU9fs$JPlS$jPHSsaS_EelIxQgKLEQ%;Xec$Gw&XM1Xgpd zvt~a5tN)miJ^fR#eQ7g}dr2+*{v50p{)-0xCA^ILE4Z3-leoVIs~b0a>NjBZXDIRe zE!h6GCBJ9E=Ag~|+}mo&=Q*%i#{E06+Vhn7{2r`N`uqb}P2cpX7XLqj%Rc`ES9_uG z`E%`)KK}w&(>HyprO&^D?K5Nj8(8f{O4j@(uxox3#XjWv*~c2xFH^L8mWXqfYysYe z;8Sfpq+S|2{@fKm6iRYTi?aTGlKk+)?#?wceKJE$i z#2W!F7z{__mq0#Ed?&)Edy7}cShqa2Twfr zl-$1DPx>d`3Si^uqfH<8qL(`=(&E*f#?^r?GDiR?F`-ZUHu?{n}Q$_n*ICvlYdAGM|xh{nPi> zVCOG=Zv$3~eOs{OjeR??T7D07d$4WPjX9or2Z}K-W6aK%TtEBt9Nw|Edq#_MMlXtO zSBlq;l$_B!)p&dAool=U^)3|8>)k5c{`<;yp+DEx^V{=k?}B???c3m!3aL`>aRt|ZTEX?7-r&6j*T28u`VTbtV8QjDUvT|TZ*cz|mU4d1Ex7*Y zH~0kwxBrU@uK&dieo4Xgzr5i3U(w)K7F>V-9hT&$|8)(1eZlp=z2N%a(coVyxc>JP zTz~%^mNMRbHP7eQu5kBU`T4aQntDFJb_c7y2S4|_d$7{jgMQ}h0awfC*PdWCKfm(s zZh}5_oP4f%mh6SD&HK;3CW6(xm&|(~u=j^?-Rp9(``kEU_qgqLqJ{U^e&7|U_18~( z?)Uw{Wv&On)%@&Bt_SJ^cTVz|Xs(mdwI$bsz-q~L3OKoXpUKV7d(1du?=RaW*F(YP zs=t2Plj~vNGS|c5Y9lGh^$2}v?^#sN>yhZ%lIu}mwd8sXIJtU{%gxXG+c@ImsBM>A zj|ZEp{`zT8t|x%YTu+3nElo+TUHa6ymh*ZNy0+xn4OUC8J>cXz4J!&@rz6V_9ItQ+{HYK^v)u+z2oYx_AZOL^WSS`7p22QT?!E*ESOf`GUe89?mR!#Pt0mX-z{&Mou-yF4 zr#6oG0&3eO*9*bss=t2Plj{e-Wv(BDtKE%%a{Z7#b*|;Sei&U_a{UNcExCRaoLnyg z%gyg%YU7A6p|)Le{TSF>_18~(a{V~C%=I$3+NHIR=j12AiFr9#Zk#Kq_0M_#Nw8z` z`(^t76q>sJS5nK9*QdeeWuNw?J?%aNF7x^7Z zQ>XvWfz|cDo?4!~ZUmc`ecG4ywEH}`%e=`e{$DUj~=Ceg&>}7bUrVRi8T7avy#TU0ZU! z8?2UGzYb2W_kiW*cQ3VZ#P?C#F1g+hHdp=i(_a4k!_U`!C|-L~@(eqn#(PljRb!t& zCf3;J*}W+~&+bc|=huhn-+9gZh({azv4Z=2^PPhGy!>>*jq$yjXJ0)4H=pwP=0P;| zJl{M7R`aaSx_kp}?)kmYZ^G4HKzoeu0Y0mI3$C6uaU74pCr~@Lj!iChT*eVQCfjDt z9|t?<`s=4X`9A?}=Kmy`dY-qw4OUy3;@?-b9eq6oR?lAeE_gznmvbmL7w1o3{l;Ve zKG=2A=3L5?=MTVsf3dyow3mBh61K?{ul*_Rjs2((DDZ(`_rO8auDSLf(vNfJn70J~ zh@xNCY%}nWYdt8L+na{ig6UM*Mz@uI&NlU~%+k!M0WRXTgbS zo!_f?4s1@&rT*q)&cCDfGG~3Br>L2;IJx{DoSc^iCzn5fwZ-p`g)!YqntFU*0+)M3 zu8%nx$3E>ld*$!o=3e;+ntJxiKf!9{UilYXJ$vOpU@ym@?cWqNlgf^lhE_{U6xa)!b1V*ZF;e+RNPZd7YwWZsM%pC9lz7+p0U4iD{j=TnucU&X4})qi^QTI=+i*gS!^CvmeK|B(<00(`N~a zn&T5^&n^Yd__D8;MpGaDbIf2hix;7W+b@fjJ)ic=p{b|+@?bTKm+kX+Gpv9n-+;0V zSl`U)is1C^TIbIouY_(pFVA58)Y5KcaN4;RX}1b`;@M6=wamq;;O1PchNhmetPWPQ z@ISpQXuk$R;$|+^L{m@uwZLi?{;%3+F4jhHF2+%eqi@Ev4mf>#e`HK!&~4}CnDkRi zyRqQ3bN{B@y6A~#JN?w$XW1v#@|<1PIZM{V=3JF$$@*yOIZHMGtC?@wZ3u6kK^vi| z=lNzF*tY6vw=vjp7|(p=`o+EpxS7wUXzIylGq9R_BF|Tw!`1CKeQg0xEZ5Ozt}W5j zljBxkHQT4%*5I@=N4b9X;WNrMwcYo#;=G^LHihDKFvWXxDz$z)6n>kd9Y*mwgt7xM zeAYdbVw=OMeYQE8dS}}CY!g1Q<{8f(aND_0hN$<1=kM&)7QYE#Z9eP!*}E55&G8vS z9^bve_L=9yec)=&cjD{|PaN%OI|-~UarOhN8OPk^`kKT3U}J|L0M;k(&JP6ZqwXBa zGuO`Hkrc0T4v&C4heuK8JHf%UE9dag20yyy8N(E~{ba0D;Tfy8_#FbSY>dqq^4Jao zt7U&44p#H|-?^DheFR+n5uVxPM}pP#8La2%D7d=wXWsI}I0kHt^mi;+E&Uw_RxA5E z9Wi)v{TRD z&CvtrNT_}uq3vAe!+d(d+NM)HTh(TO_4RDs9<0wS@;ClWuy%73m${vY-K)&)1h~0% rQ75-kX{Vmt`r*d&{?wM-W`VVpxee65eg>Wb)+f0Qg0-8Q*xddfT7Rpd literal 34732 zcmb822bf+}6|R4nnI!by1VTa$y@wu1D4~Sj>o7?sVQ?}NlS!!36%|1R!GbhVL_kDD znuv-Zf+F??B1%yyDoU^Seb0ZsWam5&_qq4Le)ji$Ywf+(E~m~U=~#HJ;jPv}twmcS zTGP5()v-)#VU$*DvDUDvo;c~?Nt?_bn!3qWTW_YrlC6%)PoJe|XR}VqjGn#$4RaQ5 zwLVSxE@f%@(6)>Q8ra6t!GD-r_dz;|K57O&Tbm+pP%tOxLTI2is2d7XEBU8D@;IwJ8dxu&mi!z>8 zYiMxJloPsV^v)PFduZ02DMK4qjW(XM`Q|&!*?b56I$Mk5*HiZ;Ut)oN7HcgJKWSFq zQ1AQpQ*~AK<$RQVExSNp9j)cyb9?%${8jg}2OzTILspOvV~K39g%A7@u#xa9z(Nw24;8noh%;4EUCLruE9xtQ-@BNXKS}{?Pm52oiMw1dS!8L zZR5^RS4=;yNqpz$GorOR`cTjG?ip1=-F;Jg2Zs8FP8owtZ!5RfqMkB1W3t;|zF!t- z-bh%a^8d|us8{g)9jg92TBE6ZYP=@(2cZC7Sv^|ZZ}1yWBz{4@E6Rg+RFCR(efnDxHkvSB4@|475D(wZr3@J`=&S$4#%SI zF@G#wt+DWVzF^0p<}s51;f`Y_LBgwj!$88T{j#OJ-Dmbnd;bip`t~EXc7)H%sk=H8-*7~XT3G5ajJRDpU>4>yTfb0?rQ%3AM$VaS!K^2{r?+)`Hv@N@}B^1 z=ik}d6+ErKXL@@K9j)EqZQR+K2+jq>HIsWxo;+AzJKCD-KEID;T9e>8m)qximZses zQ@vHW4h^-M@4e|G`R-#(idXdgsoQ-vuOY)*2V<+Qozu45qI>3G-@s7L=Z@Av*u3J| z$6@j7+k9-(@6dUE!&*n{H#lctYWLK^lLp32Yu_6>T1TNf0iH+4&GVTvv+dE|FUQXF z>1<5_&+M7iGoxzL+43AOx%$4nGX`g!;<+)bCHLSC)t!A4-MLopoi%H47T9&s-*XMU z>cjOOgnM|em`h#8GxzztqjeTsAKQI`+I+kC?-uH68oQ@33qzItw!;1>c%VIjZkfvd zJL;_4OW^jLceY*u&+0j;dZsX^*WvSX=xn{&;BUA6y=2iZR|*DR`Y+tzopAd1vRV0$ zXxJBR@WtEyTgHFshJD!vU%u^MT^`M6<%azO4Zd32f9v!=s$pMq0p8JC2i`vKxTb(J zZ@Vqv*V)>m!6!EOehq#=gCE%7M>P0R4SsZkPj2w34c^<}{S7|Q;DZf5x4};^x#!Ov^(3mW{w2EVAmFKO^g8~m~czp}xvZt!aw{H6xKxxv5L;CD3mT@8MBgWuQS z_c!R5jwpO)f!pWFi0VCLdVlxC{=vyT{d*0}9h}lL)HgV= zm-jQTWvAl1;PYH(>om0LQ3@ZgqjHa2b3M;bN85F9U|Qew_O)5=G0@%H-#decSWa56 z&FV~r*TeRcGT3Kgb(_@B)dSr_ef_;vo3^b3+X=mW(@z-c?u#+~an)<--d#WFNAE)C zK{@ig_&y`e9GcP}L*frtZ)X+Lyf!1S^$ZLQ#*i3`ptqmH+QFkQithb?zW<17jEkX9 zWx?uubDeQ}UY!Hqrx>5lsl@K8+AL9NW;TDWs`s<%{UocPtsP@2{O7-;l=k*qE{)yZ zT<2Ib2g}qp-(g~NUwoL{+UwiisOF~TF@<%vc5j|pYqt3qn77QH{_dgbR_#9dI9hps z`V@Tj3BJnMenj<-%>JBYv&QzU`o9eSzA9E{>pJk@&7@~^qqQ}f?^ljxta#otB<~|~o{qHb z*sx8I^CYBg_l9jBIZr*>_FKT#**dVn4{Pw_8hqLUysLEryc(+S$Mu}`E#TAH8f@^3 z;d!4hr?=qq-}5?KBiTUJxmNeLHhhZjtQGHSZ3~~)*Wb^Fg{kd(N@r^v+WfiV9jC!} zZ}5o=@UGT=@CBc1J6nfVnqLEES93QN-Z#72N$`%=dGMa;(<^+wwmRQWgUehlUVwMC zu7Go$m_21y&(IXEgZ6c8(f(F&c`dkmf%cuPdm8+Kinq@}_u7N->Uupmhda#4RsWBo z%^GaqTOR|rbM0t71;f=^`c^fTw)14*s5CnK#c&fv8u9?`tEsg-`5?v>{Ka3X79Tec#YJxjeN z#Up%ln^t@W8qbf*`DkYxD^rXao;>VdE&clpv}VlOBHEqpB&HHpT=SR)^YBZ+2(v~kY$F5(-YYd+`)RMEB@zh39(%&Mr zwsxWUjG^C{LUa6T8x@+ls`*lx{+6gUU(}-cOrhVlh32z_n%|M)=Q!1NDKwuO)OM@1 z&Uw!U8Ec*u^zm?Q)jBHNSv@zbSy|d^>2ri@>$foH2$c2gj&p-?sN2y(ZWr+%d-9%cgNm ze%oG-EOBQJ{HZ`Y=N0I~_@gKuBeA%D+L}KJXzwSrQTV!NVqc!#+%KbbS;(fF)WYsPat>r-bu8-Q)E zwj#C-sYg*(qMiF&?i$HAuDN3v3HST4KI+Ealv>Sk*srnN`-$ZT2xF;@WLCyf+qNtj zubSiAhT1mzI)|P&nM3>2ZXCaDCeF@a+o+8O?LwV4+MI`WyY_cQ$B?#rf^9pI;+XcT zvAV_i->=r3+XKPoYMi zhnjo+S971vq#i|Ch_YeLKLplp`=J`!r*`WzC>|TJO75BSDQeA<{w~1g5q>eajCU2> zb0_xe!0wmCyHz_!`0eoa{Iv7AQ-6-w?}hKe5q>{>X^!xR;iEXhe+^%VBm8l=*HgLk z^H+-dG5lGu=VAPx2ftVFcflE-^EC|Oe8zuCc*d)JMR+;hk#O%z&i~qA$LcvD-?ZlL zH~H9_yMNOE>1g&F{uy}kSHB$YUeoUQu7Dda{@20XW3gWkFZ;U{zF2K{k8bVYI5r|v z^BN0RE6YYin22o~bnWgH_ivsxJpXn?mpiVVsQJIG)!gR@HS;p=R0*e9_=)oO&Yb9V zW;WQIooD+Sf|GWodmhMr4pGxSx5l+wkmiLQqILa^tYn(Z&5cJHX$UhccPTE^);ndX)MXKR~%r2i|?{JtFj z@7Dg_L%hG1`3Jjp-e&)CRIm*D(g>CU0tcPlk> zx~9hVnK}Cjn&++d=fU9Z%8Hgn^3s^eitga--Sx#`}2EAxO>WPCgJX(Q3coE?bG;oAL1 z60Y5EB;nfqP7c{Z@?a%Kb;oAK+60Y6vBPI76Ny+_267KcHZzSP9i~4<}X67!?raTH@}Pk-lw zD;s%wZI#FN39y=D@VYABju_s@F95Gct*+hcuv+HnGhm+q%6YmNu5SPKaS7OVw(%L^ zQi{(J`q`&kAF!{@->C#+wkuP;!4k>a8K^4e~lv3v<^AGZ+8`_dI)HJ?-8 zq!0P$DfVUG;wRC4j<)!`lCfO{ZjSA0H1)*025h|h>oxf@*n6J(&2(SH`|Lfo~{ta;BtDjTnbR*by+Oh^Wfn5V_S*LG+)m?|{ zsO7PJ6Rf7qd#OCOZ-HH3ZQfVq+i?!K_P2u9qPC4Tee_j#4Kr@*_gjZ=V{6XCZD{Jr z;X7da&G>EyJ3jr4Dc8rC?jh&e+V-BycYvM8oLhH-)qJ0{y*9OSU%H2yId4aQj{9zG z{vMxg?*hxSuJ?d_4ld7`@4?k?s`t#jU^RW*Gd}=(xM#F|pQ7eC#OeFT;AY-h(;TGsPVU=P<*+aD=vuBSMCKL&30 z{WzL>*7MI`HP-iiD#&Dp8XxHE%&v5fYn^n?5BUi?ce!NyXWBQIX|BV+g9Cto~Bky z{C|NH|3z^6`8QZw;{OM%R>pq`ZhZG<+Wi-E~Ur?epA1cP+K|C>hT} zXtvXK0DE$M;KE>aqh#P|KUj6V#nJ!2aVc5KdZavFiAo}3m1tNCu7oE8Jy zmo{@6NBhOW>gMD-lU(fklINEDaY?Y}k+shaOH(gH@zB0(ZMRN7%Yp49&kf6i)qJ+| z9F;Fcu`l};r|rsM+lH?Kc0XhP73r_=w2*j59pId47#uMYOT&;A{w zT%WWb4KCxZ33r_{?`y$FQS`IDT%WXG2Yf32;p@WfFKe2D{n{i$pBy<9E*jRQMhdA8gctY&{cCyWPsIA7X!p{O|~ar)U6 zT;{bKTrKZ1yMxt|*B)T=Qa7&&)N1K(BG|lcsMm2%u$t>`UVDQ*%uCx|6gBe_r=LmS zGOvB%YJPV%?tWmku-G5Jn;_#XRZBaAy2G>!G{q` z-B_-*TG}20E^|K=uJ$f%^!J;GTKYR2Y=7$7U2C=UcLcax>m%W6S?i<09?q$@qbO>Q zNt}L;0hf6l3s=h?^xKPC^6Cbgm%4em)@teRcyPJaJ#e+G^%Ss&d1;$WQ8O=b`k4wY z^Xi4Gg1u*I8wAUJp7Lic9|CVdZF_C{oItH^|9-!k1y;*_ z)t`OLhO6(-Gl18gA+VaogJxBG?;Uf{+@D_S%*p%9T)6s0^|k9Huzjc}?#bY0+*8oh z6ZcfGnuQ^f{rtG6L5%C3Pu$bt>WTYduzjeXUFUKJ*nG5^i{I?d1e;5q!yNNjaP_o1 z8*E#3=h4_|8P7T3=6KFUQ_pzL1FKm)%6a?<+IEz39zP0K&v?!U+lPAMehl1<`*Ae& z#Qg+V&C;C53($;P&f_QH>WO97^@D;F{#lw7U?|J-HvaK^e)m#tP?N+dBl-$1!R_4^Zy>){5?Nxr#-pc z3$|bPWpepG+%_KOqK{hI`~Ykl@3U$1LwMrYMjthE@_e`t?D>%AyB~qoa?gJN?BV&K z?S9I$6we2-vAwVT7~I^K52C4OZhit*vv@f6(SieO|P^ZS=E$+iFk$FM`Yd{{`QglK%eS}3cQ)yt7ys1_O{V4W7nSkUIW`- z`0HTfW$bT&oBh9umi}#T8~rkN?d4b#S6ndyl(pcLKHRUSe#g$;4)Sh+*s;~H4Lo&5n>s8IN0A4Q8!1g zzazlS>);}2>baII3Rbgtcs?%9i0yMRH0N#tMZfgBIJnvG5@_n_cS*3C#Xd-XUcXD# zn*HjR^;;TjOzpYOE(2C~d|4MY=fQJlS+Hw$X3dv_tHr*2ZO^@L1-L#Q(N?TA^Nh9< znr%Iztz2vRmgqiu4Ws@qD{^HmwjMud-vQLVD~}# z8wIzG@3gM(Xt=-Ipx*`f+ozg->0?cBdoK;EetxqS+&1dolWe1wd#rQ14&3w1_4XdN zE_`iDV#xI|hW_h;y(Ywe3|#;8FV`pj>w~?n#eW01{`pytT%Y)F2=+XW|3+~A^K%}# zKHB{m)y81IL6_g-H$hX6&!%AexUQZ9xjx$G(sncO9O}%A>#;eSdcK)%0aja)Im&!* z33tBp4R9;Ce(H(6HQ3nxzNGUn*DvE83wBTI@BD8A*Wcft)L*VoVr~a6uNT|Hyf|o4(F|a(p|%)xEbnZe#hoG3xn_vNJf}Q96lX|9(f=1znrxihYd- zt9fMH6X5=CP4Pm-)`Uz`ZVsYV0p&42iRP+?M^M%HkNjKf}J026RG9fac?E9 z)?VPZ!PPtu((Yimx@(er4uRXBHe>Hct(JZd1FMA}-rz^T%`I_{gsXYJB<@jg zb>n8g9}QRc_nPB(4BY;;CBI|A=Ag~|4y9I0KF5L8!n?ucx54A#YWmq;o0`|jP08E) zWe+^Hc(l-NUXHGPevO)dU&z-64daJ4edNpSUYZ=DQR_ve%u(=5?dAG77tV|0u}*(q0NY>o!56`5SvPHJ2#`D?YN^tY{Jg!1h&zP?U zC&z2RU#46^(a#*^`iQTkK7%`R?>B zY7h6Cwp%D_<|9u0Z-blhZ$nd0{O^Fv__xE2uYOLQ(;Z;jX|r$F;7*Ebpe^fkSFO7a zw}R!deHW~z?QUv$Y~KUBzS{1gmS<1j3wBT2Mw>qRs=J06xApt2!}qZ@=ivuv>dE1U zVEfJZ?gKkM{fsHs$C&OR=i1u#uG^2m&STDl`@w2C52`tam;2H^)XX{k{TQ3SD`V^j z!1DYK%0u8hcm4#NYo2*L4A-9LnV*8y%HKix8QebeEb|Ch&7UHBx!X5YU@ zQ_p_=16a*{ob~)8-1XF!HUAS>J!6o&K8b6s?J;We)b=>ooUFa3JVE^ziih?mYrA#U z@~>d~$Tj6}V71&YA+=i9uVp_V&F?}7gSkEGZp{{IB4>;HFZ zdCslpz@A(7qfMWusMW0XOTLBw8?2UZ;V*$bTtjXDp{O|~an|s^;N}|s z4^2INzYJD;p8lNI^ke->Yj}0veFbbB=SzQMxR+j|_An29UR9@6cu(z-2DO z;A;LE%J>Y2yB9KFBjBURMLj-?fYlRwQE+10r?J)aOFxT&oAbUnntJAa39y>?+w7Mm z;p&<9rNJJ~ySAk$%TSzmadQvouT7uizbx2!Fn{NJdAPZGy)}-0&iM-H+U(OmzajT` zH=O?!!ADc8XC77pXC91UJ2ib1du4FB2CKl;+-u2aRe17IPd=-mYqMYf{D?gHtPVCG z_2e@WoP3O9J2ici&l=z|pHXl%*C6?<2~R%i$!9HeZT35wTAqB?1|LU0>d9vvaPl#Z z?bP&5KI?+ZeAa`jc`Zsl>%)^zzBRaB8=z~mU;q4!Jo#)0HXrrmvk^G?7{_*M`X--^ z!DT+1z}52mTiVoYo9n9Ap6okod9DrC_U-${rr2!bwQX~-hx=N6Gm4sfSDdkL0nXUV zwb&9}Tl}^v{LY}?tl8G++U{l_o5NVJZPl-@^V$ZiZeH6_dzhE{wiGq<5+~mF;N(@V z$qwk+;E#&F#Bqfc_#3taBA zz2R#5Cck~)$xnOo+ZUYt^v_!Fhpx^1CQ-|i-~M3pQ%`;efQ^y-^htgPg3J64f~(~^ z;rty8Pk!2y-=W~-SMJ%v(6yQ0A=L8ZcR2Vs`c(gSJ%2}ljgkEHNq$Fy%lwXltL3_3 zen-QTpZ4T;EI9dyfoqT)1WxkW)YUOoB z&GxwtSj&AbbuU||zbV+fZ|6B#y&qSK{57H)bCOsPo4I>)>MCw3{~}aKBWCJaIx)|E%0(+`&xn8KPPRhDU{(g z*T1*sj#GUq#r|9qedU>hX<+-;W;?mh#Exq^#j)8&Tds8{fVFwuIFZ`JYn^%@Ma>+= z&Xrg#F=v5|89p2AxN^?U0cX$YYi?@#m2>a)!M`KNV?|2lZlxMK7b{bY@81uU`I-wZ z=j-GKKda`MzmwqhX&$zfCwKq;O=D=YA9?2QRIu}B8*Q1t)4Y{_~vsNw95vmP~FJg4N@55!k*;pHIQnbN+rB zY&-R|`wZB4wsQ{T`o(?;*w`0S68p1Y_4r&0Hg@TA8C*TFKL@s*dfI&+Y;4;ZTdrU1 zUjQ5XLzKk+B3L~>mxGO6`g{qlp4eA_ZKs}gSAvagJ7dfBi~VY_vCE%@UISN;&zHf* zE`6?rt0(qXz_wFQyRU+cZ98Mj^^5&$U}IlL$+)ivtHr&jmW2p1@QrrS||7JhmR&d)q+Tg#d zdB*xJxc%lG?%QCsTPfyj|7!974p=SUsO|u(-A+j#cY)RKq&&h{XHwryQP2DDcWW(k zbr0O!vq$9mB<}aXWuD)MtKCaUj30p26XSb=(a(xoxN8mEX18}wbDT(o8 zuzF%V2v$pspMZ^#{U+BZF&+Y!F@6eHdzg|KKLe{L#v@?0#P~Vb7#C1nBe_0_@e8nP z6#h%FG4t*5S70^gI&1bDu==kl+0(xT+m|-uxR=z@@9)8C;eTlGKf=qne}bzyH;MZg zSlzhUQ-21lKTe6?6JYz-mi+z#HV19y=iXLJK2L(xGVZ^D)&5F}&r@K1(&y7)HGR{k zTKt~@mwo;nuJ&x<^N-pmef|@!rf>RGOP|ky?K5M20j%~sC2Rg7*fqb7Vjpt->|<5x ze^Io1EfMEhvI%$#ipQpuJeO=%;|-`cr+8i2k~-Iwm%zgd{z}0|fL|~8BH%Y0{OyA4 z&!g7@{Vm+!odwt5zoV&)@4o?1a{mp0aQ*!^0K(13e*>W8{u=<{`ulGHgzN9W0Z{VQ z3aNjTH4+L>|24=@_V3LgKeX3%(2wlP>gvoV|Kpe`q`(~ z;caWX*JyFB(Far}CD-WfYP>b|_BGyydIyTv>zyjx{`<;ypg-5w>$lgd-3spY zYL5n=SaALKE4cm#H28r9*Z+uu>wi>(A6;<$Cl_4*sSVy+aQ*uWuKz%T4;Ebixdqq% zB`s=4XxgHEIb3FvEHjI*757meEUW>|kJq%r2ay=ZZmRyenCs)sL zx%qj18%KOJwe6DYF<^7mUq9{1^;mG3>v3?kB`L|ZTc0}Da$b)|*Opv+z-q~L3OKn= z2FuOQYm0HjURP|FT&ID}Re$}oC)erVGS?H}YAaHbYo9)}ch8sedLp{EEH}Rksf{DPh}w3^^;2MT)n7mD$@SCVGS`dYYM-fnyiR@=oS2t@ z<;J;`TK`<{KL>V9zF(&Q&!eg9e;Ktrd3^zFUiN8U+SBff;4-f-!PPFWea!1BaPqnm zEH}>8)cPl{FN4kN3hMNKEm&RuYpCVP>nmXMvQPWco_1damw9~+u67+IYx{M5Xn#F* zxesrEYs=c+2v)Oi$MX$v#&Z)`ZhkjY8%O+2YTG5(TfpY3zkb@2>$kvVuHS~M-AYNW zx9L;oTJFQ|pleI6w}aJ^>z&}_dIwl;es@tDM|?N6?UL(v!RD&Je%i~Qe|Uf0o#L?z zCC{+qYrHe{gc|$&v1^Tep52Y&^Xwkfd49c*{+-u+kGQ|VA1JubH$N@7&&!V#+!#Nv zdG^&kaPujjZ@!17p68o;!D?RXvo7C)!}^B;qqbN%(xp8OvKH}n4qntGnM9s;YaNb&Eh+KRp&2CHW;{0uz4 z&dWKJn~U?OuYO~(KMHo8v^kgZ>k*c z+BMhyOZsu{9P_5&Us3eSnr#IBb*;zeH*nkOW3F<2GH<^H8`tNetm*I2)bsuA_h2>G zFTcn12e`V|gxBczPhby!R;KNb6g9^n&N=!RI5`gk8*@Y2Jr33uzdsj##)#h&=-TdK z4i-iK3)r^m{wz2#t@C>oPlCcIzHaPQJ?&EjRwZ-qI+lU%uAej zo#5nE?$s`IZSfmc__--ty70_+x;Wb!4wX|CioOZ57+O341c(&6|EpxFl zxH%WAps8mp9{{Ua_@7?pw_g<@aWfaIp{b|+>R>es|5xoZ7b6jzi!l`A=$kRE0Z!kZ z9~sjqblZ72CjHdXZZtUU+`nnJCVJx8PCqsGS@wywJl8JkTuau%=3JH6lC{y)b1hj1 ztY*Gxw=TSS4O$OPJb8-dl_6M4Sc7_M%= z>1z{sV!4h!b8U*Io*Xv=tJyy7HV3DjIm-3351&!CsO^5A73ce`w*4s{`%yec51`g> zo5F8nw1X)g2U4~nhR?bOQEYPvwa+$3P;XB=pKZc-t$D_?Gu(FWlOgI|;NvLX-?YVV zJXo90`Ws@K09JE+#*oK%H?V!?`EYl*n)97Fd%zP%d)iI}YfGFx!D_}ace%diuou|a z;d_Ji$+z=;!1|~=hw{v|b9flVqnyJ-;m+aV)Ojb^k9Or89@OAR)I4L@A8tPx>jChL zRa^WH1Xni3W(;|32ZPnJKMw(``TXzP%%naPuKpvQ+2n_T)$|#x=jd>_y7On=^29h2 zY>f1G6j&|&9Sv41`#T1%ZhwwjZhx-Fv0!6_9|zVa>(LF?N8R<1XFWW(j;44VNpVlR ze~+TL9>-9p-^sL7&)>~41=<>O7YsNHVv$=*Ve7U`ph7I<4*@` zH#c#a+i}=E%G{2Hn_D+^a{D0d)RS93+<2a!+LGH0u(mR{f!f!5;E7;;lG`9yySa(Y G?f(F7WQBeJ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 245c4922dfd27d0d2d5f30b89554bd048b453099..89fb5620c77cb05d2bbc41744f4bf2786eb88c97 100644 GIT binary patch delta 71 zcmV-N0J#68B%~x1L|8&YxC$tRS%oqqm5RVj5}RQSu@pK40Zo%n1Q!AZXR~Mo+yf9! dAi)k-VvaO?w{*`oeWC2LR0yjE0Z_6JBm+!&7vKN@ delta 71 zcmV-N0J#68B%~x1L|8&Y>!T=7;+1@pu<4=R3gK%Bu@pK40S%K+1Q!D4X0vDn+yf8{ dJ?Jt=YDMnD7fRxPEL4EAR0yjE0RXZOBm)J{8m9mN diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl index 1332429..754e6e9 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ b/piet-gpu/shader/gen/pathtag_reduce.hlsl @@ -26,8 +26,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -37,9 +42,9 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); ByteAddressBuffer _139 : register(t1, space0); -ByteAddressBuffer _150 : register(t2, space0); -RWByteAddressBuffer _237 : register(u3, space0); -RWByteAddressBuffer _257 : register(u0, space0); +ByteAddressBuffer _151 : register(t2, space0); +RWByteAddressBuffer _238 : register(u3, space0); +RWByteAddressBuffer _258 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -83,13 +88,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.Load(64) >> uint(2)) + ix; - uint tag_word = _150.Load(scene_ix * 4 + 0); + uint scene_ix = (_139.Load(84) >> uint(2)) + ix; + uint tag_word = _151.Load(scene_ix * 4 + 0); uint param = tag_word; TagMonoid agg = reduce_tag(param); for (uint i = 1u; i < 2u; i++) { - tag_word = _150.Load((scene_ix + i) * 4 + 0); + tag_word = _151.Load((scene_ix + i) * 4 + 0); uint param_1 = tag_word; TagMonoid param_2 = agg; TagMonoid param_3 = reduce_tag(param_1); @@ -111,11 +116,11 @@ void comp_main() } if (gl_LocalInvocationID.x == 0u) { - _237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); - _237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); - _237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); - _237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); - _237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); + _238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); + _238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); + _238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); + _238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); + _238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); } } diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl index 6c0a64f..83a8208 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ b/piet-gpu/shader/gen/pathtag_reduce.msl @@ -33,8 +33,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -103,17 +108,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& return c; } -kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup TagMonoid sh_scratch[128]; uint ix = gl_GlobalInvocationID.x * 2u; uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; - uint tag_word = _150.scene[scene_ix]; + uint tag_word = _151.scene[scene_ix]; uint param = tag_word; TagMonoid agg = reduce_tag(param); for (uint i = 1u; i < 2u; i++) { - tag_word = _150.scene[scene_ix + i]; + tag_word = _151.scene[scene_ix + i]; uint param_1 = tag_word; TagMonoid param_2 = agg; TagMonoid param_3 = reduce_tag(param_1); @@ -135,11 +140,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene } if (gl_LocalInvocationID.x == 0u) { - _237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; - _237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; - _237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; - _237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; + _238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; + _238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; + _238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; + _238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; } } diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv index 9fc105fcca998a019fe17a6483ef5775a4eab006..feaad0a34bd76490bf4c8661c30838dff23be8b2 100644 GIT binary patch literal 8088 zcmb7{33Oa#6~|wjNgB%5Qg%vBDQYbgsj{9@pe%-Jupl6a$}pKslaa|xm`qwY2PJ3) z6mj20sVj<#;I60$qPXF{A}a11f{L;!=UQS@AfVCeVz8qT{bUC z<|Olz{-iV~NyoxuE=-bapY)~k;LvG92TgWH4m#w}gLT*`=}GXSI8+{7zkYD&#Bz0FqCDBTuu&^F>djhtq~5Mo zJN1n<{rU@j^=7p(HBxlw#iO{6URK8DlG7`rC%2ld`Utp>KXQ&vyV9I2*Dnyu%DXn} zZmN%T#%63;jO(4KbjBuYqp77Y&+lo-OGX~o5x%Vr8zyU=1n)hQ9pJ02@nN^tsf?D# z(}gXc?6jwyGY_Y@FFpTuZDgvNmR&xzYPJ5o z+z;o#IySZ19#5KM5py-gy$No~M62HHl&h_&W+$~DQ?Q>6ZkAJzNnmQXeyrgluqWKh z-PsqTjq+flHC$<|Yi?{+E1i0)x$cBCt~a@qd9qsLWU@Q)J}-k?+i0|0kLRuKm_5@{ zayk0(t>%XMXm-Eqj%K;msEyZ}9aiSO%X`19H@ii!_a*P$=6BvKcj}E=8k5<)-?3VK zbgWaZhcWVb&$^c@*&>7Yo8Xdzo~hplKhdc+!VvZCz37)b)l5u3wo0?vnvKc!vK@TZ z!&yYswg-lXTNljw_osWbA4b}hOjBD)`RWZW((=L%-?+N0UtqZ71cYuXjt>^EGr zXZN`qcF)?r4{Pjo_uS?;9yY&|m%wK|cHGnkuxpcZROvluzrQO43}aryy!E%$o4gM0 zGWIm3C!3_q&?8p5jt+-Bz|zazmH?k*)OJ z-ze!Gj2lkkr>a2dKo&hN?5&6!5dGYCIx$@#7X*OPO;0l^ug zpWkV4zH4%RW5Kbrl-skw`JU+)Yj%CslN&%HcDtP0zrfAUx#b1U{Tq8=fpe{L2NyWs zD!D@noNtue5e3e7N$#iu=RW0*EpWa^awilx-y^w`3Y_ne+)#n@jO0!!aK1fqrx!Ti z9J#Ynt~B$m#CgU$qmN^d{*dcQaVdRs_S3+yUwng>BHq3FYni>HtOrWz8-&WQk?bDg z{VC>`i~PPt#r*2#w~x&4Z$0m4AH;k0n{!0I;9QSfwvNot9CG$X_*qvjdNICXIq%4t ze`0n|p0ROvGduKgct&#O(%~ie-F57REFzP+mJ)5ecsG1p*8t*JlK*a%>y`n0Jy*X)Whra+ zdlSnm`uKgw8NU?WZ$n+)dYyfsVmHzn%J zn|}_0<}!i#Qa$GmWVQC4?}lfxC6~ji!-L-F!C}bnX7W47<9&FN^w}xxSWNT>^d~axpTE ztYJ@{mwg$^8s+2jrNHBAXrCTZ?%A>R&+2Wvuuv_|43UZ*J`EtKjaveGN`N>U|w-y<6E^AM@?V z7DPV!<{Mz+w8gr<36}S5e1Ls_o7v$SwS5bbbB*Fy(|5t$Yx*9Xe5~pF;NqHofW8Hh zk2U=eY@D{Zmmh(>7j1EOKL*QtH@{oRFFQ(Yfx+E<>nbL9IK*mI0C z`!!f@F=D(nxni&0iXM0M8*uj;e+wsnE&Xy0^Y0M3X~aJG1GB?E(Dr*o&b5gn_g}#G zBGX8$_pf02*#FI`@nxk?m>1#^gT82*?)lbb(sH3=KB%-^>wf6`bEqGU}NSW z5%VC}m^dGG{UYWeurYIyhiR{@Kf%WIAQAH~urVtTW7PGFm`A|Il#qyd z6l}~Y#29t`yt_DG=SSGl5s1H2=AqlCj_r`ciR)*cU%=agy_+4G{k@`n5n`;hcVgZh zF~+luyV@h?!)H%)a}|B|LYI&J+Z$}0e8lYowqD$mx_)8b7i?|sBx)}K%ZJZnz}7DM z44}(L?WJJj29Su`4{UAYJX3Z3!oEM)+TLx{ek@o%d>#k3cG2hY=<-o}IoLS)hn+p*8 zcNgaPy}cT2uIT4u3c4|8cj=W}kG&p+Zrq$@yPA$bywyS5&4VI`uw2wQ4Q!3*H+6lY#_8Zk~De4fc*ge-7B1@&AtJg5^BxxU(`?{v0HF`dqNNwAl;x z(z!_F9R|yVUhUFH(2I3z=yIM-)ZGAOl%iHPsTL?-*~((57Apdxs0@?IG#Q!9gvq3Z??VY% zfm+;fN5vf#MG<#YjEI5@D(;92?jnMU%BG;d-+S*Quk)ea_I&q${%5=Q-1}~)J#&}M zOOiRsc1eHIGbc&M!elN?l5C&!rSssrQ`Q|c(HTBy_uv5~L`suTPxL)QGGFGWK zHC%vPhunkgOb%@ektxI&rH4OrnQuTTQGIY-d2s#u!F4B;tK;M4iOzYATDehg*2=^6 zcCFf}Z>;IpU+}9ptBuLwqC+np#dY+uGB%f-S{XU1)oj&=!F~LZb9CC3=0v%Eu2@#y zwOMyleYi6^W6NS(?|7v%I#C-*Eq!@@yCE+bd0a>Mwl-{-sC5#&cTaYJueQd9+*+qH zQXWeewtS+~o~(8boLT9ClPijUSFHZO4&?2nr`s`Ca=bnBP>TD~{-HDz1b;OTa(RBYCpPQKLgw>rydi))NcJ)!}(x4 z+{@W{FGd>W!A5JS(pcNv*s4}K^;UE3@o8LdauM@HwMJ*MGx0nxhFjBUv|NwOh2|tv)P)B$@VfI zKD)zNMAY^U3=Or;o%Qce_vn2XZdW#CcP^}1U+sZ+ll}QsP`fvc*)io@*=(;LFpRm3 zdE0y4o4g+G6z^A-Ce0(~Nbeo9dES^Z8NM7`85w~uC2s|fSK5`a-0uqJdWuWQ)!W!g z$u(X2wcF^u$!F0gM#~e`cBNCLw|*bKnS3AX_%6CXk91wr*rVR9;8tg})@F8Xw=vJI zt(4rpjozEwi9YkXfW0enj_!9*-tT1g9OI45Vjk@sG^YCHay=<7 zrSHK$8W^7CR_3LM=c4{PX8V_&PbqyLP-l7>Z$mM^T;%tiDCSo;zh`WIzh`^57h=!- z9vqP`IM*YWtt0a@hg{qfW34L}_e9PZIs0eLKQgEqK|i1yYWlWy|?P}*0aY02un-$ z&-Gowj?n!Ei+1<#dc(dS*gb^40_>TpTlYw$hY|W3bk`92ao`do^tI^6FhXCCzB42A z)6fSPp`VF9hY|V+y8Bdje+|Tbgnka#yB7W%!Jgx8_`jIhHM<}C_Y!9JF26e0FUYxw zpF(%&e<8E}<_P;{u>D#>){Ar75>|Fej`a=E{=X#rx@U85A=aF(;}y)-lylwcTXRnP zr8zdw-o(Z`VjkZIb^YFu`^lL{-8Ul6-}rYR?s*s3E16^8p8`)Ii;4R*^E^b(yq{tA zj>wx=-QN>((P!Tt^Xh+nZZk*ZzZu+*gWt)W;O?Hp-h5BAyEoqrb#wlS*?q`aQ{A^i zF4o}N5o@?Nx0%bH#XGVje=nlGy!j0@mv_s2>T`1L09Nar@=dTO_E0<*lZ3hxhOS z82{Zghu`h_hN{-y7X%cy7{IHy8iyIi~Q>T zt}E*Pu2Wybt}bMLDRKcag&fHq?c>W3`OWyKzdYwQGap4)?19`Gtls5UBHnM`F2C={ z^D3~m8}dA_2Foo&!sj($^G)SGuLbKTZ!P;P7yfSo%SG)sgZ(C~tu6fC0@h{@&qO_J zZv)F|^E}iKr@no8JF*aw*Y5eri9IXt$%EX9cQSi7obRJ=?`D1v;?VwHX6?>V_kCb< zTtlrZnct7d+1H22q5dw!T*itc_5)z|9r_2s=8C>u1=dGCdh;Q0^hSHcei*DRdZQk; zkAR~$-bMB3%}2rZMqYpW^)bXAnajJXF7}@4e>itUzmFq3Ao^(2$2fWWW$)axbFAeP z;Nl)Xi7vMoFev-m|%tSv_n&0ei06Zevyt z+fTvzYrCCUJ?`nxz}{11wCQ6WdCxWW?fl=)=I7Y1L#B|}_b&PKUve+_Z^+%qu86)T=Xdt+V0|6t zzm)kOi2nMzS9Sd&=0317bC8I+A8btYM_s>&c>rw8TqI&11RJvqv5vZa5%Un(m>wiz z{s}f_Kg1Yy{UYXHU}H*1#5@c(W(8u5x_+Kr^w;@ecC-rdcgj3;@2O)xau{*_%-a?4 z_F&IuM`nMoXkUaFYwewwABz}cFXOCs%lYux9o<|-pFPm!iSv7JGTna z{s?zcT)fK*(H#pA@3MDuXT)XF+VNmqF00Hu08IEx;|0& zL~wDPPe+$qi$slQfaRmcISI0V77lB=aHrMCfmW%bA4VH_2kAdYHNcc3t`b5qaSWe%_DHs0Z;9|};y4*Ph zpNZTja(2+=^o^Wyk#iDk&e-cFu-rx@&iq`kXTAb4hq`{|@cTax(e7I!_AU7jb0}iU diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl index 5231c1d..97e1c23 100644 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ b/piet-gpu/shader/gen/tile_alloc.hlsl @@ -60,8 +60,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl index 49bd1c4..bb10cf0 100644 --- a/piet-gpu/shader/gen/tile_alloc.msl +++ b/piet-gpu/shader/gen/tile_alloc.msl @@ -81,8 +81,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv index 55d62adefde50e2cd91a9a6a32d822c8d0f4267e..12277f10cd2a226e5c9a75069988e44e2b566c94 100644 GIT binary patch delta 353 zcmdl{wxwc28CN|UgB~LT0}lfOLvl`LL40CPPJS{20~-T7SezFoo|Ku4kmCT$@xkPZ zOA?c_VX_Fl{4n{Xr2Gn~G#djeSg!z3E-${MC@~nWQj)@qzqK(0mcw9 zkU=0ZA0#mms8|4!m?%^%1W8N`Di*Q1l3$U9QF8KZVbjS6gf%8V5N6r@LO4Jj0P5I1 A761SM delta 120 zcmdl|v7>B58J9jQgB~LT0}lfOLtcDIQDR;(0~-TGj2A3ckXVwz02E^ftK*xj$t}&u zKiQRAURD4o50WiOOpnh`ODj$-L8uel+{oR?vN=sahJ{gNaZHt(M}{u@o!_5D8?3D!@T2w0Bo9KvssS V^0QC~Jq7}p0h5jn8MCMkcnAGI7f1jA delta 62 zcmV-E0KxyDETAkDL|8&Yf-#SD#?TI5OB#H=7P+D7u@o!_5E3=EMd>Gcs4iAauiHjp UbF)wgJq7`AlaCG=v!@Pu2lJK}U;qFB diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl index 38136c9..3528d6c 100644 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ b/piet-gpu/shader/gen/transform_leaf.hlsl @@ -39,8 +39,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -150,7 +155,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _285 = { _278.Load(56) + (ix * 24u) }; + TransformRef _285 = { _278.Load(76) + (ix * 24u) }; TransformRef ref = _285; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl index 6a55784..6a99fae 100644 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ b/piet-gpu/shader/gen/transform_leaf.msl @@ -102,8 +102,13 @@ struct Config Alloc_1 trans_alloc; Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; + Alloc_1 clip_alloc; + Alloc_1 clip_bic_alloc; + Alloc_1 clip_stack_alloc; + Alloc_1 clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv index f418bbef9311d0422c800bbcce4d1f3d78e2a24b..b9a0a8311f65b5e268e9cd9fa71b41989e4693a0 100644 GIT binary patch delta 337 zcmcbUcq4g(5Jx>5gD@il0}lfOLvl`LL40CPPJS{20~-T7SezFoo|Ku4kmCT$@xkPZ zOA?c_VX_Fl{4n{Xr2Gn~G#djeSg!z3E-${MDA6RZ7-$(pP!KFwkXVudG6W(f1Qr81 z4PmhGWJeBZMv=*}9P*5!lWT!&vCT6%Ub4g^v?v3$d4MrQOc=-piTNOji9p2ykiE!5T604TmY!U11Rp~eBhoU4+BI0W?7EQth|yyDPa~* m9gsB7DQvn9PO-TMHy6 diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl index af52cdb..cce1a22 100644 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ b/piet-gpu/shader/gen/transform_reduce.hlsl @@ -28,8 +28,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; @@ -87,7 +92,7 @@ Transform combine_monoid(Transform a, Transform b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(56) + (ix * 24u) }; + TransformRef _168 = { _161.Load(76) + (ix * 24u) }; TransformRef ref = _168; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl index c387f03..3695563 100644 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ b/piet-gpu/shader/gen/transform_reduce.msl @@ -40,8 +40,13 @@ struct Config Alloc trans_alloc; Alloc bbox_alloc; Alloc drawmonoid_alloc; + Alloc clip_alloc; + Alloc clip_bic_alloc; + Alloc clip_stack_alloc; + Alloc clip_bbox_alloc; uint n_trans; uint n_path; + uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv index af5ffb9175997a0b9a18f45cf6ddb417f3e1cccc..e74cb8d5a5cc7b2bdec5bdf1b18a5233d1d2830e 100644 GIT binary patch delta 340 zcmdmDyTN`#A4@$O!#oBC1|A?z&dDr@Pt3{5PiA0XV_*l1^TNcFGLsQ<9AG& z3>KcemqnUUWb#cGc}CI6KY?tq&7!P~Eb#~}%0O)%Uk t2$Gl>R4ih%Am>?TM#;(dc}yqq3QQK^m6-g2hh?%3uLYy<=2TvPK>+=VJ^26t delta 110 zcmdmBzr}V#AB#RK!#oBC1|A^Ji!Uik%qwPKV}OY9g2f6FOEMUMV(egbe3Knnr5X7r z$Fj;Z3QVpAvIRHKWMyR8{EzblGqVW8yvc%m!jtdtDop0#lbEc*XTiv~Ig!s_5CA)s B7?J=0 diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 9aba204..dd4a855 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -91,7 +91,6 @@ void main() { vec2 xy = vec2(xy_uint); mediump vec4 rgba[CHUNK]; uint blend_stack[MAX_BLEND_STACK][CHUNK]; - mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK]; for (uint i = 0; i < CHUNK; i++) { rgba[i] = vec4(0.0); } @@ -211,7 +210,6 @@ void main() { // The following is a sanity check so we don't corrupt memory should there be malformed inputs. uint d = min(clip_depth, MAX_BLEND_STACK - 1); blend_stack[d][k] = packsRGB(vec4(rgba[k])); - blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0); rgba[k] = vec4(0.0); } clip_depth++; @@ -222,7 +220,7 @@ void main() { for (uint k = 0; k < CHUNK; k++) { uint d = min(clip_depth, MAX_BLEND_STACK - 1); mediump vec4 bg = unpacksRGB(blend_stack[d][k]); - mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k]; + mediump vec4 fg = rgba[k] * area[k]; rgba[k] = bg * (1.0 - fg.a) + fg; } cmd_ref.offset += 4; diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 4211b08..0dccecb 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -46,11 +46,23 @@ struct Config { // Monoid for draw objects Alloc drawmonoid_alloc; + // BeginClip(path_ix) / EndClip + Alloc clip_alloc; + // Intermediate bicyclic semigroup + Alloc clip_bic_alloc; + // Intermediate stack + Alloc clip_stack_alloc; + // Clip processing results (path_ix + bbox) + Alloc clip_bbox_alloc; + // Number of transforms in scene // This is probably not needed. uint n_trans; - // This only counts actual paths, not EndClip. + // This *should* count only actual paths, but in the current + // implementation is redundant with n_elements. uint n_path; + // Total number of BeginClip and EndClip draw objects. + uint n_clip; // Offset (in bytes) of transform stream in scene buffer uint trans_offset; // Offset (in bytes) of linewidth stream in scene diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index 87fff1c..767f4ba 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -20,7 +20,8 @@ use bytemuck::{Pod, Zeroable}; use piet_gpu_hal::BufWrite; use crate::stages::{ - self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE, + self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE, + TRANSFORM_PART_SIZE, }; pub struct Encoder { @@ -31,6 +32,7 @@ pub struct Encoder { drawobj_stream: Vec, n_path: u32, n_pathseg: u32, + n_clip: u32, } /// A scene fragment encoding a glyph. @@ -98,6 +100,7 @@ impl Encoder { drawobj_stream: Vec::new(), n_path: 0, n_pathseg: 0, + n_clip: 0, } } @@ -155,6 +158,7 @@ impl Encoder { ..Default::default() }; self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + self.n_clip += 1; saved } @@ -170,6 +174,7 @@ impl Encoder { // This is a dummy path, and will go away with the new clip impl. self.tag_stream.push(0x10); self.n_path += 1; + self.n_clip += 1; } /// Return a config for the element processing pipeline. @@ -203,6 +208,20 @@ impl Encoder { alloc += n_drawobj_padded * DRAWMONOID_SIZE; let anno_alloc = alloc; alloc += n_drawobj * ANNOTATED_SIZE; + let clip_alloc = alloc; + let n_clip = self.n_clip as usize; + const CLIP_SIZE: usize = 4; + alloc += n_clip * CLIP_SIZE; + let clip_bic_alloc = alloc; + const CLIP_BIC_SIZE: usize = 8; + // This can round down, as we only reduce the prefix + alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE; + let clip_stack_alloc = alloc; + const CLIP_EL_SIZE: usize = 20; + alloc += n_clip * CLIP_EL_SIZE; + let clip_bbox_alloc = alloc; + const CLIP_BBOX_SIZE: usize = 16; + alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE; let config = Config { n_elements: n_drawobj as u32, @@ -212,8 +231,13 @@ impl Encoder { trans_alloc: trans_alloc as u32, bbox_alloc: bbox_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, + clip_alloc: clip_alloc as u32, + clip_bic_alloc: clip_bic_alloc as u32, + clip_stack_alloc: clip_stack_alloc as u32, + clip_bbox_alloc: clip_bbox_alloc as u32, n_trans: n_trans as u32, n_path: self.n_path, + n_clip: self.n_clip, trans_offset: trans_offset as u32, linewidth_offset: linewidth_offset as u32, pathtag_offset: pathtag_offset as u32, @@ -261,6 +285,10 @@ impl Encoder { self.tag_stream.len() } + pub(crate) fn n_clip(&self) -> u32 { + self.n_clip + } + pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { self.tag_stream.extend(&glyph.tag_stream); self.pathseg_stream.extend(&glyph.pathseg_stream); diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 97e1f28..b8b7532 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -20,9 +20,9 @@ use piet_gpu_hal::{ }; use pico_svg::PicoSvg; -use stages::{ElementBinding, ElementCode}; +use stages::{ClipBinding, ElementBinding, ElementCode}; -use crate::stages::{Config, ElementStage}; +use crate::stages::{ClipCode, Config, ElementStage}; const TILE_W: usize = 16; const TILE_H: usize = 16; @@ -86,6 +86,9 @@ pub struct Renderer { element_stage: ElementStage, element_bindings: Vec, + clip_code: ClipCode, + clip_binding: ClipBinding, + tile_pipeline: Pipeline, tile_ds: DescriptorSet, @@ -110,6 +113,7 @@ pub struct Renderer { n_paths: usize, n_pathseg: usize, n_pathtag: usize, + n_clip: u32, // Keep a reference to the image so that it is not destroyed. _bg_image: Image, @@ -191,18 +195,20 @@ impl Renderer { let element_stage = ElementStage::new(session, &element_code); let element_bindings = scene_bufs .iter() - .zip(&config_bufs) - .map(|(scene_buf, config_buf)| { + .map(|scene_buf| { element_stage.bind( session, &element_code, - config_buf, + &config_buf, scene_buf, &memory_buf_dev, ) }) .collect(); + let clip_code = ClipCode::new(session); + let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev); + let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc"); let tile_pipeline = session .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?; @@ -286,6 +292,8 @@ impl Renderer { element_code, element_stage, element_bindings, + clip_code, + clip_binding, tile_pipeline, tile_ds, path_pipeline, @@ -304,6 +312,7 @@ impl Renderer { n_paths: 0, n_pathseg: 0, n_pathtag: 0, + n_clip: 0, _bg_image: bg_image, gradient_bufs, gradients, @@ -329,6 +338,7 @@ impl Renderer { self.n_drawobj = render_ctx.n_drawobj(); self.n_pathseg = render_ctx.n_pathseg() as usize; self.n_pathtag = render_ctx.n_pathtag(); + self.n_clip = render_ctx.n_clip(); // These constants depend on encoding and may need to be updated. // Perhaps we can plumb these from piet-gpu-derive? @@ -342,6 +352,7 @@ impl Renderer { alloc += ((n_drawobj + 255) & !255) * BIN_SIZE; let ptcl_base = alloc; alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC; + config.width_in_tiles = width_in_tiles as u32; config.height_in_tiles = height_in_tiles as u32; config.tile_alloc = tile_base as u32; @@ -401,6 +412,19 @@ impl Renderer { cmd_buf.end_debug_label(); cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); + cmd_buf.begin_debug_label("Clip bounding box calculation"); + self.clip_binding + .record(cmd_buf, &self.clip_code, self.n_clip as u32); + cmd_buf.end_debug_label(); + cmd_buf.begin_debug_label("Element binning"); + cmd_buf.dispatch( + &self.bin_pipeline, + &self.bin_ds, + (((self.n_paths + 255) / 256) as u32, 1, 1), + (256, 1, 1), + ); + cmd_buf.end_debug_label(); + cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Tile allocation"); cmd_buf.dispatch( &self.tile_pipeline, @@ -430,18 +454,7 @@ impl Renderer { ); cmd_buf.end_debug_label(); cmd_buf.write_timestamp(&query_pool, 4); - // Note: this barrier is not needed as an actual dependency between - // pipeline stages, but I am keeping it in so that timer queries are - // easier to interpret. - cmd_buf.memory_barrier(); - cmd_buf.begin_debug_label("Element binning"); - cmd_buf.dispatch( - &self.bin_pipeline, - &self.bin_ds, - (((self.n_paths + 255) / 256) as u32, 1, 1), - (256, 1, 1), - ); - cmd_buf.end_debug_label(); + // TODO: redo query accounting cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Coarse raster"); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 96bbf03..ef0a3a7 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -123,6 +123,10 @@ impl PietGpuRenderContext { self.new_encoder.n_transform() } + pub fn n_clip(&self) -> u32 { + self.new_encoder.n_clip() + } + pub fn write_scene(&self, buf: &mut BufWrite) { self.new_encoder.write_scene(buf); } diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index 014cef4..e155c50 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -16,12 +16,14 @@ //! Stages for new element pipeline, exposed for testing. +mod clip; mod draw; mod path; mod transform; use bytemuck::{Pod, Zeroable}; +pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE}; use piet_gpu_hal::{Buffer, CmdBuf, Session}; @@ -47,8 +49,13 @@ pub struct Config { pub trans_alloc: u32, pub bbox_alloc: u32, pub drawmonoid_alloc: u32, + pub clip_alloc: u32, + pub clip_bic_alloc: u32, + pub clip_stack_alloc: u32, + pub clip_bbox_alloc: u32, pub n_trans: u32, pub n_path: u32, + pub n_clip: u32, pub trans_offset: u32, pub linewidth_offset: u32, pub pathtag_offset: u32, diff --git a/piet-gpu/src/stages/clip.rs b/piet-gpu/src/stages/clip.rs new file mode 100644 index 0000000..e4bc3db --- /dev/null +++ b/piet-gpu/src/stages/clip.rs @@ -0,0 +1,94 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! The clip processing stage (includes substages). + +use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session}; + +// Note that this isn't the code/stage/binding pattern of most of the other stages +// in the new element processing pipeline. We want to move those temporary buffers +// into common memory and converge on this pattern. +pub struct ClipCode { + reduce_pipeline: Pipeline, + leaf_pipeline: Pipeline, +} + +pub struct ClipBinding { + reduce_ds: DescriptorSet, + leaf_ds: DescriptorSet, +} + +pub const CLIP_PART_SIZE: u32 = 256; + +impl ClipCode { + pub unsafe fn new(session: &Session) -> ClipCode { + let reduce_code = include_shader!(session, "../../shader/gen/clip_reduce"); + let reduce_pipeline = session + .create_compute_pipeline(reduce_code, &[BindType::Buffer, BindType::BufReadOnly]) + .unwrap(); + let leaf_code = include_shader!(session, "../../shader/gen/clip_leaf"); + let leaf_pipeline = session + .create_compute_pipeline(leaf_code, &[BindType::Buffer, BindType::BufReadOnly]) + .unwrap(); + ClipCode { + reduce_pipeline, + leaf_pipeline, + } + } +} + +impl ClipBinding { + pub unsafe fn new( + session: &Session, + code: &ClipCode, + config: &Buffer, + memory: &Buffer, + ) -> ClipBinding { + let reduce_ds = session + .create_simple_descriptor_set(&code.reduce_pipeline, &[memory, config]) + .unwrap(); + let leaf_ds = session + .create_simple_descriptor_set(&code.leaf_pipeline, &[memory, config]) + .unwrap(); + ClipBinding { reduce_ds, leaf_ds } + } + + /// Record the clip dispatches. + /// + /// Assumes memory barrier on entry. Provides memory barrier on exit. + pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) { + let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE; + if n_wg_reduce > 0 { + cmd_buf.dispatch( + &code.reduce_pipeline, + &self.reduce_ds, + (n_wg_reduce, 1, 1), + (CLIP_PART_SIZE, 1, 1), + ); + cmd_buf.memory_barrier(); + } + let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE; + if n_wg > 0 { + cmd_buf.dispatch( + &code.leaf_pipeline, + &self.leaf_ds, + (n_wg, 1, 1), + (CLIP_PART_SIZE, 1, 1), + ); + cmd_buf.memory_barrier(); + } + } +} diff --git a/tests/src/clip.rs b/tests/src/clip.rs new file mode 100644 index 0000000..cfd8a35 --- /dev/null +++ b/tests/src/clip.rs @@ -0,0 +1,237 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Tests for the piet-gpu clip processing stage. + +use bytemuck::{Pod, Zeroable}; +use rand::Rng; + +use piet_gpu::stages::{self, ClipBinding, ClipCode, DrawMonoid}; +use piet_gpu_hal::{BufWrite, BufferUsage}; + +use crate::{Config, Runner, TestResult}; + +struct ClipData { + clip_stream: Vec, + // In the atomic-int friendly encoding + path_bbox_stream: Vec, +} + +#[derive(Copy, Clone, Debug, Pod, Zeroable, Default)] +#[repr(C)] +struct PathBbox { + bbox: [u32; 4], + linewidth: f32, + trans_ix: u32, +} + +pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult { + let mut result = TestResult::new("clip"); + let n_clip: u64 = config.size.choose(1 << 8, 1 << 12, 1 << 16); + let data = ClipData::new(n_clip); + let stage_config = data.get_config(); + let config_buf = runner + .session + .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) + .unwrap(); + // Need to actually get data uploaded + let mut memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE); + { + let mut buf_write = memory.map_write(..); + data.fill_memory(&mut buf_write); + } + + let code = ClipCode::new(&runner.session); + let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf); + + let mut commands = runner.commands(); + commands.write_timestamp(0); + commands.upload(&memory); + binding.record(&mut commands.cmd_buf, &code, n_clip as u32); + commands.download(&memory); + commands.write_timestamp(1); + runner.submit(commands); + let dst = memory.map_read(..); + if let Some(failure) = data.verify(&dst) { + result.fail(failure); + } + result +} + +fn rand_bbox() -> [u32; 4] { + let mut rng = rand::thread_rng(); + const Y_MIN: u32 = 32768; + const Y_MAX: u32 = Y_MIN + 1000; + let mut x0 = rng.gen_range(Y_MIN, Y_MAX); + let mut y0 = rng.gen_range(Y_MIN, Y_MAX); + let mut x1 = rng.gen_range(Y_MIN, Y_MAX); + let mut y1 = rng.gen_range(Y_MIN, Y_MAX); + if x0 > x1 { + std::mem::swap(&mut x0, &mut x1); + } + if y0 > y1 { + std::mem::swap(&mut y0, &mut y1); + } + [x0, y0, x1, y1] +} + +/// Convert from atomic-friendly to normal float bbox. +fn decode_bbox(raw: [u32; 4]) -> [f32; 4] { + fn decode(x: u32) -> f32 { + x as f32 - 32768.0 + } + [ + decode(raw[0]), + decode(raw[1]), + decode(raw[2]), + decode(raw[3]), + ] +} + +fn intersect_bbox(b0: [f32; 4], b1: [f32; 4]) -> [f32; 4] { + [ + b0[0].max(b1[0]), + b0[1].max(b1[1]), + b0[2].min(b1[2]), + b0[3].min(b1[3]), + ] +} + +const INFTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9]; + +impl ClipData { + /// Generate a random clip sequence + fn new(n: u64) -> ClipData { + // Simple LCG random generator, for deterministic results + let mut z = 20170705u64; + let mut depth = 0; + let mut path_bbox_stream = Vec::new(); + let clip_stream = (0..n) + .map(|i| { + let is_push = if depth == 0 { + true + } else if depth >= 255 { + false + } else { + z = z.wrapping_mul(742938285) % ((1 << 31) - 1); + (z % 2) != 0 + }; + if is_push { + depth += 1; + let path_ix = path_bbox_stream.len() as u32; + let bbox = rand_bbox(); + let path_bbox = PathBbox { + bbox, + ..Default::default() + }; + path_bbox_stream.push(path_bbox); + path_ix + } else { + depth -= 1; + !(i as u32) + } + }) + .collect(); + ClipData { + clip_stream, + path_bbox_stream, + } + } + + fn get_config(&self) -> stages::Config { + let n_clip = self.clip_stream.len(); + let n_path = self.path_bbox_stream.len(); + let clip_alloc = 0; + let path_bbox_alloc = clip_alloc + 4 * n_clip; + let drawmonoid_alloc = path_bbox_alloc + 24 * n_path; + let clip_bic_alloc = drawmonoid_alloc + 8 * n_clip; + // TODO: this is over-allocated, we only need one bic per wg + let clip_stack_alloc = clip_bic_alloc + 8 * n_clip; + let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip; + stages::Config { + clip_alloc: clip_alloc as u32, + // TODO: this wants to be renamed to path_bbox_alloc + bbox_alloc: path_bbox_alloc as u32, + drawmonoid_alloc: drawmonoid_alloc as u32, + clip_bic_alloc: clip_bic_alloc as u32, + clip_stack_alloc: clip_stack_alloc as u32, + clip_bbox_alloc: clip_bbox_alloc as u32, + n_clip: n_clip as u32, + ..Default::default() + } + } + + fn memory_size(&self) -> u64 { + (8 + self.clip_stream.len() * (4 + 8 + 8 + 20 + 16) + self.path_bbox_stream.len() * 24) + as u64 + } + + fn fill_memory(&self, buf: &mut BufWrite) { + // offset / header; no dynamic allocation + buf.fill_zero(8); + buf.extend_slice(&self.clip_stream); + buf.extend_slice(&self.path_bbox_stream); + // drawmonoid is left uninitialized + } + + fn verify(&self, buf: &[u8]) -> Option { + let n_clip = self.clip_stream.len(); + let n_path = self.path_bbox_stream.len(); + let clip_bbox_start = 8 + n_clip * (4 + 8 + 8 + 20) + n_path * 24; + let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16); + let clip_result = bytemuck::cast_slice::(&buf[clip_range]); + let draw_start = 8 + n_clip * 4 + n_path * 24; + let draw_range = draw_start..(draw_start + n_clip * 8); + let draw_result = bytemuck::cast_slice::(&buf[draw_range]); + let mut bbox_stack = Vec::new(); + let mut parent_stack = Vec::new(); + for (i, path_ix) in self.clip_stream.iter().enumerate() { + let mut expected_path = None; + if *path_ix >= 0x8000_0000 { + let parent = parent_stack.pop().unwrap(); + expected_path = Some(self.clip_stream[parent as usize]); + bbox_stack.pop().unwrap(); + } else { + parent_stack.push(i); + let path_bbox_stream = self.path_bbox_stream[*path_ix as usize]; + let bbox = decode_bbox(path_bbox_stream.bbox); + let new = match bbox_stack.last() { + None => bbox, + Some(old) => intersect_bbox(*old, bbox), + }; + bbox_stack.push(new); + }; + let expected = bbox_stack.last().copied().unwrap_or(INFTY_BBOX); + let clip_bbox = clip_result[i]; + if clip_bbox != expected { + return Some(format!( + "{}: path_ix={}, expected bbox={:?}, clip_bbox={:?}", + i, path_ix, expected, clip_bbox + )); + } + if let Some(expected_path) = expected_path { + let actual_path = draw_result[i].path_ix; + if expected_path != actual_path { + return Some(format!( + "{}: expected path {}, actual {}", + i, expected_path, actual_path + )); + } + } + } + None + } +} diff --git a/tests/src/draw.rs b/tests/src/draw.rs index d79a9d9..7b264d4 100644 --- a/tests/src/draw.rs +++ b/tests/src/draw.rs @@ -102,17 +102,21 @@ impl DrawTestData { // Layout of memory let drawmonoid_alloc = 0; let anno_alloc = drawmonoid_alloc + 8 * n_tags; + let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags; let stage_config = stages::Config { n_elements: n_tags as u32, anno_alloc: anno_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32, + clip_alloc: clip_alloc as u32, ..Default::default() }; stage_config } fn memory_size(&self) -> u64 { - (8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64 + // Note: this overallocates the clip buf a bit - only needed for the + // total number of begin_clip and end_clip tags. + (8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64 } fn fill_scene(&self, buf: &mut BufWrite) { @@ -128,14 +132,13 @@ impl DrawTestData { let actual = bytemuck::cast_slice::(&buf[8..8 + size]); let mut expected = DrawMonoid::default(); for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { - // We compute an inclusive prefix sum, but for this application - // exclusive would be slightly better. We can adapt though. + // Verify exclusive prefix sum. let (path_ix, clip_ix) = Self::reduce_tag(*tag); - expected.path_ix += path_ix; - expected.clip_ix += clip_ix; if *actual != expected { return Some(format!("draw mismatch at {}", i)); } + expected.path_ix += path_ix; + expected.clip_ix += clip_ix; } None } diff --git a/tests/src/main.rs b/tests/src/main.rs index e52ce85..5599f70 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -17,6 +17,7 @@ //! Tests for piet-gpu shaders and GPU capabilities. mod clear; +mod clip; mod config; mod draw; mod linkedlist; @@ -139,6 +140,7 @@ fn main() { report(&transform::transform_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config)); report(&draw::draw_test(&mut runner, &config)); + report(&clip::clip_test(&mut runner, &config)); } } } diff --git a/tests/src/runner.rs b/tests/src/runner.rs index 9bca26b..1fd6774 100644 --- a/tests/src/runner.rs +++ b/tests/src/runner.rs @@ -20,8 +20,8 @@ use std::ops::RangeBounds; use bytemuck::Pod; use piet_gpu_hal::{ - BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool, - Session, + BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, + QueryPool, Session, }; pub struct Runner { @@ -37,15 +37,8 @@ pub struct Commands { query_pool: QueryPool, } -/// Buffer for uploading data to GPU. -#[allow(unused)] -pub struct BufUp { - pub stage_buf: Buffer, - pub dev_buf: Buffer, -} - -/// Buffer for downloading data from GPU. -pub struct BufDown { +/// Buffer for both uploading and downloading +pub struct BufStage { pub stage_buf: Buffer, pub dev_buf: Buffer, } @@ -92,7 +85,7 @@ impl Runner { } #[allow(unused)] - pub fn buf_up(&self, size: u64) -> BufUp { + pub fn buf_up(&self, size: u64) -> BufStage { let stage_buf = self .session .create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC) @@ -101,13 +94,13 @@ impl Runner { .session .create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE) .unwrap(); - BufUp { stage_buf, dev_buf } + BufStage { stage_buf, dev_buf } } /// Create a buffer for download (readback). /// /// The `usage` parameter need not include COPY_SRC and STORAGE. - pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown { + pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufStage { let stage_buf = self .session .create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST) @@ -116,7 +109,7 @@ impl Runner { .session .create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE) .unwrap(); - BufDown { stage_buf, dev_buf } + BufStage { stage_buf, dev_buf } } pub fn backend_type(&self) -> BackendType { @@ -129,17 +122,16 @@ impl Commands { self.cmd_buf.write_timestamp(&self.query_pool, query); } - #[allow(unused)] - pub unsafe fn upload(&mut self, buf: &BufUp) { + pub unsafe fn upload(&mut self, buf: &BufStage) { self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf); } - pub unsafe fn download(&mut self, buf: &BufDown) { + pub unsafe fn download(&mut self, buf: &BufStage) { self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf); } } -impl BufDown { +impl BufStage { pub unsafe fn read(&self, dst: &mut Vec) { self.stage_buf.read(dst).unwrap() } @@ -147,4 +139,8 @@ impl BufDown { pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds) -> BufReadGuard<'a> { self.stage_buf.map_read(range).unwrap() } + + pub unsafe fn map_write<'a>(&'a mut self, range: impl RangeBounds) -> BufWriteGuard { + self.stage_buf.map_write(range).unwrap() + } }