Merge pull request #150 from linebender/clip

New clip implementation
This commit is contained in:
Raph Levien 2022-02-21 13:23:31 -08:00 committed by GitHub
commit d81e5cb4ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
80 changed files with 3318 additions and 1343 deletions

View file

@ -20,6 +20,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
#include "annotated.h" #include "annotated.h"
#include "bins.h" #include "bins.h"
#include "drawtag.h"
// scale factors useful for converting coordinates to bins // scale factors useful for converting coordinates to bins
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX)) #define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
@ -35,6 +36,47 @@ shared uint count[N_SLICE][N_TILE];
shared Alloc sh_chunk_alloc[N_TILE]; shared Alloc sh_chunk_alloc[N_TILE];
shared bool sh_alloc_failed; shared bool sh_alloc_failed;
DrawMonoid load_draw_monoid(uint element_ix) {
uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
uint path_ix = memory[base];
uint clip_ix = memory[base + 1];
return DrawMonoid(path_ix, clip_ix);
}
// Load bounding box computed by clip processing
vec4 load_clip_bbox(uint clip_ix) {
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * clip_ix;
float x0 = uintBitsToFloat(memory[base]);
float y0 = uintBitsToFloat(memory[base + 1]);
float x1 = uintBitsToFloat(memory[base + 2]);
float y1 = uintBitsToFloat(memory[base + 3]);
vec4 bbox = vec4(x0, y0, x1, y1);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_path_bbox(AnnotatedRef ref, vec4 bbox) {
uint ix = ref.offset >> 2;
memory[ix + 1] = floatBitsToUint(bbox.x);
memory[ix + 2] = floatBitsToUint(bbox.y);
memory[ix + 3] = floatBitsToUint(bbox.z);
memory[ix + 4] = floatBitsToUint(bbox.w);
}
void main() { void main() {
uint my_n_elements = conf.n_elements; uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
@ -61,13 +103,27 @@ void main() {
case Annotated_Image: case Annotated_Image:
case Annotated_BeginClip: case Annotated_BeginClip:
case Annotated_EndClip: case Annotated_EndClip:
// Note: we take advantage of the fact that these drawing elements DrawMonoid draw_monoid = load_draw_monoid(element_ix);
// have the bbox at the same place in their layout. uint path_ix = draw_monoid.path_ix;
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref); vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
x0 = int(floor(clip.bbox.x * SX)); uint clip_ix = draw_monoid.clip_ix;
y0 = int(floor(clip.bbox.y * SY)); if (clip_ix > 0) {
x1 = int(ceil(clip.bbox.z * SX)); clip_bbox = load_clip_bbox(clip_ix - 1);
y1 = int(ceil(clip.bbox.w * SY)); }
// For clip elements, clip_bbox is the bbox of the clip path, intersected
// with enclosing clips.
// For other elements, it is the bbox of the enclosing clips.
vec4 path_bbox = load_path_bbox(path_ix);
vec4 bbox = bbox_intersect(path_bbox, clip_bbox);
// Avoid negative-size bbox (is this necessary)?
bbox.zw = max(bbox.xy, bbox.zw);
// Store clip-intersected bbox for tile_alloc.
store_path_bbox(ref, bbox);
x0 = int(floor(bbox.x * SX));
y0 = int(floor(bbox.y * SY));
x1 = int(ceil(bbox.z * SX));
y1 = int(ceil(bbox.w * SY));
break; break;
} }

View file

@ -22,7 +22,7 @@ rule dxil
rule msl rule msl
command = $spirv_cross --msl $in --output $out $msl_flags command = $spirv_cross --msl $in --output $out $msl_flags
build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h
build gen/binning.hlsl: hlsl gen/binning.spv build gen/binning.hlsl: hlsl gen/binning.spv
build gen/binning.dxil: dxil gen/binning.hlsl build gen/binning.dxil: dxil gen/binning.hlsl
build gen/binning.msl: msl gen/binning.spv build gen/binning.msl: msl gen/binning.spv
@ -119,6 +119,16 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv build gen/draw_leaf.msl: msl gen/draw_leaf.spv
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
build gen/clip_reduce.msl: msl gen/clip_reduce.spv
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
build gen/clip_leaf.msl: msl gen/clip_leaf.spv
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl

View file

@ -0,0 +1,287 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The second dispatch of clip stack processing.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE WG_SIZE
layout(local_size_x = WG_SIZE) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// Some of this is cut'n'paste duplication with the reduce pass, and
// arguably should be moved to a common .h file.
// The bicyclic monoid
struct ClipEl {
// index of parent node
uint parent_ix;
// bounding box
vec4 bbox;
};
struct Bic {
uint a;
uint b;
};
Bic bic_combine(Bic x, Bic y) {
uint m = min(x.b, y.a);
return Bic(x.a + y.a - m, x.b + y.b - m);
}
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
shared Bic sh_bic[WG_SIZE * 2 - 2];
shared uint sh_stack[PARTITION_SIZE];
shared vec4 sh_stack_bbox[PARTITION_SIZE];
shared uint sh_link[PARTITION_SIZE];
shared vec4 sh_bbox[PARTITION_SIZE];
// This is adapted directly from the stack monoid impl.
// Return value is reference within partition if >= 0,
// otherwise reference to stack.
uint search_link(inout Bic bic) {
uint ix = gl_LocalInvocationID.x;
uint j = 0;
while (j < LG_WG_SIZE) {
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
if (((ix >> j) & 1) != 0) {
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
if (test.b > 0) {
break;
}
bic = test;
ix -= 1u << j;
}
j++;
}
if (ix > 0) {
while (j > 0) {
j--;
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
if (test.b == 0) {
bic = test;
ix -= 1u << j;
}
}
}
// ix is the smallest value such that reduce(ix..th).b == 0
if (ix > 0) {
return ix - 1;
} else {
return ~0u - bic.a;
}
}
Bic load_bic(uint ix) {
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
return Bic(memory[base], memory[base + 1]);
}
ClipEl load_clip_el(uint ix) {
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
uint parent_ix = memory[base];
float x0 = uintBitsToFloat(memory[base + 1]);
float y0 = uintBitsToFloat(memory[base + 2]);
float x1 = uintBitsToFloat(memory[base + 3]);
float y1 = uintBitsToFloat(memory[base + 4]);
vec4 bbox = vec4(x0, y0, x1, y1);
return ClipEl(parent_ix, bbox);
}
uint load_path_ix(uint ix) {
// This is one approach to a partial final block. Another would be
// to do a memset to the padding in the command queue.
if (ix < conf.n_clip) {
return memory[(conf.clip_alloc.offset >> 2) + ix];
} else {
// EndClip tags don't implicate further loads.
return 0x80000000;
}
}
void store_clip_bbox(uint ix, vec4 bbox) {
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * ix;
memory[base] = floatBitsToUint(bbox.x);
memory[base + 1] = floatBitsToUint(bbox.y);
memory[base + 2] = floatBitsToUint(bbox.z);
memory[base + 3] = floatBitsToUint(bbox.w);
}
void main() {
// materialize stack up to the start of this partition. This
// is based on the pure stack monoid, but with two additions.
// First, (this only matters if the stack goes deeper than the
// partition size, which might be unlikely in practice), the
// topmost stack element from each partition is picked, then an
// exclusive scan of those. Also note that if this is skipped,
// a scan is not needed in the reduce stage.
// Second, after the stream compaction, do a scan of the retrieved
// bbox values.
uint th = gl_LocalInvocationID.x;
Bic bic = Bic(0, 0);
if (th < gl_WorkGroupID.x) {
bic = load_bic(th);
}
sh_bic[th] = bic;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (th + (1u << i) < WG_SIZE) {
Bic other = sh_bic[th + (1u << i)];
bic = bic_combine(bic, other);
}
barrier();
sh_bic[th] = bic;
}
barrier();
uint stack_size = sh_bic[0].b;
// TODO: do bbox scan here (to unlock greater stack depth)
// binary search in stack
uint sp = PARTITION_SIZE - 1 - th;
uint ix = 0;
for (uint i = 0; i < LG_WG_SIZE; i++) {
uint probe = ix + (uint(PARTITION_SIZE / 2) >> i);
if (sp < sh_bic[probe].b) {
ix = probe;
}
}
// ix is largest value such that sp < sh_bic[ix].b (if any)
uint b = sh_bic[ix].b;
vec4 bbox = vec4(-1e9, -1e9, 1e9, 1e9);
if (sp < b) {
// maybe store the index here for future use?
ClipEl el = load_clip_el(ix * PARTITION_SIZE + b - sp - 1);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
// other element values here?
}
// forward scan of bbox values of prefix stack
for (uint i = 0; i < LG_WG_SIZE; i++) {
sh_stack_bbox[th] = bbox;
barrier();
if (th >= (1u << i)) {
bbox = bbox_intersect(sh_stack_bbox[th - (1u << i)], bbox);
}
barrier();
}
sh_stack_bbox[th] = bbox;
// Read input and compute bicyclic semigroup binary tree
uint inp = load_path_ix(gl_GlobalInvocationID.x);
bool is_push = int(inp) >= 0;
bic = Bic(1 - uint(is_push), uint(is_push));
sh_bic[th] = bic;
if (is_push) {
bbox = load_path_bbox(inp);
} else {
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
}
uint inbase = 0;
for (uint i = 0; i < LG_WG_SIZE - 1; i++) {
uint outbase = 2 * WG_SIZE - (1u << (LG_WG_SIZE - i));
barrier();
if (th < (1u << (LG_WG_SIZE - 1 - i))) {
sh_bic[outbase + th] = bic_combine(sh_bic[inbase + th * 2], sh_bic[inbase + th * 2 + 1]);
}
inbase = outbase;
}
barrier();
// Search for predecessor node
bic = Bic(0, 0);
uint link = search_link(bic);
// we use N_SEQ > 1 convention here:
// link >= 0 is index within partition
// link < 0 is reference to stack
// We want grandparent bbox for pop nodes, so follow those links.
sh_link[th] = link;
barrier();
uint grandparent;
if (int(link) >= 0) {
grandparent = sh_link[link];
} else {
grandparent = link - 1;
}
// Resolve parent
uint parent;
if (int(link) >= 0) {
parent = gl_WorkGroupID.x * PARTITION_SIZE + link;
} else if (int(link + stack_size) >= 0) {
parent = sh_stack[PARTITION_SIZE + link];
} else {
parent = ~0u;
}
// bbox scan along parent links
for (uint i = 0; i < LG_WG_SIZE; i++) {
// sh_link was already stored for first iteration
if (i != 0) {
sh_link[th] = link;
}
sh_bbox[th] = bbox;
barrier();
if (int(link) >= 0) {
bbox = bbox_intersect(sh_bbox[link], bbox);
link = sh_link[link];
}
barrier();
}
if (int(link + stack_size) >= 0) {
bbox = bbox_intersect(sh_stack_bbox[PARTITION_SIZE + link], bbox);
}
// At this point, bbox is the reduction of bounding boxes along the tree.
sh_bbox[th] = bbox;
barrier();
uint path_ix = inp;
if (!is_push && gl_GlobalInvocationID.x < conf.n_clip) {
// Is this load expensive? If so, it's loaded earlier for in-partition
// and is in the ClipEl for cross-partition.
// If not, can probably get rid of it in the stack intermediate buf.
path_ix = load_path_ix(parent);
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp;
// Fix up drawmonoid so path_ix at EndClip matches BeginClip
memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0) {
bbox = sh_bbox[grandparent];
} else if (int(grandparent + stack_size) >= 0) {
bbox = sh_stack_bbox[PARTITION_SIZE + grandparent];
} else {
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
}
}
store_clip_bbox(gl_GlobalInvocationID.x, bbox);
}

View file

@ -0,0 +1,148 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduce pass for clip stack processing.
// The primary input is a sequence of path ids representing paths to
// push, with a special value of ~0 to represent pop.
// For each path, the bounding box is found in the anno stream
// (anno_alloc), though this may change.
// Output is a stack monoid reduction for the partition. The Bic
// is stored in the BicBuf, and the stack slice in StackBuf.
// Note: for this shader, only pushes are represented in the stack
// monoid reduction output, so we don't have to worry about the
// interpretation of pops.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE WG_SIZE
layout(local_size_x = WG_SIZE) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// The intermediate state for clip processing.
struct ClipEl {
// index of parent node
uint parent_ix;
// bounding box
vec4 bbox;
};
// The bicyclic monoid
struct Bic {
uint a;
uint b;
};
Bic bic_combine(Bic x, Bic y) {
uint m = min(x.b, y.a);
return Bic(x.a + y.a - m, x.b + y.b - m);
}
shared Bic sh_bic[WG_SIZE];
shared uint sh_parent[WG_SIZE];
shared uint sh_path_ix[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_bic(uint ix, Bic bic) {
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
memory[base] = bic.a;
memory[base + 1] = bic.b;
}
void store_clip_el(uint ix, ClipEl el) {
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
memory[base] = el.parent_ix;
memory[base + 1] = floatBitsToUint(el.bbox.x);
memory[base + 2] = floatBitsToUint(el.bbox.y);
memory[base + 3] = floatBitsToUint(el.bbox.z);
memory[base + 4] = floatBitsToUint(el.bbox.w);
}
void main() {
uint th = gl_LocalInvocationID.x;
uint inp = memory[(conf.clip_alloc.offset >> 2) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
// reverse scan of bicyclic semigroup
Bic bic = Bic(1 - uint(is_push), uint(is_push));
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (th + (1u << i) < WG_SIZE) {
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
bic = bic_combine(bic, other);
}
barrier();
sh_bic[th] = bic;
}
if (th == 0) {
store_bic(gl_WorkGroupID.x, bic);
}
barrier();
uint size = sh_bic[0].b;
bic = Bic(0, 0);
if (th + 1 < WG_SIZE) {
bic = sh_bic[th + 1];
}
if (is_push && bic.a == 0) {
uint local_ix = size - bic.b - 1;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
barrier();
// Do forward scan of bounding box intersection
vec4 bbox;
uint path_ix;
if (th < size) {
path_ix = sh_path_ix[th];
bbox = load_path_bbox(path_ix);
}
// Not necessary if depth is bounded by wg size
#if 0
for (uint i = 0; i < LG_WG_SIZE; i++) {
// We gate so we never access uninit data, but it might
// be more efficient to avoid the conditionals.
if (th < size) {
sh_bbox[th] = bbox;
}
barrier();
if (th < size && th >= (1u << i)) {
bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
}
barrier();
}
#endif
if (th < size) {
uint parent_ix = sh_parent[th] + gl_WorkGroupID.x * PARTITION_SIZE;
ClipEl el = ClipEl(parent_ix, bbox);
store_clip_el(gl_GlobalInvocationID.x, el);
}
}

View file

@ -136,9 +136,6 @@ void main() {
// currently in a clip for which the entire tile has an alpha of zero, and // currently in a clip for which the entire tile has an alpha of zero, and
// the value is the depth after the "begin clip" of that element. // the value is the depth after the "begin clip" of that element.
uint clip_zero_depth = 0; uint clip_zero_depth = 0;
// State for the "clip one" optimization. If bit `i` is set, then that means
// that the clip pushed at depth `i` has an alpha of all one.
uint clip_one_mask = 0;
// I'm sure we can figure out how to do this with at least one fewer register... // I'm sure we can figure out how to do this with at least one fewer register...
// Items up to rd_ix have been read from sh_elements // Items up to rd_ix have been read from sh_elements
@ -227,9 +224,8 @@ void main() {
case Annotated_LinGradient: case Annotated_LinGradient:
case Annotated_BeginClip: case Annotated_BeginClip:
case Annotated_EndClip: case Annotated_EndClip:
// We have one "path" for each element, even if the element isn't uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
// actually a path (currently EndClip, but images etc in the future). uint path_ix = memory[drawmonoid_base];
uint path_ix = element_ix;
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x; uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride; sh_tile_stride[th_ix] = stride;
@ -283,15 +279,15 @@ void main() {
uint x = sh_tile_x0[el_ix] + seq_ix % width; uint x = sh_tile_x0[el_ix] + seq_ix % width;
uint y = sh_tile_y0[el_ix] + seq_ix / width; uint y = sh_tile_y0[el_ix] + seq_ix / width;
bool include_tile = false; bool include_tile = false;
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) { if (mem_ok) {
include_tile = true;
} else if (mem_ok) {
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
// Include the path in the tile if bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip;
// - the tile contains at least a segment (tile offset non-zero) // Always include the tile if it contains a path segment.
// - the tile is completely covered (backdrop non-zero) // For draws, include the tile if it is solid.
include_tile = tile.tile.offset != 0 || tile.backdrop != 0; // For clips, include the tile if it is empty - this way, logic
// below will suppress the drawing of inner elements.
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip;
} }
if (include_tile) { if (include_tile) {
uint el_slice = el_ix / 32; uint el_slice = el_ix / 32;
@ -378,33 +374,26 @@ void main() {
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) { if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1; clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1u << clip_depth);
} else { } else {
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break; break;
} }
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
Cmd_BeginClip_write(cmd_alloc, cmd_ref); Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4; cmd_ref.offset += 4;
if (clip_depth < 32) {
clip_one_mask &= ~(1u << clip_depth);
}
} }
clip_depth++; clip_depth++;
break; break;
case Annotated_EndClip: case Annotated_EndClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
clip_depth--; clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break;
break;
}
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
} }
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0);
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
break; break;
} }
} else { } else {

View file

@ -72,9 +72,14 @@ void main() {
} }
uint out_ix = gl_GlobalInvocationID.x * N_ROWS; uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2; uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
uint clip_out_base = conf.clip_alloc.offset >> 2;
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size); AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
for (uint i = 0; i < N_ROWS; i++) { for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_tag_monoid(row, local[i]); Monoid m = row;
if (i > 0) {
m = combine_tag_monoid(m, local[i - 1]);
}
// m now holds exclusive scan of draw monoid
memory[out_base + i * 2] = m.path_ix; memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix; memory[out_base + i * 2 + 1] = m.clip_ix;
@ -83,8 +88,9 @@ void main() {
// later stages read scene + bbox etc. // later stages read scene + bbox etc.
ElementRef this_ref = Element_index(ref, i); ElementRef this_ref = Element_index(ref, i);
tag_word = Element_tag(this_ref).tag; tag_word = Element_tag(this_ref).tag;
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) { if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage ||
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1); tag_word == Element_BeginClip) {
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_l = float(memory[bbox_offset]) - 32768.0;
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0; float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
@ -142,21 +148,27 @@ void main() {
anno_img.offset = fill_img.offset; anno_img.offset = fill_img.offset;
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img); Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
break; break;
case Element_BeginClip:
AnnoBeginClip anno_begin_clip;
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
break;
} }
} else if (tag_word == Element_BeginClip) {
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoBeginClip anno_begin_clip;
// This is the absolute bbox, it's been transformed during encoding.
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
} else if (tag_word == Element_EndClip) { } else if (tag_word == Element_EndClip) {
Clip end_clip = Element_EndClip_read(this_ref);
AnnoEndClip anno_end_clip; AnnoEndClip anno_end_clip;
// This bbox is expected to be the same as the begin one. // The actual bbox will be reconstructed from clip stream output.
anno_end_clip.bbox = end_clip.bbox; anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip); Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
} }
// Generate clip stream.
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) {
uint path_ix = ~(out_ix + i);
if (tag_word == Element_BeginClip) {
path_ix = m.path_ix;
}
memory[clip_out_base + m.clip_ix] = path_ix;
}
out_ref.offset += Annotated_size; out_ref.offset += Annotated_size;
} }
} }

View file

@ -44,8 +44,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -63,8 +63,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

View file

@ -44,8 +44,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -63,8 +63,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -17,8 +17,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -39,7 +44,7 @@ struct SPIRV_Cross_Input
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x; uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(52)) if (ix < _21.Load(68))
{ {
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u); _45.Store(out_ix * 4 + 8, 65535u);

View file

@ -22,8 +22,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -9,16 +9,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
};
struct AnnotatedRef struct AnnotatedRef
{ {
uint offset; uint offset;
@ -40,6 +30,12 @@ struct BinInstance
uint element_ix; uint element_ix;
}; };
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Config struct Config
{ {
uint n_elements; uint n_elements;
@ -54,8 +50,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -64,8 +65,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _84 : register(u0, space0); RWByteAddressBuffer _94 : register(u0, space0);
ByteAddressBuffer _253 : register(t1, space0); ByteAddressBuffer _202 : register(t1, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -93,7 +94,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _84.Load(offset * 4 + 8); uint v = _94.Load(offset * 4 + 8);
return v; return v;
} }
@ -102,36 +103,53 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1); uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _221 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _221; return _181;
} }
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix);
uint path_ix = _94.Load(base * 4 + 8);
uint clip_ix = _94.Load((base + 1u) * 4 + 8);
DrawMonoid _222 = { path_ix, clip_ix };
return _222;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_94.Load(base * 4 + 8));
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_path_bbox(AnnotatedRef ref, float4 bbox)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; _94.Store((ix + 1u) * 4 + 8, asuint(bbox.x));
uint param_1 = ix + 0u; _94.Store((ix + 2u) * 4 + 8, asuint(bbox.y));
uint raw0 = read_mem(param, param_1); _94.Store((ix + 3u) * 4 + 8, asuint(bbox.z));
Alloc param_2 = a; _94.Store((ix + 4u) * 4 + 8, asuint(bbox.w));
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
AnnoEndClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
return s;
}
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
{
AnnoEndClipRef _228 = { ref.offset + 4u };
Alloc param = a;
AnnoEndClipRef param_1 = _228;
return AnnoEndClip_read(param, param_1);
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -143,22 +161,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size) MallocResult malloc(uint size)
{ {
uint _90; uint _100;
_84.InterlockedAdd(0, size, _90); _94.InterlockedAdd(0, size, _100);
uint offset = _90; uint offset = _100;
uint _97; uint _107;
_84.GetDimensions(_97); _94.GetDimensions(_107);
_97 = (_97 - 8) / 4; _107 = (_107 - 8) / 4;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int(_97) * 4); r.failed = (offset + size) > uint(int(_107) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _119; uint _129;
_84.InterlockedMax(4, 1u, _119); _94.InterlockedMax(4, 1u, _129);
return r; return r;
} }
return r; return r;
@ -172,7 +190,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_84.Store(offset * 4 + 8, val); _94.Store(offset * 4 + 8, val);
} }
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
@ -186,7 +204,7 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
void comp_main() void comp_main()
{ {
uint my_n_elements = _253.Load(0); uint my_n_elements = _202.Load(0);
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
@ -198,15 +216,15 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef _308 = { _253.Load(32) + (element_ix * 40u) }; AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _308; AnnotatedRef ref = _415;
uint tag = 0u; uint tag = 0u;
if (element_ix < my_n_elements) if (element_ix < my_n_elements)
{ {
Alloc _318; Alloc _425;
_318.offset = _253.Load(32); _425.offset = _202.Load(32);
Alloc param; Alloc param;
param.offset = _318.offset; param.offset = _425.offset;
AnnotatedRef param_1 = ref; AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag; tag = Annotated_tag(param, param_1).tag;
} }
@ -222,21 +240,38 @@ void comp_main()
case 4u: case 4u:
case 5u: case 5u:
{ {
Alloc _336; uint param_2 = element_ix;
_336.offset = _253.Load(32); DrawMonoid draw_monoid = load_draw_monoid(param_2);
Alloc param_2; uint path_ix = draw_monoid.path_ix;
param_2.offset = _336.offset; float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
AnnotatedRef param_3 = ref; uint clip_ix = draw_monoid.clip_ix;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3); if (clip_ix > 0u)
x0 = int(floor(clip.bbox.x * 0.00390625f)); {
y0 = int(floor(clip.bbox.y * 0.00390625f)); uint param_3 = clip_ix - 1u;
x1 = int(ceil(clip.bbox.z * 0.00390625f)); clip_bbox = load_clip_bbox(param_3);
y1 = int(ceil(clip.bbox.w * 0.00390625f)); }
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8);
x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
break; break;
} }
} }
uint width_in_bins = ((_253.Load(8) + 16u) - 1u) / 16u; uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_253.Load(12) + 16u) - 1u) / 16u; uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -251,8 +286,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _437; uint _581;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437); InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -267,15 +302,15 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_4 = 0u; uint param_9 = 0u;
uint param_5 = 0u; uint param_10 = 0u;
bool param_6 = true; bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6); Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_7 = element_count * 4u; uint param_12 = element_count * 4u;
MallocResult _487 = malloc(param_7); MallocResult _631 = malloc(param_12);
MallocResult chunk = _487; MallocResult chunk = _631;
chunk_alloc = chunk.alloc; chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) if (chunk.failed)
@ -283,32 +318,32 @@ void comp_main()
sh_alloc_failed = true; sh_alloc_failed = true;
} }
} }
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _516; Alloc _660;
_516.offset = _253.Load(20); _660.offset = _202.Load(20);
Alloc param_8; Alloc param_13;
param_8.offset = _516.offset; param_13.offset = _660.offset;
uint param_9 = out_ix; uint param_14 = out_ix;
uint param_10 = element_count; uint param_15 = element_count;
write_mem(param_8, param_9, param_10); write_mem(param_13, param_14, param_15);
Alloc _528; Alloc _672;
_528.offset = _253.Load(20); _672.offset = _202.Load(20);
Alloc param_11; Alloc param_16;
param_11.offset = _528.offset; param_16.offset = _672.offset;
uint param_12 = out_ix + 1u; uint param_17 = out_ix + 1u;
uint param_13 = chunk_alloc.offset; uint param_18 = chunk_alloc.offset;
write_mem(param_11, param_12, param_13); write_mem(param_16, param_17, param_18);
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _543; bool _687;
if (!sh_alloc_failed) if (!sh_alloc_failed)
{ {
_543 = _84.Load(4) != 0u; _687 = _94.Load(4) != 0u;
} }
else else
{ {
_543 = sh_alloc_failed; _687 = sh_alloc_failed;
} }
if (_543) if (_687)
{ {
return; return;
} }
@ -327,12 +362,12 @@ void comp_main()
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _605 = { out_offset }; BinInstanceRef _749 = { out_offset };
BinInstance _607 = { element_ix }; BinInstance _751 = { element_ix };
Alloc param_14 = out_alloc; Alloc param_19 = out_alloc;
BinInstanceRef param_15 = _605; BinInstanceRef param_20 = _749;
BinInstance param_16 = _607; BinInstance param_21 = _751;
BinInstance_write(param_14, param_15, param_16); BinInstance_write(param_19, param_20, param_21);
} }
x++; x++;
if (x == x1) if (x == x1)

View file

@ -18,16 +18,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
};
struct AnnotatedRef struct AnnotatedRef
{ {
uint offset; uint offset;
@ -49,6 +39,12 @@ struct BinInstance
uint element_ix; uint element_ix;
}; };
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Memory struct Memory
{ {
uint mem_offset; uint mem_offset;
@ -75,8 +71,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -97,7 +98,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_84, constant uint& v_84BufferSize) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -105,46 +106,66 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_84.memory[offset]; uint v = v_94.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize) AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_84, v_84BufferSize); uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_84, constant uint& v_84BufferSize) DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{ {
uint ix = ref.offset >> uint(2); uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
Alloc param = a; uint path_ix = v_94.memory[base];
uint param_1 = ix + 0u; uint clip_ix = v_94.memory[base + 1u];
uint raw0 = read_mem(param, param_1, v_84, v_84BufferSize); return DrawMonoid{ path_ix, clip_ix };
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_84, v_84BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_84, v_84BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_84, v_84BufferSize);
AnnoEndClip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
return s;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize) float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{ {
Alloc param = a; uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u }; float x0 = as_type<float>(v_94.memory[base]);
return AnnoEndClip_read(param, param_1, v_84, v_84BufferSize); float y0 = as_type<float>(v_94.memory[base + 1u]);
float x1 = as_type<float>(v_94.memory[base + 2u]);
float y1 = as_type<float>(v_94.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_94.memory[base]) - 32768.0;
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize)
{
uint ix = ref.offset >> uint(2);
v_94.memory[ix + 1u] = as_type<uint>(bbox.x);
v_94.memory[ix + 2u] = as_type<uint>(bbox.y);
v_94.memory[ix + 3u] = as_type<uint>(bbox.z);
v_94.memory[ix + 4u] = as_type<uint>(bbox.w);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -156,26 +177,26 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_84, constant uint& v_84BufferSize) MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize)
{ {
uint _90 = atomic_fetch_add_explicit((device atomic_uint*)&v_84.mem_offset, size, memory_order_relaxed); uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed);
uint offset = _90; uint offset = _100;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int((v_84BufferSize - 8) / 4) * 4); r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _119 = atomic_fetch_max_explicit((device atomic_uint*)&v_84.mem_error, 1u, memory_order_relaxed); uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed);
return r; return r;
} }
return r; return r;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_84, constant uint& v_84BufferSize) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -183,27 +204,27 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_84.memory[offset] = val; v_94.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_84, constant uint& v_84BufferSize) void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = s.element_ix; uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_84, v_84BufferSize); write_mem(param, param_1, param_2, v_94, v_94BufferSize);
} }
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_84 [[buffer(0)]], const device ConfigBuf& _253 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint bitmaps[8][256]; threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed; threadgroup short sh_alloc_failed;
threadgroup uint count[8][256]; threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256]; threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_84BufferSize = spvBufferSizeConstants[0]; constant uint& v_94BufferSize = spvBufferSizeConstants[0];
uint my_n_elements = _253.conf.n_elements; uint my_n_elements = v_202.conf.n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
@ -215,14 +236,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _253.conf.anno_alloc.offset + (element_ix * 40u) }; AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
uint tag = 0u; uint tag = 0u;
if (element_ix < my_n_elements) if (element_ix < my_n_elements)
{ {
Alloc param; Alloc param;
param.offset = _253.conf.anno_alloc.offset; param.offset = v_202.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref; AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_84, v_84BufferSize).tag; tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
} }
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
@ -236,19 +257,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
case 4u: case 4u:
case 5u: case 5u:
{ {
Alloc param_2; uint param_2 = element_ix;
param_2.offset = _253.conf.anno_alloc.offset; DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
AnnotatedRef param_3 = ref; uint path_ix = draw_monoid.path_ix;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_84, v_84BufferSize); float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
x0 = int(floor(clip.bbox.x * 0.00390625)); uint clip_ix = draw_monoid.clip_ix;
y0 = int(floor(clip.bbox.y * 0.00390625)); if (clip_ix > 0u)
x1 = int(ceil(clip.bbox.z * 0.00390625)); {
y1 = int(ceil(clip.bbox.w * 0.00390625)); uint param_3 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202);
}
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = fast::max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8, v_94, v_94BufferSize);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
break; break;
} }
} }
uint width_in_bins = ((_253.conf.width_in_tiles + 16u) - 1u) / 16u; uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((_253.conf.height_in_tiles + 16u) - 1u) / 16u; uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -263,7 +303,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -278,15 +318,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_4 = 0u; uint param_9 = 0u;
uint param_5 = 0u; uint param_10 = 0u;
bool param_6 = true; bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6); Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_7 = element_count * 4u; uint param_12 = element_count * 4u;
MallocResult _487 = malloc(param_7, v_84, v_84BufferSize); MallocResult _631 = malloc(param_12, v_94, v_94BufferSize);
MallocResult chunk = _487; MallocResult chunk = _631;
chunk_alloc = chunk.alloc; chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) if (chunk.failed)
@ -294,28 +334,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
sh_alloc_failed = short(true); sh_alloc_failed = short(true);
} }
} }
uint out_ix = (_253.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_8; Alloc param_13;
param_8.offset = _253.conf.bin_alloc.offset; param_13.offset = v_202.conf.bin_alloc.offset;
uint param_9 = out_ix; uint param_14 = out_ix;
uint param_10 = element_count; uint param_15 = element_count;
write_mem(param_8, param_9, param_10, v_84, v_84BufferSize); write_mem(param_13, param_14, param_15, v_94, v_94BufferSize);
Alloc param_11; Alloc param_16;
param_11.offset = _253.conf.bin_alloc.offset; param_16.offset = v_202.conf.bin_alloc.offset;
uint param_12 = out_ix + 1u; uint param_17 = out_ix + 1u;
uint param_13 = chunk_alloc.offset; uint param_18 = chunk_alloc.offset;
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize); write_mem(param_16, param_17, param_18, v_94, v_94BufferSize);
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _543; bool _687;
if (!bool(sh_alloc_failed)) if (!bool(sh_alloc_failed))
{ {
_543 = v_84.mem_error != 0u; _687 = v_94.mem_error != 0u;
} }
else else
{ {
_543 = bool(sh_alloc_failed); _687 = bool(sh_alloc_failed);
} }
if (_543) if (_687)
{ {
return; return;
} }
@ -334,10 +374,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_14 = out_alloc; Alloc param_19 = out_alloc;
BinInstanceRef param_15 = BinInstanceRef{ out_offset }; BinInstanceRef param_20 = BinInstanceRef{ out_offset };
BinInstance param_16 = BinInstance{ element_ix }; BinInstance param_21 = BinInstance{ element_ix };
BinInstance_write(param_14, param_15, param_16, v_84, v_84BufferSize); BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize);
} }
x++; x++;
if (x == x1) if (x == x1)

Binary file not shown.

BIN
piet-gpu/shader/gen/clip_leaf.dxil generated Normal file

Binary file not shown.

367
piet-gpu/shader/gen/clip_leaf.hlsl generated Normal file
View file

@ -0,0 +1,367 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _393 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[510];
groupshared uint sh_stack[256];
groupshared float4 sh_stack_bbox[256];
groupshared uint sh_link[256];
groupshared float4 sh_bbox[256];
Bic load_bic(uint ix)
{
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
return _286;
}
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _72;
}
ClipEl load_clip_el(uint ix)
{
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 8);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
ClipEl _335 = { parent_ix, bbox };
return _335;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
uint load_path_ix(uint ix)
{
if (ix < _80.Load(72))
{
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
}
else
{
return 2147483648u;
}
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
uint search_link(inout Bic bic)
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
void store_clip_bbox(uint ix, float4 bbox)
{
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 8, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
Bic bic = _393;
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
GroupMemoryBarrierWithGroupSync();
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0;
Bic _559 = { 1u - uint(is_push), uint(is_push) };
bic = _559;
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7);
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
GroupMemoryBarrierWithGroupSync();
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
GroupMemoryBarrierWithGroupSync();
bic = _393;
Bic param_10 = bic;
uint _618 = search_link(param_10);
bic = param_10;
uint link = _618;
sh_link[th] = link;
GroupMemoryBarrierWithGroupSync();
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
GroupMemoryBarrierWithGroupSync();
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < _80.Load(72);
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17);
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

366
piet-gpu/shader/gen/clip_leaf.msl generated Normal file
View file

@ -0,0 +1,366 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
return Bic{ v_96.memory[base], v_96.memory[base + 1u] };
}
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
uint parent_ix = v_96.memory[base];
float x0 = as_type<float>(v_96.memory[base + 1u]);
float y0 = as_type<float>(v_96.memory[base + 2u]);
float x1 = as_type<float>(v_96.memory[base + 3u]);
float y1 = as_type<float>(v_96.memory[base + 4u]);
float4 bbox = float4(x0, y0, x1, y1);
return ClipEl{ parent_ix, bbox };
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
if (ix < v_80.conf.n_clip)
{
return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix];
}
else
{
return 2147483648u;
}
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_96.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
static inline __attribute__((always_inline))
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix);
v_96.memory[base] = as_type<uint>(bbox.x);
v_96.memory[base + 1u] = as_type<uint>(bbox.y);
v_96.memory[base + 2u] = as_type<uint>(bbox.z);
v_96.memory[base + 3u] = as_type<uint>(bbox.w);
}
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup Bic sh_bic[510];
threadgroup uint sh_stack[256];
threadgroup float4 sh_stack_bbox[256];
threadgroup uint sh_link[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
Bic bic = Bic{ 0u, 0u };
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param, v_80, v_96);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3, v_80, v_96);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6, v_80, v_96);
bool is_push = int(inp) >= 0;
bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7, v_80, v_96);
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u };
Bic param_10 = bic;
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10;
uint link = _618;
sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17, v_80, v_96);
}

BIN
piet-gpu/shader/gen/clip_leaf.spv generated Normal file

Binary file not shown.

BIN
piet-gpu/shader/gen/clip_reduce.dxil generated Normal file

Binary file not shown.

177
piet-gpu/shader/gen/clip_reduce.hlsl generated Normal file
View file

@ -0,0 +1,177 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _267 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[256];
groupshared uint sh_parent[256];
groupshared uint sh_path_ix[256];
groupshared float4 sh_bbox[256];
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _56;
}
void store_bic(uint ix, Bic bic)
{
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 8, bic.a);
_80.Store((base + 1u) * 4 + 8, bic.b);
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_clip_el(uint ix, ClipEl el)
{
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 8, el.parent_ix);
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
bool is_push = int(inp) >= 0;
Bic _207 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _207;
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b;
bic = _267;
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
GroupMemoryBarrierWithGroupSync();
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _331 = { parent_ix, bbox };
ClipEl el = _331;
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

173
piet-gpu/shader/gen/clip_reduce.msl generated Normal file
View file

@ -0,0 +1,173 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
v_80.memory[base] = bic.a;
v_80.memory[base + 1u] = bic.b;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_80.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
v_80.memory[base] = el.parent_ix;
v_80.memory[base + 1u] = as_type<uint>(el.bbox.x);
v_80.memory[base + 2u] = as_type<uint>(el.bbox.y);
v_80.memory[base + 3u] = as_type<uint>(el.bbox.z);
v_80.memory[base + 4u] = as_type<uint>(el.bbox.w);
}
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Bic sh_bic[256];
threadgroup uint sh_parent[256];
threadgroup uint sh_path_ix[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
Bic bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3, v_64, v_80);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint size = sh_bic[0].b;
bic = Bic{ 0u, 0u };
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4, v_64, v_80);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl el = ClipEl{ parent_ix, bbox };
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6, v_64, v_80);
}
}

BIN
piet-gpu/shader/gen/clip_reduce.spv generated Normal file

Binary file not shown.

Binary file not shown.

View file

@ -49,17 +49,6 @@ struct AnnoLinGradient
float line_c; float line_c;
}; };
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
};
struct AnnotatedRef struct AnnotatedRef
{ {
uint offset; uint offset;
@ -193,8 +182,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -203,8 +197,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _296 : register(u0, space0); RWByteAddressBuffer _283 : register(u0, space0);
ByteAddressBuffer _1249 : register(t1, space0); ByteAddressBuffer _1169 : register(t1, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -227,8 +221,8 @@ groupshared uint sh_tile_count[256];
Alloc slice_mem(Alloc a, uint offset, uint size) Alloc slice_mem(Alloc a, uint offset, uint size)
{ {
Alloc _373 = { a.offset + offset }; Alloc _360 = { a.offset + offset };
return _373; return _360;
} }
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
@ -244,7 +238,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _296.Load(offset * 4 + 8); uint v = _283.Load(offset * 4 + 8);
return v; return v;
} }
@ -257,8 +251,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
{ {
BinInstanceRef _754 = { ref.offset + (index * 4u) }; BinInstanceRef _674 = { ref.offset + (index * 4u) };
return _754; return _674;
} }
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
@ -277,8 +271,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1); uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _706 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _706; return _636;
} }
Path Path_read(Alloc a, PathRef ref) Path Path_read(Alloc a, PathRef ref)
@ -295,8 +289,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _814 = { raw2 }; TileRef _734 = { raw2 };
s.tiles = _814; s.tiles = _734;
return s; return s;
} }
@ -306,11 +300,11 @@ void write_tile_alloc(uint el_ix, Alloc a)
Alloc read_tile_alloc(uint el_ix, bool mem_ok) Alloc read_tile_alloc(uint el_ix, bool mem_ok)
{ {
uint _1135; uint _1055;
_296.GetDimensions(_1135); _283.GetDimensions(_1055);
_1135 = (_1135 - 8) / 4; _1055 = (_1055 - 8) / 4;
uint param = 0u; uint param = 0u;
uint param_1 = uint(int(_1135) * 4); uint param_1 = uint(int(_1055) * 4);
bool param_2 = mem_ok; bool param_2 = mem_ok;
return new_alloc(param, param_1, param_2); return new_alloc(param, param_1, param_2);
} }
@ -324,9 +318,9 @@ Tile Tile_read(Alloc a, TileRef ref)
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3); uint raw1 = read_mem(param_2, param_3);
TileSegRef _839 = { raw0 }; TileSegRef _759 = { raw0 };
Tile s; Tile s;
s.tile = _839; s.tile = _759;
s.backdrop = int(raw1); s.backdrop = int(raw1);
return s; return s;
} }
@ -361,30 +355,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)
{ {
AnnoColorRef _712 = { ref.offset + 4u }; AnnoColorRef _642 = { ref.offset + 4u };
Alloc param = a; Alloc param = a;
AnnoColorRef param_1 = _712; AnnoColorRef param_1 = _642;
return AnnoColor_read(param, param_1); return AnnoColor_read(param, param_1);
} }
MallocResult malloc(uint size) MallocResult malloc(uint size)
{ {
uint _302; uint _289;
_296.InterlockedAdd(0, size, _302); _283.InterlockedAdd(0, size, _289);
uint offset = _302; uint offset = _289;
uint _309; uint _296;
_296.GetDimensions(_309); _283.GetDimensions(_296);
_309 = (_309 - 8) / 4; _296 = (_296 - 8) / 4;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int(_309) * 4); r.failed = (offset + size) > uint(int(_296) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _331; uint _318;
_296.InterlockedMax(4, 1u, _331); _283.InterlockedMax(4, 1u, _318);
return r; return r;
} }
return r; return r;
@ -398,7 +392,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_296.Store(offset * 4 + 8, val); _283.Store(offset * 4 + 8, val);
} }
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
@ -416,9 +410,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 10u; uint param_2 = 10u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdJumpRef _1128 = { ref.offset + 4u }; CmdJumpRef _1048 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdJumpRef param_4 = _1128; CmdJumpRef param_4 = _1048;
CmdJump param_5 = s; CmdJump param_5 = s;
CmdJump_write(param_3, param_4, param_5); CmdJump_write(param_3, param_4, param_5);
} }
@ -430,21 +424,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true; return true;
} }
uint param = 1024u; uint param = 1024u;
MallocResult _1156 = malloc(param); MallocResult _1076 = malloc(param);
MallocResult new_cmd = _1156; MallocResult new_cmd = _1076;
if (new_cmd.failed) if (new_cmd.failed)
{ {
return false; return false;
} }
CmdJump _1166 = { new_cmd.alloc.offset }; CmdJump _1086 = { new_cmd.alloc.offset };
CmdJump jump = _1166; CmdJump jump = _1086;
Alloc param_1 = cmd_alloc; Alloc param_1 = cmd_alloc;
CmdRef param_2 = cmd_ref; CmdRef param_2 = cmd_ref;
CmdJump param_3 = jump; CmdJump param_3 = jump;
Cmd_Jump_write(param_1, param_2, param_3); Cmd_Jump_write(param_1, param_2, param_3);
cmd_alloc = new_cmd.alloc; cmd_alloc = new_cmd.alloc;
CmdRef _1178 = { cmd_alloc.offset }; CmdRef _1098 = { cmd_alloc.offset };
cmd_ref = _1178; cmd_ref = _1098;
cmd_limit = (cmd_alloc.offset + 1024u) - 60u; cmd_limit = (cmd_alloc.offset + 1024u) - 60u;
return true; return true;
} }
@ -473,9 +467,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 1u; uint param_2 = 1u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdFillRef _1012 = { ref.offset + 4u }; CmdFillRef _932 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdFillRef param_4 = _1012; CmdFillRef param_4 = _932;
CmdFill param_5 = s; CmdFill param_5 = s;
CmdFill_write(param_3, param_4, param_5); CmdFill_write(param_3, param_4, param_5);
} }
@ -507,9 +501,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 2u; uint param_2 = 2u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdStrokeRef _1030 = { ref.offset + 4u }; CmdStrokeRef _950 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdStrokeRef param_4 = _1030; CmdStrokeRef param_4 = _950;
CmdStroke param_5 = s; CmdStroke param_5 = s;
CmdStroke_write(param_3, param_4, param_5); CmdStroke_write(param_3, param_4, param_5);
} }
@ -521,8 +515,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
{ {
if (tile.tile.offset != 0u) if (tile.tile.offset != 0u)
{ {
CmdFill _1202 = { tile.tile.offset, tile.backdrop }; CmdFill _1122 = { tile.tile.offset, tile.backdrop };
CmdFill cmd_fill = _1202; CmdFill cmd_fill = _1122;
Alloc param_1 = alloc; Alloc param_1 = alloc;
CmdRef param_2 = cmd_ref; CmdRef param_2 = cmd_ref;
CmdFill param_3 = cmd_fill; CmdFill param_3 = cmd_fill;
@ -539,8 +533,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
} }
else else
{ {
CmdStroke _1232 = { tile.tile.offset, 0.5f * linewidth }; CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth };
CmdStroke cmd_stroke = _1232; CmdStroke cmd_stroke = _1152;
Alloc param_6 = alloc; Alloc param_6 = alloc;
CmdRef param_7 = cmd_ref; CmdRef param_7 = cmd_ref;
CmdStroke param_8 = cmd_stroke; CmdStroke param_8 = cmd_stroke;
@ -564,9 +558,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u; uint param_2 = 5u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdColorRef _1056 = { ref.offset + 4u }; CmdColorRef _976 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdColorRef param_4 = _1056; CmdColorRef param_4 = _976;
CmdColor param_5 = s; CmdColor param_5 = s;
CmdColor_write(param_3, param_4, param_5); CmdColor_write(param_3, param_4, param_5);
} }
@ -613,9 +607,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref)
AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref) AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref)
{ {
AnnoLinGradientRef _722 = { ref.offset + 4u }; AnnoLinGradientRef _652 = { ref.offset + 4u };
Alloc param = a; Alloc param = a;
AnnoLinGradientRef param_1 = _722; AnnoLinGradientRef param_1 = _652;
return AnnoLinGradient_read(param, param_1); return AnnoLinGradient_read(param, param_1);
} }
@ -646,9 +640,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 6u; uint param_2 = 6u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdLinGradRef _1074 = { ref.offset + 4u }; CmdLinGradRef _994 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdLinGradRef param_4 = _1074; CmdLinGradRef param_4 = _994;
CmdLinGrad param_5 = s; CmdLinGrad param_5 = s;
CmdLinGrad_write(param_3, param_4, param_5); CmdLinGrad_write(param_3, param_4, param_5);
} }
@ -687,9 +681,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)
{ {
AnnoImageRef _732 = { ref.offset + 4u }; AnnoImageRef _662 = { ref.offset + 4u };
Alloc param = a; Alloc param = a;
AnnoImageRef param_1 = _732; AnnoImageRef param_1 = _662;
return AnnoImage_read(param, param_1); return AnnoImage_read(param, param_1);
} }
@ -712,45 +706,13 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 7u; uint param_2 = 7u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdImageRef _1092 = { ref.offset + 4u }; CmdImageRef _1012 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdImageRef param_4 = _1092; CmdImageRef param_4 = _1012;
CmdImage param_5 = s; CmdImage param_5 = s;
CmdImage_write(param_3, param_4, param_5); CmdImage_write(param_3, param_4, param_5);
} }
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
AnnoBeginClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.linewidth = asfloat(raw4);
return s;
}
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)
{
AnnoBeginClipRef _742 = { ref.offset + 4u };
Alloc param = a;
AnnoBeginClipRef param_1 = _742;
return AnnoBeginClip_read(param, param_1);
}
void Cmd_BeginClip_write(Alloc a, CmdRef ref) void Cmd_BeginClip_write(Alloc a, CmdRef ref)
{ {
Alloc param = a; Alloc param = a;
@ -777,44 +739,43 @@ void Cmd_End_write(Alloc a, CmdRef ref)
void comp_main() void comp_main()
{ {
uint width_in_bins = ((_1249.Load(8) + 16u) - 1u) / 16u; uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u;
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
uint partition_ix = 0u; uint partition_ix = 0u;
uint n_partitions = ((_1249.Load(0) + 256u) - 1u) / 256u; uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u;
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x;
uint bin_tile_y = 16u * gl_WorkGroupID.y; uint bin_tile_y = 16u * gl_WorkGroupID.y;
uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_x = gl_LocalInvocationID.x % 16u;
uint tile_y = gl_LocalInvocationID.x / 16u; uint tile_y = gl_LocalInvocationID.x / 16u;
uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.Load(8)) + bin_tile_x) + tile_x; uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x;
Alloc _1314; Alloc _1234;
_1314.offset = _1249.Load(24); _1234.offset = _1169.Load(24);
Alloc param; Alloc param;
param.offset = _1314.offset; param.offset = _1234.offset;
uint param_1 = this_tile_ix * 1024u; uint param_1 = this_tile_ix * 1024u;
uint param_2 = 1024u; uint param_2 = 1024u;
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1323 = { cmd_alloc.offset }; CmdRef _1243 = { cmd_alloc.offset };
CmdRef cmd_ref = _1323; CmdRef cmd_ref = _1243;
uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint cmd_limit = (cmd_ref.offset + 1024u) - 60u;
uint clip_depth = 0u; uint clip_depth = 0u;
uint clip_zero_depth = 0u; uint clip_zero_depth = 0u;
uint clip_one_mask = 0u;
uint rd_ix = 0u; uint rd_ix = 0u;
uint wr_ix = 0u; uint wr_ix = 0u;
uint part_start_ix = 0u; uint part_start_ix = 0u;
uint ready_ix = 0u; uint ready_ix = 0u;
bool mem_ok = _296.Load(4) == 0u; bool mem_ok = _283.Load(4) == 0u;
Alloc param_3; Alloc param_3;
Alloc param_5; Alloc param_5;
uint _1529; uint _1448;
uint element_ix; uint element_ix;
AnnotatedRef ref; AnnotatedRef ref;
Alloc param_14; Alloc param_14;
Alloc param_16; Alloc param_16;
uint tile_count; uint tile_count;
Alloc param_23; Alloc param_23;
uint _1841; uint _1770;
Alloc param_29; Alloc param_29;
Tile tile_1; Tile tile_1;
AnnoColor fill; AnnoColor fill;
@ -822,41 +783,40 @@ void comp_main()
Alloc param_52; Alloc param_52;
CmdLinGrad cmd_lin; CmdLinGrad cmd_lin;
Alloc param_69; Alloc param_69;
Alloc param_86;
while (true) while (true)
{ {
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
sh_bitmaps[i][th_ix] = 0u; sh_bitmaps[i][th_ix] = 0u;
} }
bool _1581; bool _1500;
for (;;) for (;;)
{ {
if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
{ {
part_start_ix = ready_ix; part_start_ix = ready_ix;
uint count = 0u; uint count = 0u;
bool _1379 = th_ix < 256u; bool _1298 = th_ix < 256u;
bool _1387; bool _1306;
if (_1379) if (_1298)
{ {
_1387 = (partition_ix + th_ix) < n_partitions; _1306 = (partition_ix + th_ix) < n_partitions;
} }
else else
{ {
_1387 = _1379; _1306 = _1298;
} }
if (_1387) if (_1306)
{ {
uint in_ix = (_1249.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
Alloc _1404; Alloc _1323;
_1404.offset = _1249.Load(20); _1323.offset = _1169.Load(20);
param_3.offset = _1404.offset; param_3.offset = _1323.offset;
uint param_4 = in_ix; uint param_4 = in_ix;
count = read_mem(param_3, param_4); count = read_mem(param_3, param_4);
Alloc _1415; Alloc _1334;
_1415.offset = _1249.Load(20); _1334.offset = _1169.Load(20);
param_5.offset = _1415.offset; param_5.offset = _1334.offset;
uint param_6 = in_ix + 1u; uint param_6 = in_ix + 1u;
uint offset = read_mem(param_5, param_6); uint offset = read_mem(param_5, param_6);
uint param_7 = offset; uint param_7 = offset;
@ -902,16 +862,16 @@ void comp_main()
} }
if (part_ix > 0u) if (part_ix > 0u)
{ {
_1529 = sh_part_count[part_ix - 1u]; _1448 = sh_part_count[part_ix - 1u];
} }
else else
{ {
_1529 = part_start_ix; _1448 = part_start_ix;
} }
ix -= _1529; ix -= _1448;
Alloc bin_alloc = sh_part_elements[part_ix]; Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef _1548 = { bin_alloc.offset }; BinInstanceRef _1467 = { bin_alloc.offset };
BinInstanceRef inst_ref = _1548; BinInstanceRef inst_ref = _1467;
BinInstanceRef param_10 = inst_ref; BinInstanceRef param_10 = inst_ref;
uint param_11 = ix; uint param_11 = ix;
Alloc param_12 = bin_alloc; Alloc param_12 = bin_alloc;
@ -921,16 +881,16 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
wr_ix = min((rd_ix + 256u), ready_ix); wr_ix = min((rd_ix + 256u), ready_ix);
bool _1571 = (wr_ix - rd_ix) < 256u; bool _1490 = (wr_ix - rd_ix) < 256u;
if (_1571) if (_1490)
{ {
_1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions); _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
} }
else else
{ {
_1581 = _1571; _1500 = _1490;
} }
if (_1581) if (_1500)
{ {
continue; continue;
} }
@ -943,11 +903,11 @@ void comp_main()
if ((th_ix + rd_ix) < wr_ix) if ((th_ix + rd_ix) < wr_ix)
{ {
element_ix = sh_elements[th_ix]; element_ix = sh_elements[th_ix];
AnnotatedRef _1602 = { _1249.Load(32) + (element_ix * 40u) }; AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) };
ref = _1602; ref = _1521;
Alloc _1605; Alloc _1524;
_1605.offset = _1249.Load(32); _1524.offset = _1169.Load(32);
param_14.offset = _1605.offset; param_14.offset = _1524.offset;
AnnotatedRef param_15 = ref; AnnotatedRef param_15 = ref;
tag = Annotated_tag(param_14, param_15).tag; tag = Annotated_tag(param_14, param_15).tag;
} }
@ -959,12 +919,13 @@ void comp_main()
case 4u: case 4u:
case 5u: case 5u:
{ {
uint path_ix = element_ix; uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix);
PathRef _1624 = { _1249.Load(16) + (path_ix * 12u) }; uint path_ix = _283.Load(drawmonoid_base * 4 + 8);
Alloc _1627; PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) };
_1627.offset = _1249.Load(16); Alloc _1556;
param_16.offset = _1627.offset; _1556.offset = _1169.Load(16);
PathRef param_17 = _1624; param_16.offset = _1556.offset;
PathRef param_17 = _1553;
Path path = Path_read(param_16, param_17); Path path = Path_read(param_16, param_17);
uint stride = path.bbox.z - path.bbox.x; uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride; sh_tile_stride[th_ix] = stride;
@ -1019,59 +980,53 @@ void comp_main()
el_ix = probe_1; el_ix = probe_1;
} }
} }
AnnotatedRef _1826 = { _1249.Load(32) + (sh_elements[el_ix] * 40u) }; AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) };
AnnotatedRef ref_1 = _1826; AnnotatedRef ref_1 = _1755;
Alloc _1830; Alloc _1759;
_1830.offset = _1249.Load(32); _1759.offset = _1169.Load(32);
param_23.offset = _1830.offset; param_23.offset = _1759.offset;
AnnotatedRef param_24 = ref_1; AnnotatedRef param_24 = ref_1;
uint tag_1 = Annotated_tag(param_23, param_24).tag; uint tag_1 = Annotated_tag(param_23, param_24).tag;
if (el_ix > 0u) if (el_ix > 0u)
{ {
_1841 = sh_tile_count[el_ix - 1u]; _1770 = sh_tile_count[el_ix - 1u];
} }
else else
{ {
_1841 = 0u; _1770 = 0u;
} }
uint seq_ix = ix_1 - _1841; uint seq_ix = ix_1 - _1770;
uint width = sh_tile_width[el_ix]; uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint x = sh_tile_x0[el_ix] + (seq_ix % width);
uint y = sh_tile_y0[el_ix] + (seq_ix / width); uint y = sh_tile_y0[el_ix] + (seq_ix / width);
bool include_tile = false; bool include_tile = false;
if ((tag_1 == 4u) || (tag_1 == 5u)) if (mem_ok)
{ {
include_tile = true; uint param_25 = el_ix;
} bool param_26 = mem_ok;
else TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
{ Alloc param_27 = read_tile_alloc(param_25, param_26);
if (mem_ok) TileRef param_28 = _1822;
Tile tile = Tile_read(param_27, param_28);
bool is_clip = (tag_1 == 4u) || (tag_1 == 5u);
bool _1834 = tile.tile.offset != 0u;
bool _1843;
if (!_1834)
{ {
uint param_25 = el_ix; _1843 = (tile.backdrop == 0) == is_clip;
bool param_26 = mem_ok;
TileRef _1901 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
Alloc param_27 = read_tile_alloc(param_25, param_26);
TileRef param_28 = _1901;
Tile tile = Tile_read(param_27, param_28);
bool _1907 = tile.tile.offset != 0u;
bool _1914;
if (!_1907)
{
_1914 = tile.backdrop != 0;
}
else
{
_1914 = _1907;
}
include_tile = _1914;
} }
else
{
_1843 = _1834;
}
include_tile = _1843;
} }
if (include_tile) if (include_tile)
{ {
uint el_slice = el_ix / 32u; uint el_slice = el_ix / 32u;
uint el_mask = 1u << (el_ix & 31u); uint el_mask = 1u << (el_ix & 31u);
uint _1934; uint _1863;
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1934); InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863);
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -1095,11 +1050,11 @@ void comp_main()
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
uint element_ix_1 = sh_elements[element_ref_ix]; uint element_ix_1 = sh_elements[element_ref_ix];
bitmap &= (bitmap - 1u); bitmap &= (bitmap - 1u);
AnnotatedRef _1988 = { _1249.Load(32) + (element_ix_1 * 40u) }; AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) };
ref = _1988; ref = _1917;
Alloc _1993; Alloc _1922;
_1993.offset = _1249.Load(32); _1922.offset = _1169.Load(32);
param_29.offset = _1993.offset; param_29.offset = _1922.offset;
AnnotatedRef param_30 = ref; AnnotatedRef param_30 = ref;
AnnotatedTag tag_2 = Annotated_tag(param_29, param_30); AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);
if (clip_zero_depth == 0u) if (clip_zero_depth == 0u)
@ -1110,23 +1065,23 @@ void comp_main()
{ {
uint param_31 = element_ref_ix; uint param_31 = element_ref_ix;
bool param_32 = mem_ok; bool param_32 = mem_ok;
TileRef _2029 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_33 = read_tile_alloc(param_31, param_32); Alloc param_33 = read_tile_alloc(param_31, param_32);
TileRef param_34 = _2029; TileRef param_34 = _1958;
tile_1 = Tile_read(param_33, param_34); tile_1 = Tile_read(param_33, param_34);
Alloc _2036; Alloc _1965;
_2036.offset = _1249.Load(32); _1965.offset = _1169.Load(32);
param_35.offset = _2036.offset; param_35.offset = _1965.offset;
AnnotatedRef param_36 = ref; AnnotatedRef param_36 = ref;
fill = Annotated_Color_read(param_35, param_36); fill = Annotated_Color_read(param_35, param_36);
Alloc param_37 = cmd_alloc; Alloc param_37 = cmd_alloc;
CmdRef param_38 = cmd_ref; CmdRef param_38 = cmd_ref;
uint param_39 = cmd_limit; uint param_39 = cmd_limit;
bool _2048 = alloc_cmd(param_37, param_38, param_39); bool _1977 = alloc_cmd(param_37, param_38, param_39);
cmd_alloc = param_37; cmd_alloc = param_37;
cmd_ref = param_38; cmd_ref = param_38;
cmd_limit = param_39; cmd_limit = param_39;
if (!_2048) if (!_1977)
{ {
break; break;
} }
@ -1137,10 +1092,10 @@ void comp_main()
float param_44 = fill.linewidth; float param_44 = fill.linewidth;
write_fill(param_40, param_41, param_42, param_43, param_44); write_fill(param_40, param_41, param_42, param_43, param_44);
cmd_ref = param_41; cmd_ref = param_41;
CmdColor _2072 = { fill.rgba_color }; CmdColor _2001 = { fill.rgba_color };
Alloc param_45 = cmd_alloc; Alloc param_45 = cmd_alloc;
CmdRef param_46 = cmd_ref; CmdRef param_46 = cmd_ref;
CmdColor param_47 = _2072; CmdColor param_47 = _2001;
Cmd_Color_write(param_45, param_46, param_47); Cmd_Color_write(param_45, param_46, param_47);
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
@ -1149,23 +1104,23 @@ void comp_main()
{ {
uint param_48 = element_ref_ix; uint param_48 = element_ref_ix;
bool param_49 = mem_ok; bool param_49 = mem_ok;
TileRef _2101 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_50 = read_tile_alloc(param_48, param_49); Alloc param_50 = read_tile_alloc(param_48, param_49);
TileRef param_51 = _2101; TileRef param_51 = _2030;
tile_1 = Tile_read(param_50, param_51); tile_1 = Tile_read(param_50, param_51);
Alloc _2108; Alloc _2037;
_2108.offset = _1249.Load(32); _2037.offset = _1169.Load(32);
param_52.offset = _2108.offset; param_52.offset = _2037.offset;
AnnotatedRef param_53 = ref; AnnotatedRef param_53 = ref;
AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53); AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53);
Alloc param_54 = cmd_alloc; Alloc param_54 = cmd_alloc;
CmdRef param_55 = cmd_ref; CmdRef param_55 = cmd_ref;
uint param_56 = cmd_limit; uint param_56 = cmd_limit;
bool _2120 = alloc_cmd(param_54, param_55, param_56); bool _2049 = alloc_cmd(param_54, param_55, param_56);
cmd_alloc = param_54; cmd_alloc = param_54;
cmd_ref = param_55; cmd_ref = param_55;
cmd_limit = param_56; cmd_limit = param_56;
if (!_2120) if (!_2049)
{ {
break; break;
} }
@ -1191,23 +1146,23 @@ void comp_main()
{ {
uint param_65 = element_ref_ix; uint param_65 = element_ref_ix;
bool param_66 = mem_ok; bool param_66 = mem_ok;
TileRef _2185 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_67 = read_tile_alloc(param_65, param_66); Alloc param_67 = read_tile_alloc(param_65, param_66);
TileRef param_68 = _2185; TileRef param_68 = _2114;
tile_1 = Tile_read(param_67, param_68); tile_1 = Tile_read(param_67, param_68);
Alloc _2192; Alloc _2121;
_2192.offset = _1249.Load(32); _2121.offset = _1169.Load(32);
param_69.offset = _2192.offset; param_69.offset = _2121.offset;
AnnotatedRef param_70 = ref; AnnotatedRef param_70 = ref;
AnnoImage fill_img = Annotated_Image_read(param_69, param_70); AnnoImage fill_img = Annotated_Image_read(param_69, param_70);
Alloc param_71 = cmd_alloc; Alloc param_71 = cmd_alloc;
CmdRef param_72 = cmd_ref; CmdRef param_72 = cmd_ref;
uint param_73 = cmd_limit; uint param_73 = cmd_limit;
bool _2204 = alloc_cmd(param_71, param_72, param_73); bool _2133 = alloc_cmd(param_71, param_72, param_73);
cmd_alloc = param_71; cmd_alloc = param_71;
cmd_ref = param_72; cmd_ref = param_72;
cmd_limit = param_73; cmd_limit = param_73;
if (!_2204) if (!_2133)
{ {
break; break;
} }
@ -1218,10 +1173,10 @@ void comp_main()
float param_78 = fill_img.linewidth; float param_78 = fill_img.linewidth;
write_fill(param_74, param_75, param_76, param_77, param_78); write_fill(param_74, param_75, param_76, param_77, param_78);
cmd_ref = param_75; cmd_ref = param_75;
CmdImage _2230 = { fill_img.index, fill_img.offset }; CmdImage _2159 = { fill_img.index, fill_img.offset };
Alloc param_79 = cmd_alloc; Alloc param_79 = cmd_alloc;
CmdRef param_80 = cmd_ref; CmdRef param_80 = cmd_ref;
CmdImage param_81 = _2230; CmdImage param_81 = _2159;
Cmd_Image_write(param_79, param_80, param_81); Cmd_Image_write(param_79, param_80, param_81);
cmd_ref.offset += 12u; cmd_ref.offset += 12u;
break; break;
@ -1230,103 +1185,76 @@ void comp_main()
{ {
uint param_82 = element_ref_ix; uint param_82 = element_ref_ix;
bool param_83 = mem_ok; bool param_83 = mem_ok;
TileRef _2259 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_84 = read_tile_alloc(param_82, param_83); Alloc param_84 = read_tile_alloc(param_82, param_83);
TileRef param_85 = _2259; TileRef param_85 = _2188;
tile_1 = Tile_read(param_84, param_85); tile_1 = Tile_read(param_84, param_85);
bool _2265 = tile_1.tile.offset == 0u; bool _2194 = tile_1.tile.offset == 0u;
bool _2271; bool _2200;
if (_2265) if (_2194)
{ {
_2271 = tile_1.backdrop == 0; _2200 = tile_1.backdrop == 0;
} }
else else
{ {
_2271 = _2265; _2200 = _2194;
} }
if (_2271) if (_2200)
{ {
clip_zero_depth = clip_depth + 1u; clip_zero_depth = clip_depth + 1u;
} }
else else
{ {
if ((tile_1.tile.offset == 0u) && (clip_depth < 32u)) Alloc param_86 = cmd_alloc;
CmdRef param_87 = cmd_ref;
uint param_88 = cmd_limit;
bool _2212 = alloc_cmd(param_86, param_87, param_88);
cmd_alloc = param_86;
cmd_ref = param_87;
cmd_limit = param_88;
if (!_2212)
{ {
clip_one_mask |= (1u << clip_depth); break;
}
else
{
Alloc _2293;
_2293.offset = _1249.Load(32);
param_86.offset = _2293.offset;
AnnotatedRef param_87 = ref;
AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87);
Alloc param_88 = cmd_alloc;
CmdRef param_89 = cmd_ref;
uint param_90 = cmd_limit;
bool _2305 = alloc_cmd(param_88, param_89, param_90);
cmd_alloc = param_88;
cmd_ref = param_89;
cmd_limit = param_90;
if (!_2305)
{
break;
}
Alloc param_91 = cmd_alloc;
CmdRef param_92 = cmd_ref;
uint param_93 = tag_2.flags;
Tile param_94 = tile_1;
float param_95 = begin_clip.linewidth;
write_fill(param_91, param_92, param_93, param_94, param_95);
cmd_ref = param_92;
Alloc param_96 = cmd_alloc;
CmdRef param_97 = cmd_ref;
Cmd_BeginClip_write(param_96, param_97);
cmd_ref.offset += 4u;
if (clip_depth < 32u)
{
clip_one_mask &= (~(1u << clip_depth));
}
} }
Alloc param_89 = cmd_alloc;
CmdRef param_90 = cmd_ref;
Cmd_BeginClip_write(param_89, param_90);
cmd_ref.offset += 4u;
} }
clip_depth++; clip_depth++;
break; break;
} }
case 5u: case 5u:
{ {
uint param_91 = element_ref_ix;
bool param_92 = mem_ok;
TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_93 = read_tile_alloc(param_91, param_92);
TileRef param_94 = _2249;
tile_1 = Tile_read(param_93, param_94);
clip_depth--; clip_depth--;
bool _2351 = clip_depth >= 32u; Alloc param_95 = cmd_alloc;
bool _2360; CmdRef param_96 = cmd_ref;
if (!_2351) uint param_97 = cmd_limit;
bool _2261 = alloc_cmd(param_95, param_96, param_97);
cmd_alloc = param_95;
cmd_ref = param_96;
cmd_limit = param_97;
if (!_2261)
{ {
_2360 = (clip_one_mask & (1u << clip_depth)) == 0u; break;
}
else
{
_2360 = _2351;
}
if (_2360)
{
Alloc param_98 = cmd_alloc;
CmdRef param_99 = cmd_ref;
uint param_100 = cmd_limit;
bool _2369 = alloc_cmd(param_98, param_99, param_100);
cmd_alloc = param_98;
cmd_ref = param_99;
cmd_limit = param_100;
if (!_2369)
{
break;
}
Alloc param_101 = cmd_alloc;
CmdRef param_102 = cmd_ref;
Cmd_Solid_write(param_101, param_102);
cmd_ref.offset += 4u;
Alloc param_103 = cmd_alloc;
CmdRef param_104 = cmd_ref;
Cmd_EndClip_write(param_103, param_104);
cmd_ref.offset += 4u;
} }
Alloc param_98 = cmd_alloc;
CmdRef param_99 = cmd_ref;
uint param_100 = 0u;
Tile param_101 = tile_1;
float param_102 = 0.0f;
write_fill(param_98, param_99, param_100, param_101, param_102);
cmd_ref = param_99;
Alloc param_103 = cmd_alloc;
CmdRef param_104 = cmd_ref;
Cmd_EndClip_write(param_103, param_104);
cmd_ref.offset += 4u;
break; break;
} }
} }
@ -1359,17 +1287,17 @@ void comp_main()
break; break;
} }
} }
bool _2432 = (bin_tile_x + tile_x) < _1249.Load(8); bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8);
bool _2441; bool _2335;
if (_2432) if (_2326)
{ {
_2441 = (bin_tile_y + tile_y) < _1249.Load(12); _2335 = (bin_tile_y + tile_y) < _1169.Load(12);
} }
else else
{ {
_2441 = _2432; _2335 = _2326;
} }
if (_2441) if (_2335)
{ {
Alloc param_105 = cmd_alloc; Alloc param_105 = cmd_alloc;
CmdRef param_106 = cmd_ref; CmdRef param_106 = cmd_ref;

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -41,16 +41,6 @@ struct FillImage
int2 offset; int2 offset;
}; };
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
};
struct ElementTag struct ElementTag
{ {
uint tag; uint tag;
@ -143,8 +133,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -153,14 +148,14 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _418 = { 0u, 0u }; static const DrawMonoid _348 = { 0u, 0u };
static const DrawMonoid _442 = { 1u, 0u }; static const DrawMonoid _372 = { 1u, 0u };
static const DrawMonoid _444 = { 1u, 1u }; static const DrawMonoid _374 = { 1u, 1u };
RWByteAddressBuffer _201 : register(u0, space0); RWByteAddressBuffer _187 : register(u0, space0);
ByteAddressBuffer _225 : register(t2, space0); ByteAddressBuffer _211 : register(t2, space0);
ByteAddressBuffer _1004 : register(t3, space0); ByteAddressBuffer _934 : register(t3, space0);
ByteAddressBuffer _1038 : register(t1, space0); ByteAddressBuffer _968 : register(t1, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -176,9 +171,9 @@ groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref) ElementTag Element_tag(ElementRef ref)
{ {
uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0); uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _375; return _321;
} }
DrawMonoid map_tag(uint tag_word) DrawMonoid map_tag(uint tag_word)
@ -189,24 +184,24 @@ DrawMonoid map_tag(uint tag_word)
case 5u: case 5u:
case 6u: case 6u:
{ {
return _442; return _372;
} }
case 9u: case 9u:
case 10u: case 10u:
{ {
return _444; return _374;
} }
default: default:
{ {
return _418; return _348;
} }
} }
} }
ElementRef Element_index(ElementRef ref, uint index) ElementRef Element_index(ElementRef ref, uint index)
{ {
ElementRef _214 = { ref.offset + (index * 36u) }; ElementRef _200 = { ref.offset + (index * 36u) };
return _214; return _200;
} }
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
@ -219,13 +214,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
DrawMonoid tag_monoid_identity() DrawMonoid tag_monoid_identity()
{ {
return _418; return _348;
} }
FillColor FillColor_read(FillColorRef ref) FillColor FillColor_read(FillColorRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0); uint raw0 = _211.Load((ix + 0u) * 4 + 0);
FillColor s; FillColor s;
s.rgba_color = raw0; s.rgba_color = raw0;
return s; return s;
@ -233,8 +228,8 @@ FillColor FillColor_read(FillColorRef ref)
FillColor Element_FillColor_read(ElementRef ref) FillColor Element_FillColor_read(ElementRef ref)
{ {
FillColorRef _381 = { ref.offset + 4u }; FillColorRef _327 = { ref.offset + 4u };
FillColorRef param = _381; FillColorRef param = _327;
return FillColor_read(param); return FillColor_read(param);
} }
@ -251,7 +246,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_201.Store(offset * 4 + 8, val); _187.Store(offset * 4 + 8, val);
} }
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
@ -289,9 +284,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u; uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
AnnoColorRef _805 = { ref.offset + 4u }; AnnoColorRef _735 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
AnnoColorRef param_4 = _805; AnnoColorRef param_4 = _735;
AnnoColor param_5 = s; AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5); AnnoColor_write(param_3, param_4, param_5);
} }
@ -299,11 +294,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
FillLinGradient FillLinGradient_read(FillLinGradientRef ref) FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0); uint raw0 = _211.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0); uint raw1 = _211.Load((ix + 1u) * 4 + 0);
uint raw2 = _225.Load((ix + 2u) * 4 + 0); uint raw2 = _211.Load((ix + 2u) * 4 + 0);
uint raw3 = _225.Load((ix + 3u) * 4 + 0); uint raw3 = _211.Load((ix + 3u) * 4 + 0);
uint raw4 = _225.Load((ix + 4u) * 4 + 0); uint raw4 = _211.Load((ix + 4u) * 4 + 0);
FillLinGradient s; FillLinGradient s;
s.index = raw0; s.index = raw0;
s.p0 = float2(asfloat(raw1), asfloat(raw2)); s.p0 = float2(asfloat(raw1), asfloat(raw2));
@ -313,8 +308,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
FillLinGradient Element_FillLinGradient_read(ElementRef ref) FillLinGradient Element_FillLinGradient_read(ElementRef ref)
{ {
FillLinGradientRef _389 = { ref.offset + 4u }; FillLinGradientRef _335 = { ref.offset + 4u };
FillLinGradientRef param = _389; FillLinGradientRef param = _335;
return FillLinGradient_read(param); return FillLinGradient_read(param);
} }
@ -365,9 +360,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u; uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
AnnoLinGradientRef _826 = { ref.offset + 4u }; AnnoLinGradientRef _756 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
AnnoLinGradientRef param_4 = _826; AnnoLinGradientRef param_4 = _756;
AnnoLinGradient param_5 = s; AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5); AnnoLinGradient_write(param_3, param_4, param_5);
} }
@ -375,8 +370,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
FillImage FillImage_read(FillImageRef ref) FillImage FillImage_read(FillImageRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0); uint raw0 = _211.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0); uint raw1 = _211.Load((ix + 1u) * 4 + 0);
FillImage s; FillImage s;
s.index = raw0; s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
@ -385,8 +380,8 @@ FillImage FillImage_read(FillImageRef ref)
FillImage Element_FillImage_read(ElementRef ref) FillImage Element_FillImage_read(ElementRef ref)
{ {
FillImageRef _397 = { ref.offset + 4u }; FillImageRef _343 = { ref.offset + 4u };
FillImageRef param = _397; FillImageRef param = _343;
return FillImage_read(param); return FillImage_read(param);
} }
@ -429,32 +424,13 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u; uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
AnnoImageRef _847 = { ref.offset + 4u }; AnnoImageRef _777 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
AnnoImageRef param_4 = _847; AnnoImageRef param_4 = _777;
AnnoImage param_5 = s; AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5); AnnoImage_write(param_3, param_4, param_5);
} }
Clip Clip_read(ClipRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
Clip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
return s;
}
Clip Element_BeginClip_read(ElementRef ref)
{
ClipRef _405 = { ref.offset + 4u };
ClipRef param = _405;
return Clip_read(param);
}
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
@ -486,20 +462,13 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u; uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
AnnoBeginClipRef _868 = { ref.offset + 4u }; AnnoBeginClipRef _798 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
AnnoBeginClipRef param_4 = _868; AnnoBeginClipRef param_4 = _798;
AnnoBeginClip param_5 = s; AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5); AnnoBeginClip_write(param_3, param_4, param_5);
} }
Clip Element_EndClip_read(ElementRef ref)
{
ClipRef _413 = { ref.offset + 4u };
ClipRef param = _413;
return Clip_read(param);
}
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
@ -527,9 +496,9 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u; uint param_2 = 5u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
AnnoEndClipRef _886 = { ref.offset + 4u }; AnnoEndClipRef _816 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
AnnoEndClipRef param_4 = _886; AnnoEndClipRef param_4 = _816;
AnnoEndClip param_5 = s; AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5); AnnoEndClip_write(param_3, param_4, param_5);
} }
@ -537,8 +506,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _904 = { ix * 36u }; ElementRef _834 = { ix * 36u };
ElementRef ref = _904; ElementRef ref = _834;
ElementRef param = ref; ElementRef param = ref;
uint tag_word = Element_tag(param).tag; uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word; uint param_1 = tag_word;
@ -575,11 +544,11 @@ void comp_main()
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
DrawMonoid _1010; DrawMonoid _940;
_1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0); _940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4); _940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _1010.path_ix; row.path_ix = _940.path_ix;
row.clip_ix = _1010.clip_ix; row.clip_ix = _940.clip_ix;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -588,9 +557,10 @@ void comp_main()
row = combine_tag_monoid(param_10, param_11); row = combine_tag_monoid(param_10, param_11);
} }
uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u); uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u);
AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) }; uint clip_out_base = _968.Load(48) >> uint(2);
AnnotatedRef out_ref = _1054; AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) };
AnnotatedRef out_ref = _989;
float4 mat; float4 mat;
float2 translate; float2 translate;
AnnoColor anno_fill; AnnoColor anno_fill;
@ -600,39 +570,43 @@ void comp_main()
AnnoImage anno_img; AnnoImage anno_img;
Alloc param_28; Alloc param_28;
AnnoBeginClip anno_begin_clip; AnnoBeginClip anno_begin_clip;
Alloc param_33; Alloc param_32;
AnnoEndClip anno_end_clip; AnnoEndClip anno_end_clip;
Alloc param_38; Alloc param_36;
for (uint i_2 = 0u; i_2 < 8u; i_2++) for (uint i_2 = 0u; i_2 < 8u; i_2++)
{ {
DrawMonoid param_12 = row; DrawMonoid m = row;
DrawMonoid param_13 = local[i_2]; if (i_2 > 0u)
DrawMonoid m = combine_tag_monoid(param_12, param_13); {
_201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); DrawMonoid param_12 = m;
_201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
}
_187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
ElementRef param_14 = ref; ElementRef param_14 = ref;
uint param_15 = i_2; uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15); ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref; ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16).tag; tag_word = Element_tag(param_16).tag;
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
{ {
uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u)); uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f; float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8)); float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8));
uint fill_mode = uint(linewidth >= 0.0f); uint fill_mode = uint(linewidth >= 0.0f);
if ((linewidth >= 0.0f) || (tag_word == 5u)) if ((linewidth >= 0.0f) || (tag_word == 5u))
{ {
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8); uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix); uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8))); mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8)));
if (tag_word == 5u) if (tag_word == 5u)
{ {
translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8))); translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8)));
} }
} }
if (linewidth >= 0.0f) if (linewidth >= 0.0f)
@ -649,9 +623,9 @@ void comp_main()
anno_fill.bbox = bbox; anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth; anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color; anno_fill.rgba_color = fill.rgba_color;
Alloc _1257; Alloc _1203;
_1257.offset = _1038.Load(32); _1203.offset = _968.Load(32);
param_18.offset = _1257.offset; param_18.offset = _1203.offset;
AnnotatedRef param_19 = out_ref; AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode; uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill; AnnoColor param_21 = anno_fill;
@ -674,9 +648,9 @@ void comp_main()
anno_lin.line_x = line_x; anno_lin.line_x = line_x;
anno_lin.line_y = line_y; anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
Alloc _1353; Alloc _1299;
_1353.offset = _1038.Load(32); _1299.offset = _968.Load(32);
param_23.offset = _1353.offset; param_23.offset = _1299.offset;
AnnotatedRef param_24 = out_ref; AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode; uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin; AnnoLinGradient param_26 = anno_lin;
@ -691,48 +665,51 @@ void comp_main()
anno_img.linewidth = linewidth; anno_img.linewidth = linewidth;
anno_img.index = fill_img.index; anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset; anno_img.offset = fill_img.offset;
Alloc _1381; Alloc _1327;
_1381.offset = _1038.Load(32); _1327.offset = _968.Load(32);
param_28.offset = _1381.offset; param_28.offset = _1327.offset;
AnnotatedRef param_29 = out_ref; AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode; uint param_30 = fill_mode;
AnnoImage param_31 = anno_img; AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31); Annotated_Image_write(param_28, param_29, param_30, param_31);
break; break;
} }
case 9u:
{
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0f;
Alloc _1344;
_1344.offset = _968.Load(32);
param_32.offset = _1344.offset;
AnnotatedRef param_33 = out_ref;
uint param_34 = 0u;
AnnoBeginClip param_35 = anno_begin_clip;
Annotated_BeginClip_write(param_32, param_33, param_34, param_35);
break;
}
} }
} }
else else
{ {
if (tag_word == 10u)
{
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
Alloc _1368;
_1368.offset = _968.Load(32);
param_36.offset = _1368.offset;
AnnotatedRef param_37 = out_ref;
AnnoEndClip param_38 = anno_end_clip;
Annotated_EndClip_write(param_36, param_37, param_38);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u) if (tag_word == 9u)
{ {
ElementRef param_32 = this_ref; path_ix = m.path_ix;
Clip begin_clip = Element_BeginClip_read(param_32);
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0f;
Alloc _1410;
_1410.offset = _1038.Load(32);
param_33.offset = _1410.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = 0u;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_EndClip_read(param_37);
anno_end_clip.bbox = end_clip.bbox;
Alloc _1435;
_1435.offset = _1038.Load(32);
param_38.offset = _1435.offset;
AnnotatedRef param_39 = out_ref;
AnnoEndClip param_40 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40);
}
} }
_187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
} }
out_ref.offset += 40u; out_ref.offset += 40u;
} }

View file

@ -87,16 +87,6 @@ struct FillImage
int2 offset; int2 offset;
}; };
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
};
struct ElementTag struct ElementTag
{ {
uint tag; uint tag;
@ -217,8 +207,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -233,9 +228,9 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225) ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211)
{ {
uint tag_and_flags = v_225.scene[ref.offset >> uint(2)]; uint tag_and_flags = v_211.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
} }
@ -284,20 +279,20 @@ DrawMonoid tag_monoid_identity()
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225) FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u]; uint raw0 = v_211.scene[ix + 0u];
FillColor s; FillColor s;
s.rgba_color = raw0; s.rgba_color = raw0;
return s; return s;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225) FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{ {
FillColorRef param = FillColorRef{ ref.offset + 4u }; FillColorRef param = FillColorRef{ ref.offset + 4u };
return FillColor_read(param, v_225); return FillColor_read(param, v_211);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -307,7 +302,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -315,61 +310,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_201.memory[offset] = val; v_187.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201) void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x); uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y); uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201); write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z); uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201); write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w); uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201); write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 4u; uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth); uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201); write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a; Alloc param_15 = a;
uint param_16 = ix + 5u; uint param_16 = ix + 5u;
uint param_17 = s.rgba_color; uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17, v_201); write_mem(param_15, param_16, param_17, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201) void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u; uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u }; AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
AnnoColor param_5 = s; AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5, v_201); AnnoColor_write(param_3, param_4, param_5, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225) FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u]; uint raw0 = v_211.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u]; uint raw1 = v_211.scene[ix + 1u];
uint raw2 = v_225.scene[ix + 2u]; uint raw2 = v_211.scene[ix + 2u];
uint raw3 = v_225.scene[ix + 3u]; uint raw3 = v_211.scene[ix + 3u];
uint raw4 = v_225.scene[ix + 4u]; uint raw4 = v_211.scene[ix + 4u];
FillLinGradient s; FillLinGradient s;
s.index = raw0; s.index = raw0;
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2)); s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
@ -378,73 +373,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225) FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{ {
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u }; FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
return FillLinGradient_read(param, v_225); return FillLinGradient_read(param, v_211);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201) void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x); uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y); uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201); write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z); uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201); write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w); uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201); write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 4u; uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth); uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201); write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a; Alloc param_15 = a;
uint param_16 = ix + 5u; uint param_16 = ix + 5u;
uint param_17 = s.index; uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_201); write_mem(param_15, param_16, param_17, v_187);
Alloc param_18 = a; Alloc param_18 = a;
uint param_19 = ix + 6u; uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.line_x); uint param_20 = as_type<uint>(s.line_x);
write_mem(param_18, param_19, param_20, v_201); write_mem(param_18, param_19, param_20, v_187);
Alloc param_21 = a; Alloc param_21 = a;
uint param_22 = ix + 7u; uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.line_y); uint param_23 = as_type<uint>(s.line_y);
write_mem(param_21, param_22, param_23, v_201); write_mem(param_21, param_22, param_23, v_187);
Alloc param_24 = a; Alloc param_24 = a;
uint param_25 = ix + 8u; uint param_25 = ix + 8u;
uint param_26 = as_type<uint>(s.line_c); uint param_26 = as_type<uint>(s.line_c);
write_mem(param_24, param_25, param_26, v_201); write_mem(param_24, param_25, param_26, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201) void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u; uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u }; AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
AnnoLinGradient param_5 = s; AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5, v_201); AnnoLinGradient_write(param_3, param_4, param_5, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225) FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u]; uint raw0 = v_211.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u]; uint raw1 = v_211.scene[ix + 1u];
FillImage s; FillImage s;
s.index = raw0; s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
@ -452,167 +447,140 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf&
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225) FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{ {
FillImageRef param = FillImageRef{ ref.offset + 4u }; FillImageRef param = FillImageRef{ ref.offset + 4u };
return FillImage_read(param, v_225); return FillImage_read(param, v_211);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201) void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x); uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y); uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201); write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z); uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201); write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w); uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201); write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 4u; uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth); uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201); write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a; Alloc param_15 = a;
uint param_16 = ix + 5u; uint param_16 = ix + 5u;
uint param_17 = s.index; uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_201); write_mem(param_15, param_16, param_17, v_187);
Alloc param_18 = a; Alloc param_18 = a;
uint param_19 = ix + 6u; uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20, v_201); write_mem(param_18, param_19, param_20, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201) void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u; uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u }; AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
AnnoImage param_5 = s; AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5, v_201); AnnoImage_write(param_3, param_4, param_5, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225) void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u];
uint raw2 = v_225.scene[ix + 2u];
uint raw3 = v_225.scene[ix + 3u];
Clip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
return s;
}
static inline __attribute__((always_inline))
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_225);
}
static inline __attribute__((always_inline))
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x); uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y); uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201); write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z); uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201); write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w); uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201); write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 4u; uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth); uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201); write_mem(param_12, param_13, param_14, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201) void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u; uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u }; AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
AnnoBeginClip param_5 = s; AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5, v_201); AnnoBeginClip_write(param_3, param_4, param_5, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225) void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_225);
}
static inline __attribute__((always_inline))
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x); uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y); uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201); write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z); uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201); write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w); uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201); write_mem(param_9, param_10, param_11, v_187);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201) void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u; uint param_2 = 5u;
write_mem(param, param_1, param_2, v_201); write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a; Alloc param_3 = a;
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u }; AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
AnnoEndClip param_5 = s; AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5, v_201); AnnoEndClip_write(param_3, param_4, param_5, v_187);
} }
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup DrawMonoid sh_scratch[256]; threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u }; ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref; ElementRef param = ref;
uint tag_word = Element_tag(param, v_225).tag; uint tag_word = Element_tag(param, v_211).tag;
uint param_1 = tag_word; uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1); DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local; spvUnsafeArray<DrawMonoid, 8> local;
@ -622,7 +590,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
ElementRef param_2 = ref; ElementRef param_2 = ref;
uint param_3 = i; uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3); ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_225).tag; tag_word = Element_tag(param_4, v_211).tag;
uint param_5 = tag_word; uint param_5 = tag_word;
DrawMonoid param_6 = agg; DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5); DrawMonoid param_7 = map_tag(param_5);
@ -647,9 +615,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
uint _1007 = gl_WorkGroupID.x - 1u; uint _937 = gl_WorkGroupID.x - 1u;
row.path_ix = _1004.parent[_1007].path_ix; row.path_ix = _934.parent[_937].path_ix;
row.clip_ix = _1004.parent[_1007].clip_ix; row.clip_ix = _934.parent[_937].clip_ix;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -658,8 +626,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
row = combine_tag_monoid(param_10, param_11); row = combine_tag_monoid(param_10, param_11);
} }
uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) }; uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2);
AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) };
float4 mat; float4 mat;
float2 translate; float2 translate;
AnnoColor anno_fill; AnnoColor anno_fill;
@ -669,39 +638,43 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
AnnoImage anno_img; AnnoImage anno_img;
Alloc param_28; Alloc param_28;
AnnoBeginClip anno_begin_clip; AnnoBeginClip anno_begin_clip;
Alloc param_33; Alloc param_32;
AnnoEndClip anno_end_clip; AnnoEndClip anno_end_clip;
Alloc param_38; Alloc param_36;
for (uint i_2 = 0u; i_2 < 8u; i_2++) for (uint i_2 = 0u; i_2 < 8u; i_2++)
{ {
DrawMonoid param_12 = row; DrawMonoid m = row;
DrawMonoid param_13 = local[i_2]; if (i_2 > 0u)
DrawMonoid m = combine_tag_monoid(param_12, param_13); {
v_201.memory[out_base + (i_2 * 2u)] = m.path_ix; DrawMonoid param_12 = m;
v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
}
v_187.memory[out_base + (i_2 * 2u)] = m.path_ix;
v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
ElementRef param_14 = ref; ElementRef param_14 = ref;
uint param_15 = i_2; uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15); ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref; ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16, v_225).tag; tag_word = Element_tag(param_16, v_211).tag;
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
{ {
uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u)); uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0; float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0;
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0; float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0; float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0; float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(v_201.memory[bbox_offset + 4u]); float linewidth = as_type<float>(v_187.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0); uint fill_mode = uint(linewidth >= 0.0);
if ((linewidth >= 0.0) || (tag_word == 5u)) if ((linewidth >= 0.0) || (tag_word == 5u))
{ {
uint trans_ix = v_201.memory[bbox_offset + 5u]; uint trans_ix = v_187.memory[bbox_offset + 5u];
uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u])); mat = as_type<float4>(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u]));
if (tag_word == 5u) if (tag_word == 5u)
{ {
translate = as_type<float2>(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u])); translate = as_type<float2>(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u]));
} }
} }
if (linewidth >= 0.0) if (linewidth >= 0.0)
@ -714,21 +687,21 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
case 4u: case 4u:
{ {
ElementRef param_17 = this_ref; ElementRef param_17 = this_ref;
FillColor fill = Element_FillColor_read(param_17, v_225); FillColor fill = Element_FillColor_read(param_17, v_211);
anno_fill.bbox = bbox; anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth; anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color; anno_fill.rgba_color = fill.rgba_color;
param_18.offset = _1038.conf.anno_alloc.offset; param_18.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_19 = out_ref; AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode; uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill; AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21, v_201); Annotated_Color_write(param_18, param_19, param_20, param_21, v_187);
break; break;
} }
case 5u: case 5u:
{ {
ElementRef param_22 = this_ref; ElementRef param_22 = this_ref;
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225); FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211);
anno_lin.bbox = bbox; anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth; anno_lin.linewidth = linewidth;
anno_lin.index = lin.index; anno_lin.index = lin.index;
@ -741,57 +714,60 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
anno_lin.line_x = line_x; anno_lin.line_x = line_x;
anno_lin.line_y = line_y; anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
param_23.offset = _1038.conf.anno_alloc.offset; param_23.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_24 = out_ref; AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode; uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin; AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201); Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187);
break; break;
} }
case 6u: case 6u:
{ {
ElementRef param_27 = this_ref; ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27, v_225); FillImage fill_img = Element_FillImage_read(param_27, v_211);
anno_img.bbox = bbox; anno_img.bbox = bbox;
anno_img.linewidth = linewidth; anno_img.linewidth = linewidth;
anno_img.index = fill_img.index; anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset; anno_img.offset = fill_img.offset;
param_28.offset = _1038.conf.anno_alloc.offset; param_28.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_29 = out_ref; AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode; uint param_30 = fill_mode;
AnnoImage param_31 = anno_img; AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31, v_201); Annotated_Image_write(param_28, param_29, param_30, param_31, v_187);
break;
}
case 9u:
{
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0;
param_32.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_33 = out_ref;
uint param_34 = 0u;
AnnoBeginClip param_35 = anno_begin_clip;
Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187);
break; break;
} }
} }
} }
else else
{ {
if (tag_word == 10u)
{
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
param_36.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_37 = out_ref;
AnnoEndClip param_38 = anno_end_clip;
Annotated_EndClip_write(param_36, param_37, param_38, v_187);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u) if (tag_word == 9u)
{ {
ElementRef param_32 = this_ref; path_ix = m.path_ix;
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0;
param_33.offset = _1038.conf.anno_alloc.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = 0u;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201);
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_EndClip_read(param_37, v_225);
anno_end_clip.bbox = end_clip.bbox;
param_38.offset = _1038.conf.anno_alloc.offset;
AnnotatedRef param_39 = out_ref;
AnnoEndClip param_40 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
}
} }
v_187.memory[clip_out_base + m.clip_ix] = path_ix;
} }
out_ref.offset += 40u; out_ref.offset += 40u;
} }

Binary file not shown.

View file

@ -36,8 +36,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -66,8 +66,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -117,8 +117,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -457,7 +462,6 @@ void comp_main()
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[128][8]; uint blend_stack[128][8];
float blend_alpha_stack[128][8];
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -640,7 +644,6 @@ void comp_main()
float4 param_34 = float4(rgba[k_11]); float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34); uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390; blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
rgba[k_11] = 0.0f.xxxx; rgba[k_11] = 0.0f.xxxx;
} }
clip_depth++; clip_depth++;
@ -655,7 +658,7 @@ void comp_main()
uint d_3 = min(clip_depth, 127u); uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12]; uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35); float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
} }
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
@ -665,8 +668,8 @@ void comp_main()
{ {
Alloc param_36 = cmd_alloc; Alloc param_36 = cmd_alloc;
CmdRef param_37 = cmd_ref; CmdRef param_37 = cmd_ref;
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref }; CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1469; cmd_ref = _1453;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }

View file

@ -175,8 +175,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
float4 param_34 = float4(rgba[k_11]); float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34); uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390; blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
rgba[k_11] = float4(0.0); rgba[k_11] = float4(0.0);
} }
clip_depth++; clip_depth++;
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
uint d_3 = min(clip_depth, 127u); uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12]; uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35); float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
} }
cmd_ref.offset += 4u; cmd_ref.offset += 4u;

Binary file not shown.

Binary file not shown.

View file

@ -117,8 +117,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -457,7 +462,6 @@ void comp_main()
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[128][8]; uint blend_stack[128][8];
float blend_alpha_stack[128][8];
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -640,7 +644,6 @@ void comp_main()
float4 param_34 = float4(rgba[k_11]); float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34); uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390; blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
rgba[k_11] = 0.0f.xxxx; rgba[k_11] = 0.0f.xxxx;
} }
clip_depth++; clip_depth++;
@ -655,7 +658,7 @@ void comp_main()
uint d_3 = min(clip_depth, 127u); uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12]; uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35); float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
} }
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
@ -665,8 +668,8 @@ void comp_main()
{ {
Alloc param_36 = cmd_alloc; Alloc param_36 = cmd_alloc;
CmdRef param_37 = cmd_ref; CmdRef param_37 = cmd_ref;
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref }; CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1469; cmd_ref = _1453;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }

View file

@ -175,8 +175,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
float4 param_34 = float4(rgba[k_11]); float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34); uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390; blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
rgba[k_11] = float4(0.0); rgba[k_11] = float4(0.0);
} }
clip_depth++; clip_depth++;
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
uint d_3 = min(clip_depth, 127u); uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12]; uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35); float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
} }
cmd_ref.offset += 4u; cmd_ref.offset += 4u;

Binary file not shown.

View file

@ -86,8 +86,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -146,8 +146,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -64,8 +64,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -80,7 +85,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u };
RWByteAddressBuffer _111 : register(u0, space0); RWByteAddressBuffer _111 : register(u0, space0);
ByteAddressBuffer _574 : register(t2, space0); ByteAddressBuffer _574 : register(t2, space0);
ByteAddressBuffer _639 : register(t1, space0); ByteAddressBuffer _639 : register(t1, space0);
ByteAddressBuffer _709 : register(t3, space0); ByteAddressBuffer _710 : register(t3, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -356,7 +361,7 @@ uint round_up(float x)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid local_tm = reduce_tag(param); TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm; sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -377,17 +382,17 @@ void comp_main()
TagMonoid tm = tag_monoid_identity(); TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
TagMonoid _715; TagMonoid _716;
_715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0); _716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
_715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4); _716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
_715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8); _716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
_715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12); _716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
_715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16); _716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
tm.trans_ix = _715.trans_ix; tm.trans_ix = _716.trans_ix;
tm.linewidth_ix = _715.linewidth_ix; tm.linewidth_ix = _716.linewidth_ix;
tm.pathseg_ix = _715.pathseg_ix; tm.pathseg_ix = _716.pathseg_ix;
tm.path_ix = _715.path_ix; tm.path_ix = _716.path_ix;
tm.pathseg_offset = _715.pathseg_offset; tm.pathseg_offset = _716.pathseg_offset;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -395,14 +400,14 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4); tm = combine_tag_monoid(param_3, param_4);
} }
uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset; uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix; uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix; uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix; uint trans_ix = tm.trans_ix;
TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) }; TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
TransformSegRef trans_ref = _770; TransformSegRef trans_ref = _771;
PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _780; PathSegRef ps_ref = _781;
float linewidth[4]; float linewidth[4];
uint save_trans_ix[4]; uint save_trans_ix[4];
float2 p0; float2 p0;
@ -455,9 +460,9 @@ void comp_main()
} }
} }
} }
Alloc _876; Alloc _877;
_876.offset = _639.Load(36); _877.offset = _639.Load(36);
param_13.offset = _876.offset; param_13.offset = _877.offset;
TransformSegRef param_14 = trans_ref; TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14); TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -466,25 +471,25 @@ void comp_main()
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox; float4 _947 = bbox;
float2 _949 = min(_946.xy, p2); float2 _950 = min(_947.xy, p2);
bbox.x = _949.x; bbox.x = _950.x;
bbox.y = _949.y; bbox.y = _950.y;
float4 _954 = bbox; float4 _955 = bbox;
float2 _957 = max(_954.zw, p2); float2 _958 = max(_955.zw, p2);
bbox.z = _957.x; bbox.z = _958.x;
bbox.w = _957.y; bbox.w = _958.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox; float4 _983 = bbox;
float2 _985 = min(_982.xy, p3); float2 _986 = min(_983.xy, p3);
bbox.x = _985.x; bbox.x = _986.x;
bbox.y = _985.y; bbox.y = _986.y;
float4 _990 = bbox; float4 _991 = bbox;
float2 _993 = max(_990.zw, p3); float2 _994 = max(_991.zw, p3);
bbox.z = _993.x; bbox.z = _994.x;
bbox.w = _993.y; bbox.w = _994.y;
} }
else else
{ {
@ -515,9 +520,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke; cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f); uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1088; Alloc _1089;
_1088.offset = _639.Load(28); _1089.offset = _639.Load(28);
param_15.offset = _1088.offset; param_15.offset = _1089.offset;
PathSegRef param_16 = ps_ref; PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode; uint param_17 = fill_mode;
PathCubic param_18 = cubic; PathCubic param_18 = cubic;
@ -574,17 +579,17 @@ void comp_main()
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1263 = i_4 == 3u; bool _1264 = i_4 == 3u;
bool _1269; bool _1270;
if (_1263) if (_1264)
{ {
_1269 = gl_LocalInvocationID.x == 255u; _1270 = gl_LocalInvocationID.x == 255u;
} }
else else
{ {
_1269 = _1263; _1270 = _1264;
} }
if (_1269) if (_1270)
{ {
do_atomic = true; do_atomic = true;
} }
@ -612,30 +617,30 @@ void comp_main()
} }
if (do_atomic) if (do_atomic)
{ {
bool _1334 = m.bbox.z > m.bbox.x; bool _1335 = m.bbox.z > m.bbox.x;
bool _1343; bool _1344;
if (!_1334) if (!_1335)
{ {
_1343 = m.bbox.w > m.bbox.y; _1344 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1343 = _1334; _1344 = _1335;
} }
if (_1343) if (_1344)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1352; uint _1353;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352); _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1360; uint _1361;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360); _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1368; uint _1369;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368); _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1376; uint _1377;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376); _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
} }
bbox_out_ix += 6u; bbox_out_ix += 6u;
} }

View file

@ -129,8 +129,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -430,7 +435,7 @@ uint round_up(thread const float& x)
return uint(fast::min(65535.0, ceil(x) + 32768.0)); return uint(fast::min(65535.0, ceil(x) + 32768.0));
} }
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup TagMonoid sh_tag[256]; threadgroup TagMonoid sh_tag[256];
threadgroup Monoid sh_scratch[256]; threadgroup Monoid sh_scratch[256];
@ -456,12 +461,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
TagMonoid tm = tag_monoid_identity(); TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
uint _712 = gl_WorkGroupID.x - 1u; uint _713 = gl_WorkGroupID.x - 1u;
tm.trans_ix = _709.parent[_712].trans_ix; tm.trans_ix = _710.parent[_713].trans_ix;
tm.linewidth_ix = _709.parent[_712].linewidth_ix; tm.linewidth_ix = _710.parent[_713].linewidth_ix;
tm.pathseg_ix = _709.parent[_712].pathseg_ix; tm.pathseg_ix = _710.parent[_713].pathseg_ix;
tm.path_ix = _709.parent[_712].path_ix; tm.path_ix = _710.parent[_713].path_ix;
tm.pathseg_offset = _709.parent[_712].pathseg_offset; tm.pathseg_offset = _710.parent[_713].pathseg_offset;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -536,25 +541,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox; float4 _947 = bbox;
float2 _949 = fast::min(_946.xy, p2); float2 _950 = fast::min(_947.xy, p2);
bbox.x = _949.x; bbox.x = _950.x;
bbox.y = _949.y; bbox.y = _950.y;
float4 _954 = bbox; float4 _955 = bbox;
float2 _957 = fast::max(_954.zw, p2); float2 _958 = fast::max(_955.zw, p2);
bbox.z = _957.x; bbox.z = _958.x;
bbox.w = _957.y; bbox.w = _958.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox; float4 _983 = bbox;
float2 _985 = fast::min(_982.xy, p3); float2 _986 = fast::min(_983.xy, p3);
bbox.x = _985.x; bbox.x = _986.x;
bbox.y = _985.y; bbox.y = _986.y;
float4 _990 = bbox; float4 _991 = bbox;
float2 _993 = fast::max(_990.zw, p3); float2 _994 = fast::max(_991.zw, p3);
bbox.z = _993.x; bbox.z = _994.x;
bbox.w = _993.y; bbox.w = _994.y;
} }
else else
{ {
@ -642,17 +647,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1263 = i_4 == 3u; bool _1264 = i_4 == 3u;
bool _1269; bool _1270;
if (_1263) if (_1264)
{ {
_1269 = gl_LocalInvocationID.x == 255u; _1270 = gl_LocalInvocationID.x == 255u;
} }
else else
{ {
_1269 = _1263; _1270 = _1264;
} }
if (_1269) if (_1270)
{ {
do_atomic = true; do_atomic = true;
} }
@ -680,26 +685,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
} }
if (do_atomic) if (do_atomic)
{ {
bool _1334 = m.bbox.z > m.bbox.x; bool _1335 = m.bbox.z > m.bbox.x;
bool _1343; bool _1344;
if (!_1334) if (!_1335)
{ {
_1343 = m.bbox.w > m.bbox.y; _1344 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1343 = _1334; _1344 = _1335;
} }
if (_1343) if (_1344)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
} }
bbox_out_ix += 6u; bbox_out_ix += 6u;
} }

Binary file not shown.

Binary file not shown.

View file

@ -26,8 +26,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -37,9 +42,9 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
ByteAddressBuffer _139 : register(t1, space0); ByteAddressBuffer _139 : register(t1, space0);
ByteAddressBuffer _150 : register(t2, space0); ByteAddressBuffer _151 : register(t2, space0);
RWByteAddressBuffer _237 : register(u3, space0); RWByteAddressBuffer _238 : register(u3, space0);
RWByteAddressBuffer _257 : register(u0, space0); RWByteAddressBuffer _258 : register(u0, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -83,13 +88,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 2u; uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(64) >> uint(2)) + ix; uint scene_ix = (_139.Load(84) >> uint(2)) + ix;
uint tag_word = _150.Load(scene_ix * 4 + 0); uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++) for (uint i = 1u; i < 2u; i++)
{ {
tag_word = _150.Load((scene_ix + i) * 4 + 0); tag_word = _151.Load((scene_ix + i) * 4 + 0);
uint param_1 = tag_word; uint param_1 = tag_word;
TagMonoid param_2 = agg; TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1); TagMonoid param_3 = reduce_tag(param_1);
@ -111,11 +116,11 @@ void comp_main()
} }
if (gl_LocalInvocationID.x == 0u) if (gl_LocalInvocationID.x == 0u)
{ {
_237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); _238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
_237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); _238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
_237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); _238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
_237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); _238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
_237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); _238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
} }
} }

View file

@ -33,8 +33,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -103,17 +108,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid&
return c; return c;
} }
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup TagMonoid sh_scratch[128]; threadgroup TagMonoid sh_scratch[128];
uint ix = gl_GlobalInvocationID.x * 2u; uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
uint tag_word = _150.scene[scene_ix]; uint tag_word = _151.scene[scene_ix];
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++) for (uint i = 1u; i < 2u; i++)
{ {
tag_word = _150.scene[scene_ix + i]; tag_word = _151.scene[scene_ix + i];
uint param_1 = tag_word; uint param_1 = tag_word;
TagMonoid param_2 = agg; TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1); TagMonoid param_3 = reduce_tag(param_1);
@ -135,11 +140,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene
} }
if (gl_LocalInvocationID.x == 0u) if (gl_LocalInvocationID.x == 0u)
{ {
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; _238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; _238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; _238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; _238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; _238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
} }
} }

Binary file not shown.

View file

@ -60,8 +60,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -81,8 +81,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -39,8 +39,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -150,7 +155,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(56) + (ix * 24u) }; TransformRef _285 = { _278.Load(76) + (ix * 24u) };
TransformRef ref = _285; TransformRef ref = _285;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -102,8 +102,13 @@ struct Config
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -28,8 +28,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
@ -87,7 +92,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(56) + (ix * 24u) }; TransformRef _168 = { _161.Load(76) + (ix * 24u) };
TransformRef ref = _168; TransformRef ref = _168;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -40,8 +40,13 @@ struct Config
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip;
uint trans_offset; uint trans_offset;
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

View file

@ -91,7 +91,6 @@ void main() {
vec2 xy = vec2(xy_uint); vec2 xy = vec2(xy_uint);
mediump vec4 rgba[CHUNK]; mediump vec4 rgba[CHUNK];
uint blend_stack[MAX_BLEND_STACK][CHUNK]; uint blend_stack[MAX_BLEND_STACK][CHUNK];
mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK];
for (uint i = 0; i < CHUNK; i++) { for (uint i = 0; i < CHUNK; i++) {
rgba[i] = vec4(0.0); rgba[i] = vec4(0.0);
} }
@ -211,7 +210,6 @@ void main() {
// The following is a sanity check so we don't corrupt memory should there be malformed inputs. // The following is a sanity check so we don't corrupt memory should there be malformed inputs.
uint d = min(clip_depth, MAX_BLEND_STACK - 1); uint d = min(clip_depth, MAX_BLEND_STACK - 1);
blend_stack[d][k] = packsRGB(vec4(rgba[k])); blend_stack[d][k] = packsRGB(vec4(rgba[k]));
blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0);
rgba[k] = vec4(0.0); rgba[k] = vec4(0.0);
} }
clip_depth++; clip_depth++;
@ -222,7 +220,7 @@ void main() {
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
uint d = min(clip_depth, MAX_BLEND_STACK - 1); uint d = min(clip_depth, MAX_BLEND_STACK - 1);
mediump vec4 bg = unpacksRGB(blend_stack[d][k]); mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k]; mediump vec4 fg = rgba[k] * area[k];
rgba[k] = bg * (1.0 - fg.a) + fg; rgba[k] = bg * (1.0 - fg.a) + fg;
} }
cmd_ref.offset += 4; cmd_ref.offset += 4;

View file

@ -46,11 +46,23 @@ struct Config {
// Monoid for draw objects // Monoid for draw objects
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
// BeginClip(path_ix) / EndClip
Alloc clip_alloc;
// Intermediate bicyclic semigroup
Alloc clip_bic_alloc;
// Intermediate stack
Alloc clip_stack_alloc;
// Clip processing results (path_ix + bbox)
Alloc clip_bbox_alloc;
// Number of transforms in scene // Number of transforms in scene
// This is probably not needed. // This is probably not needed.
uint n_trans; uint n_trans;
// This only counts actual paths, not EndClip. // This *should* count only actual paths, but in the current
// implementation is redundant with n_elements.
uint n_path; uint n_path;
// Total number of BeginClip and EndClip draw objects.
uint n_clip;
// Offset (in bytes) of transform stream in scene buffer // Offset (in bytes) of transform stream in scene buffer
uint trans_offset; uint trans_offset;
// Offset (in bytes) of linewidth stream in scene // Offset (in bytes) of linewidth stream in scene

View file

@ -20,7 +20,8 @@ use bytemuck::{Pod, Zeroable};
use piet_gpu_hal::BufWrite; use piet_gpu_hal::BufWrite;
use crate::stages::{ use crate::stages::{
self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE, self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
TRANSFORM_PART_SIZE,
}; };
pub struct Encoder { pub struct Encoder {
@ -31,6 +32,7 @@ pub struct Encoder {
drawobj_stream: Vec<u8>, drawobj_stream: Vec<u8>,
n_path: u32, n_path: u32,
n_pathseg: u32, n_pathseg: u32,
n_clip: u32,
} }
/// A scene fragment encoding a glyph. /// A scene fragment encoding a glyph.
@ -98,6 +100,7 @@ impl Encoder {
drawobj_stream: Vec::new(), drawobj_stream: Vec::new(),
n_path: 0, n_path: 0,
n_pathseg: 0, n_pathseg: 0,
n_clip: 0,
} }
} }
@ -155,6 +158,7 @@ impl Encoder {
..Default::default() ..Default::default()
}; };
self.drawobj_stream.extend(bytemuck::bytes_of(&element)); self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.n_clip += 1;
saved saved
} }
@ -170,6 +174,7 @@ impl Encoder {
// This is a dummy path, and will go away with the new clip impl. // This is a dummy path, and will go away with the new clip impl.
self.tag_stream.push(0x10); self.tag_stream.push(0x10);
self.n_path += 1; self.n_path += 1;
self.n_clip += 1;
} }
/// Return a config for the element processing pipeline. /// Return a config for the element processing pipeline.
@ -203,6 +208,20 @@ impl Encoder {
alloc += n_drawobj_padded * DRAWMONOID_SIZE; alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc; let anno_alloc = alloc;
alloc += n_drawobj * ANNOTATED_SIZE; alloc += n_drawobj * ANNOTATED_SIZE;
let clip_alloc = alloc;
let n_clip = self.n_clip as usize;
const CLIP_SIZE: usize = 4;
alloc += n_clip * CLIP_SIZE;
let clip_bic_alloc = alloc;
const CLIP_BIC_SIZE: usize = 8;
// This can round down, as we only reduce the prefix
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
let clip_stack_alloc = alloc;
const CLIP_EL_SIZE: usize = 20;
alloc += n_clip * CLIP_EL_SIZE;
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let config = Config { let config = Config {
n_elements: n_drawobj as u32, n_elements: n_drawobj as u32,
@ -212,8 +231,13 @@ impl Encoder {
trans_alloc: trans_alloc as u32, trans_alloc: trans_alloc as u32,
bbox_alloc: bbox_alloc as u32, bbox_alloc: bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
n_trans: n_trans as u32, n_trans: n_trans as u32,
n_path: self.n_path, n_path: self.n_path,
n_clip: self.n_clip,
trans_offset: trans_offset as u32, trans_offset: trans_offset as u32,
linewidth_offset: linewidth_offset as u32, linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32, pathtag_offset: pathtag_offset as u32,
@ -261,6 +285,10 @@ impl Encoder {
self.tag_stream.len() self.tag_stream.len()
} }
pub(crate) fn n_clip(&self) -> u32 {
self.n_clip
}
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
self.tag_stream.extend(&glyph.tag_stream); self.tag_stream.extend(&glyph.tag_stream);
self.pathseg_stream.extend(&glyph.pathseg_stream); self.pathseg_stream.extend(&glyph.pathseg_stream);

View file

@ -20,9 +20,9 @@ use piet_gpu_hal::{
}; };
use pico_svg::PicoSvg; use pico_svg::PicoSvg;
use stages::{ElementBinding, ElementCode}; use stages::{ClipBinding, ElementBinding, ElementCode};
use crate::stages::{Config, ElementStage}; use crate::stages::{ClipCode, Config, ElementStage};
const TILE_W: usize = 16; const TILE_W: usize = 16;
const TILE_H: usize = 16; const TILE_H: usize = 16;
@ -86,6 +86,9 @@ pub struct Renderer {
element_stage: ElementStage, element_stage: ElementStage,
element_bindings: Vec<ElementBinding>, element_bindings: Vec<ElementBinding>,
clip_code: ClipCode,
clip_binding: ClipBinding,
tile_pipeline: Pipeline, tile_pipeline: Pipeline,
tile_ds: DescriptorSet, tile_ds: DescriptorSet,
@ -110,6 +113,7 @@ pub struct Renderer {
n_paths: usize, n_paths: usize,
n_pathseg: usize, n_pathseg: usize,
n_pathtag: usize, n_pathtag: usize,
n_clip: u32,
// Keep a reference to the image so that it is not destroyed. // Keep a reference to the image so that it is not destroyed.
_bg_image: Image, _bg_image: Image,
@ -191,18 +195,20 @@ impl Renderer {
let element_stage = ElementStage::new(session, &element_code); let element_stage = ElementStage::new(session, &element_code);
let element_bindings = scene_bufs let element_bindings = scene_bufs
.iter() .iter()
.zip(&config_bufs) .map(|scene_buf| {
.map(|(scene_buf, config_buf)| {
element_stage.bind( element_stage.bind(
session, session,
&element_code, &element_code,
config_buf, &config_buf,
scene_buf, scene_buf,
&memory_buf_dev, &memory_buf_dev,
) )
}) })
.collect(); .collect();
let clip_code = ClipCode::new(session);
let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc"); let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
let tile_pipeline = session let tile_pipeline = session
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?; .create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
@ -286,6 +292,8 @@ impl Renderer {
element_code, element_code,
element_stage, element_stage,
element_bindings, element_bindings,
clip_code,
clip_binding,
tile_pipeline, tile_pipeline,
tile_ds, tile_ds,
path_pipeline, path_pipeline,
@ -304,6 +312,7 @@ impl Renderer {
n_paths: 0, n_paths: 0,
n_pathseg: 0, n_pathseg: 0,
n_pathtag: 0, n_pathtag: 0,
n_clip: 0,
_bg_image: bg_image, _bg_image: bg_image,
gradient_bufs, gradient_bufs,
gradients, gradients,
@ -329,6 +338,7 @@ impl Renderer {
self.n_drawobj = render_ctx.n_drawobj(); self.n_drawobj = render_ctx.n_drawobj();
self.n_pathseg = render_ctx.n_pathseg() as usize; self.n_pathseg = render_ctx.n_pathseg() as usize;
self.n_pathtag = render_ctx.n_pathtag(); self.n_pathtag = render_ctx.n_pathtag();
self.n_clip = render_ctx.n_clip();
// These constants depend on encoding and may need to be updated. // These constants depend on encoding and may need to be updated.
// Perhaps we can plumb these from piet-gpu-derive? // Perhaps we can plumb these from piet-gpu-derive?
@ -342,6 +352,7 @@ impl Renderer {
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE; alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc; let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC; alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
config.width_in_tiles = width_in_tiles as u32; config.width_in_tiles = width_in_tiles as u32;
config.height_in_tiles = height_in_tiles as u32; config.height_in_tiles = height_in_tiles as u32;
config.tile_alloc = tile_base as u32; config.tile_alloc = tile_base as u32;
@ -401,6 +412,19 @@ impl Renderer {
cmd_buf.end_debug_label(); cmd_buf.end_debug_label();
cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Clip bounding box calculation");
self.clip_binding
.record(cmd_buf, &self.clip_code, self.n_clip as u32);
cmd_buf.end_debug_label();
cmd_buf.begin_debug_label("Element binning");
cmd_buf.dispatch(
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.end_debug_label();
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Tile allocation"); cmd_buf.begin_debug_label("Tile allocation");
cmd_buf.dispatch( cmd_buf.dispatch(
&self.tile_pipeline, &self.tile_pipeline,
@ -430,18 +454,7 @@ impl Renderer {
); );
cmd_buf.end_debug_label(); cmd_buf.end_debug_label();
cmd_buf.write_timestamp(&query_pool, 4); cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between // TODO: redo query accounting
// pipeline stages, but I am keeping it in so that timer queries are
// easier to interpret.
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Element binning");
cmd_buf.dispatch(
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.end_debug_label();
cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Coarse raster"); cmd_buf.begin_debug_label("Coarse raster");

View file

@ -123,6 +123,10 @@ impl PietGpuRenderContext {
self.new_encoder.n_transform() self.new_encoder.n_transform()
} }
pub fn n_clip(&self) -> u32 {
self.new_encoder.n_clip()
}
pub fn write_scene(&self, buf: &mut BufWrite) { pub fn write_scene(&self, buf: &mut BufWrite) {
self.new_encoder.write_scene(buf); self.new_encoder.write_scene(buf);
} }

View file

@ -16,12 +16,14 @@
//! Stages for new element pipeline, exposed for testing. //! Stages for new element pipeline, exposed for testing.
mod clip;
mod draw; mod draw;
mod path; mod path;
mod transform; mod transform;
use bytemuck::{Pod, Zeroable}; use bytemuck::{Pod, Zeroable};
pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE};
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
use piet_gpu_hal::{Buffer, CmdBuf, Session}; use piet_gpu_hal::{Buffer, CmdBuf, Session};
@ -47,8 +49,13 @@ pub struct Config {
pub trans_alloc: u32, pub trans_alloc: u32,
pub bbox_alloc: u32, pub bbox_alloc: u32,
pub drawmonoid_alloc: u32, pub drawmonoid_alloc: u32,
pub clip_alloc: u32,
pub clip_bic_alloc: u32,
pub clip_stack_alloc: u32,
pub clip_bbox_alloc: u32,
pub n_trans: u32, pub n_trans: u32,
pub n_path: u32, pub n_path: u32,
pub n_clip: u32,
pub trans_offset: u32, pub trans_offset: u32,
pub linewidth_offset: u32, pub linewidth_offset: u32,
pub pathtag_offset: u32, pub pathtag_offset: u32,

View file

@ -0,0 +1,94 @@
// Copyright 2022 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! The clip processing stage (includes substages).
use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session};
// Note that this isn't the code/stage/binding pattern of most of the other stages
// in the new element processing pipeline. We want to move those temporary buffers
// into common memory and converge on this pattern.
pub struct ClipCode {
reduce_pipeline: Pipeline,
leaf_pipeline: Pipeline,
}
pub struct ClipBinding {
reduce_ds: DescriptorSet,
leaf_ds: DescriptorSet,
}
pub const CLIP_PART_SIZE: u32 = 256;
impl ClipCode {
pub unsafe fn new(session: &Session) -> ClipCode {
let reduce_code = include_shader!(session, "../../shader/gen/clip_reduce");
let reduce_pipeline = session
.create_compute_pipeline(reduce_code, &[BindType::Buffer, BindType::BufReadOnly])
.unwrap();
let leaf_code = include_shader!(session, "../../shader/gen/clip_leaf");
let leaf_pipeline = session
.create_compute_pipeline(leaf_code, &[BindType::Buffer, BindType::BufReadOnly])
.unwrap();
ClipCode {
reduce_pipeline,
leaf_pipeline,
}
}
}
impl ClipBinding {
pub unsafe fn new(
session: &Session,
code: &ClipCode,
config: &Buffer,
memory: &Buffer,
) -> ClipBinding {
let reduce_ds = session
.create_simple_descriptor_set(&code.reduce_pipeline, &[memory, config])
.unwrap();
let leaf_ds = session
.create_simple_descriptor_set(&code.leaf_pipeline, &[memory, config])
.unwrap();
ClipBinding { reduce_ds, leaf_ds }
}
/// Record the clip dispatches.
///
/// Assumes memory barrier on entry. Provides memory barrier on exit.
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) {
let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE;
if n_wg_reduce > 0 {
cmd_buf.dispatch(
&code.reduce_pipeline,
&self.reduce_ds,
(n_wg_reduce, 1, 1),
(CLIP_PART_SIZE, 1, 1),
);
cmd_buf.memory_barrier();
}
let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE;
if n_wg > 0 {
cmd_buf.dispatch(
&code.leaf_pipeline,
&self.leaf_ds,
(n_wg, 1, 1),
(CLIP_PART_SIZE, 1, 1),
);
cmd_buf.memory_barrier();
}
}
}

237
tests/src/clip.rs Normal file
View file

@ -0,0 +1,237 @@
// Copyright 2022 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! Tests for the piet-gpu clip processing stage.
use bytemuck::{Pod, Zeroable};
use rand::Rng;
use piet_gpu::stages::{self, ClipBinding, ClipCode, DrawMonoid};
use piet_gpu_hal::{BufWrite, BufferUsage};
use crate::{Config, Runner, TestResult};
struct ClipData {
clip_stream: Vec<u32>,
// In the atomic-int friendly encoding
path_bbox_stream: Vec<PathBbox>,
}
#[derive(Copy, Clone, Debug, Pod, Zeroable, Default)]
#[repr(C)]
struct PathBbox {
bbox: [u32; 4],
linewidth: f32,
trans_ix: u32,
}
pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("clip");
let n_clip: u64 = config.size.choose(1 << 8, 1 << 12, 1 << 16);
let data = ClipData::new(n_clip);
let stage_config = data.get_config();
let config_buf = runner
.session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap();
// Need to actually get data uploaded
let mut memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
{
let mut buf_write = memory.map_write(..);
data.fill_memory(&mut buf_write);
}
let code = ClipCode::new(&runner.session);
let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf);
let mut commands = runner.commands();
commands.write_timestamp(0);
commands.upload(&memory);
binding.record(&mut commands.cmd_buf, &code, n_clip as u32);
commands.download(&memory);
commands.write_timestamp(1);
runner.submit(commands);
let dst = memory.map_read(..);
if let Some(failure) = data.verify(&dst) {
result.fail(failure);
}
result
}
fn rand_bbox() -> [u32; 4] {
let mut rng = rand::thread_rng();
const Y_MIN: u32 = 32768;
const Y_MAX: u32 = Y_MIN + 1000;
let mut x0 = rng.gen_range(Y_MIN, Y_MAX);
let mut y0 = rng.gen_range(Y_MIN, Y_MAX);
let mut x1 = rng.gen_range(Y_MIN, Y_MAX);
let mut y1 = rng.gen_range(Y_MIN, Y_MAX);
if x0 > x1 {
std::mem::swap(&mut x0, &mut x1);
}
if y0 > y1 {
std::mem::swap(&mut y0, &mut y1);
}
[x0, y0, x1, y1]
}
/// Convert from atomic-friendly to normal float bbox.
fn decode_bbox(raw: [u32; 4]) -> [f32; 4] {
fn decode(x: u32) -> f32 {
x as f32 - 32768.0
}
[
decode(raw[0]),
decode(raw[1]),
decode(raw[2]),
decode(raw[3]),
]
}
fn intersect_bbox(b0: [f32; 4], b1: [f32; 4]) -> [f32; 4] {
[
b0[0].max(b1[0]),
b0[1].max(b1[1]),
b0[2].min(b1[2]),
b0[3].min(b1[3]),
]
}
const INFTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9];
impl ClipData {
/// Generate a random clip sequence
fn new(n: u64) -> ClipData {
// Simple LCG random generator, for deterministic results
let mut z = 20170705u64;
let mut depth = 0;
let mut path_bbox_stream = Vec::new();
let clip_stream = (0..n)
.map(|i| {
let is_push = if depth == 0 {
true
} else if depth >= 255 {
false
} else {
z = z.wrapping_mul(742938285) % ((1 << 31) - 1);
(z % 2) != 0
};
if is_push {
depth += 1;
let path_ix = path_bbox_stream.len() as u32;
let bbox = rand_bbox();
let path_bbox = PathBbox {
bbox,
..Default::default()
};
path_bbox_stream.push(path_bbox);
path_ix
} else {
depth -= 1;
!(i as u32)
}
})
.collect();
ClipData {
clip_stream,
path_bbox_stream,
}
}
fn get_config(&self) -> stages::Config {
let n_clip = self.clip_stream.len();
let n_path = self.path_bbox_stream.len();
let clip_alloc = 0;
let path_bbox_alloc = clip_alloc + 4 * n_clip;
let drawmonoid_alloc = path_bbox_alloc + 24 * n_path;
let clip_bic_alloc = drawmonoid_alloc + 8 * n_clip;
// TODO: this is over-allocated, we only need one bic per wg
let clip_stack_alloc = clip_bic_alloc + 8 * n_clip;
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
stages::Config {
clip_alloc: clip_alloc as u32,
// TODO: this wants to be renamed to path_bbox_alloc
bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
n_clip: n_clip as u32,
..Default::default()
}
}
fn memory_size(&self) -> u64 {
(8 + self.clip_stream.len() * (4 + 8 + 8 + 20 + 16) + self.path_bbox_stream.len() * 24)
as u64
}
fn fill_memory(&self, buf: &mut BufWrite) {
// offset / header; no dynamic allocation
buf.fill_zero(8);
buf.extend_slice(&self.clip_stream);
buf.extend_slice(&self.path_bbox_stream);
// drawmonoid is left uninitialized
}
fn verify(&self, buf: &[u8]) -> Option<String> {
let n_clip = self.clip_stream.len();
let n_path = self.path_bbox_stream.len();
let clip_bbox_start = 8 + n_clip * (4 + 8 + 8 + 20) + n_path * 24;
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
let draw_start = 8 + n_clip * 4 + n_path * 24;
let draw_range = draw_start..(draw_start + n_clip * 8);
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
let mut bbox_stack = Vec::new();
let mut parent_stack = Vec::new();
for (i, path_ix) in self.clip_stream.iter().enumerate() {
let mut expected_path = None;
if *path_ix >= 0x8000_0000 {
let parent = parent_stack.pop().unwrap();
expected_path = Some(self.clip_stream[parent as usize]);
bbox_stack.pop().unwrap();
} else {
parent_stack.push(i);
let path_bbox_stream = self.path_bbox_stream[*path_ix as usize];
let bbox = decode_bbox(path_bbox_stream.bbox);
let new = match bbox_stack.last() {
None => bbox,
Some(old) => intersect_bbox(*old, bbox),
};
bbox_stack.push(new);
};
let expected = bbox_stack.last().copied().unwrap_or(INFTY_BBOX);
let clip_bbox = clip_result[i];
if clip_bbox != expected {
return Some(format!(
"{}: path_ix={}, expected bbox={:?}, clip_bbox={:?}",
i, path_ix, expected, clip_bbox
));
}
if let Some(expected_path) = expected_path {
let actual_path = draw_result[i].path_ix;
if expected_path != actual_path {
return Some(format!(
"{}: expected path {}, actual {}",
i, expected_path, actual_path
));
}
}
}
None
}
}

View file

@ -102,17 +102,21 @@ impl DrawTestData {
// Layout of memory // Layout of memory
let drawmonoid_alloc = 0; let drawmonoid_alloc = 0;
let anno_alloc = drawmonoid_alloc + 8 * n_tags; let anno_alloc = drawmonoid_alloc + 8 * n_tags;
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
let stage_config = stages::Config { let stage_config = stages::Config {
n_elements: n_tags as u32, n_elements: n_tags as u32,
anno_alloc: anno_alloc as u32, anno_alloc: anno_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
..Default::default() ..Default::default()
}; };
stage_config stage_config
} }
fn memory_size(&self) -> u64 { fn memory_size(&self) -> u64 {
(8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64 // Note: this overallocates the clip buf a bit - only needed for the
// total number of begin_clip and end_clip tags.
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64
} }
fn fill_scene(&self, buf: &mut BufWrite) { fn fill_scene(&self, buf: &mut BufWrite) {
@ -128,14 +132,13 @@ impl DrawTestData {
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]); let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default(); let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// We compute an inclusive prefix sum, but for this application // Verify exclusive prefix sum.
// exclusive would be slightly better. We can adapt though.
let (path_ix, clip_ix) = Self::reduce_tag(*tag); let (path_ix, clip_ix) = Self::reduce_tag(*tag);
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
if *actual != expected { if *actual != expected {
return Some(format!("draw mismatch at {}", i)); return Some(format!("draw mismatch at {}", i));
} }
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
} }
None None
} }

View file

@ -17,6 +17,7 @@
//! Tests for piet-gpu shaders and GPU capabilities. //! Tests for piet-gpu shaders and GPU capabilities.
mod clear; mod clear;
mod clip;
mod config; mod config;
mod draw; mod draw;
mod linkedlist; mod linkedlist;
@ -139,6 +140,7 @@ fn main() {
report(&transform::transform_test(&mut runner, &config)); report(&transform::transform_test(&mut runner, &config));
report(&path::path_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config));
report(&draw::draw_test(&mut runner, &config)); report(&draw::draw_test(&mut runner, &config));
report(&clip::clip_test(&mut runner, &config));
} }
} }
} }

View file

@ -20,8 +20,8 @@ use std::ops::RangeBounds;
use bytemuck::Pod; use bytemuck::Pod;
use piet_gpu_hal::{ use piet_gpu_hal::{
BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool, BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags,
Session, QueryPool, Session,
}; };
pub struct Runner { pub struct Runner {
@ -37,15 +37,8 @@ pub struct Commands {
query_pool: QueryPool, query_pool: QueryPool,
} }
/// Buffer for uploading data to GPU. /// Buffer for both uploading and downloading
#[allow(unused)] pub struct BufStage {
pub struct BufUp {
pub stage_buf: Buffer,
pub dev_buf: Buffer,
}
/// Buffer for downloading data from GPU.
pub struct BufDown {
pub stage_buf: Buffer, pub stage_buf: Buffer,
pub dev_buf: Buffer, pub dev_buf: Buffer,
} }
@ -92,7 +85,7 @@ impl Runner {
} }
#[allow(unused)] #[allow(unused)]
pub fn buf_up(&self, size: u64) -> BufUp { pub fn buf_up(&self, size: u64) -> BufStage {
let stage_buf = self let stage_buf = self
.session .session
.create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC) .create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC)
@ -101,13 +94,13 @@ impl Runner {
.session .session
.create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE) .create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE)
.unwrap(); .unwrap();
BufUp { stage_buf, dev_buf } BufStage { stage_buf, dev_buf }
} }
/// Create a buffer for download (readback). /// Create a buffer for download (readback).
/// ///
/// The `usage` parameter need not include COPY_SRC and STORAGE. /// The `usage` parameter need not include COPY_SRC and STORAGE.
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown { pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufStage {
let stage_buf = self let stage_buf = self
.session .session
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST) .create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
@ -116,7 +109,7 @@ impl Runner {
.session .session
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE) .create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
.unwrap(); .unwrap();
BufDown { stage_buf, dev_buf } BufStage { stage_buf, dev_buf }
} }
pub fn backend_type(&self) -> BackendType { pub fn backend_type(&self) -> BackendType {
@ -129,17 +122,16 @@ impl Commands {
self.cmd_buf.write_timestamp(&self.query_pool, query); self.cmd_buf.write_timestamp(&self.query_pool, query);
} }
#[allow(unused)] pub unsafe fn upload(&mut self, buf: &BufStage) {
pub unsafe fn upload(&mut self, buf: &BufUp) {
self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf); self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf);
} }
pub unsafe fn download(&mut self, buf: &BufDown) { pub unsafe fn download(&mut self, buf: &BufStage) {
self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf); self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf);
} }
} }
impl BufDown { impl BufStage {
pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) { pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) {
self.stage_buf.read(dst).unwrap() self.stage_buf.read(dst).unwrap()
} }
@ -147,4 +139,8 @@ impl BufDown {
pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> { pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> {
self.stage_buf.map_read(range).unwrap() self.stage_buf.map_read(range).unwrap()
} }
pub unsafe fn map_write<'a>(&'a mut self, range: impl RangeBounds<usize>) -> BufWriteGuard {
self.stage_buf.map_write(range).unwrap()
}
} }