Merge pull request #150 from linebender/clip

New clip implementation
This commit is contained in:
Raph Levien 2022-02-21 13:23:31 -08:00 committed by GitHub
commit d81e5cb4ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
80 changed files with 3318 additions and 1343 deletions

View file

@ -20,6 +20,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
#include "annotated.h"
#include "bins.h"
#include "drawtag.h"
// scale factors useful for converting coordinates to bins
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
@ -35,6 +36,47 @@ shared uint count[N_SLICE][N_TILE];
shared Alloc sh_chunk_alloc[N_TILE];
shared bool sh_alloc_failed;
DrawMonoid load_draw_monoid(uint element_ix) {
uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
uint path_ix = memory[base];
uint clip_ix = memory[base + 1];
return DrawMonoid(path_ix, clip_ix);
}
// Load bounding box computed by clip processing
vec4 load_clip_bbox(uint clip_ix) {
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * clip_ix;
float x0 = uintBitsToFloat(memory[base]);
float y0 = uintBitsToFloat(memory[base + 1]);
float x1 = uintBitsToFloat(memory[base + 2]);
float y1 = uintBitsToFloat(memory[base + 3]);
vec4 bbox = vec4(x0, y0, x1, y1);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_path_bbox(AnnotatedRef ref, vec4 bbox) {
uint ix = ref.offset >> 2;
memory[ix + 1] = floatBitsToUint(bbox.x);
memory[ix + 2] = floatBitsToUint(bbox.y);
memory[ix + 3] = floatBitsToUint(bbox.z);
memory[ix + 4] = floatBitsToUint(bbox.w);
}
void main() {
uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x;
@ -61,13 +103,27 @@ void main() {
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that these drawing elements
// have the bbox at the same place in their layout.
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
DrawMonoid draw_monoid = load_draw_monoid(element_ix);
uint path_ix = draw_monoid.path_ix;
vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0) {
clip_bbox = load_clip_bbox(clip_ix - 1);
}
// For clip elements, clip_bbox is the bbox of the clip path, intersected
// with enclosing clips.
// For other elements, it is the bbox of the enclosing clips.
vec4 path_bbox = load_path_bbox(path_ix);
vec4 bbox = bbox_intersect(path_bbox, clip_bbox);
// Avoid negative-size bbox (is this necessary)?
bbox.zw = max(bbox.xy, bbox.zw);
// Store clip-intersected bbox for tile_alloc.
store_path_bbox(ref, bbox);
x0 = int(floor(bbox.x * SX));
y0 = int(floor(bbox.y * SY));
x1 = int(ceil(bbox.z * SX));
y1 = int(ceil(bbox.w * SY));
break;
}

View file

@ -22,7 +22,7 @@ rule dxil
rule msl
command = $spirv_cross --msl $in --output $out $msl_flags
build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h
build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h
build gen/binning.hlsl: hlsl gen/binning.spv
build gen/binning.dxil: dxil gen/binning.hlsl
build gen/binning.msl: msl gen/binning.spv
@ -119,6 +119,16 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h
build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
build gen/clip_reduce.msl: msl gen/clip_reduce.spv
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
build gen/clip_leaf.msl: msl gen/clip_leaf.spv
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl

View file

@ -0,0 +1,287 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The second dispatch of clip stack processing.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE WG_SIZE
layout(local_size_x = WG_SIZE) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// Some of this is cut'n'paste duplication with the reduce pass, and
// arguably should be moved to a common .h file.
// The bicyclic monoid
struct ClipEl {
// index of parent node
uint parent_ix;
// bounding box
vec4 bbox;
};
struct Bic {
uint a;
uint b;
};
Bic bic_combine(Bic x, Bic y) {
uint m = min(x.b, y.a);
return Bic(x.a + y.a - m, x.b + y.b - m);
}
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
shared Bic sh_bic[WG_SIZE * 2 - 2];
shared uint sh_stack[PARTITION_SIZE];
shared vec4 sh_stack_bbox[PARTITION_SIZE];
shared uint sh_link[PARTITION_SIZE];
shared vec4 sh_bbox[PARTITION_SIZE];
// This is adapted directly from the stack monoid impl.
// Return value is reference within partition if >= 0,
// otherwise reference to stack.
uint search_link(inout Bic bic) {
uint ix = gl_LocalInvocationID.x;
uint j = 0;
while (j < LG_WG_SIZE) {
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
if (((ix >> j) & 1) != 0) {
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
if (test.b > 0) {
break;
}
bic = test;
ix -= 1u << j;
}
j++;
}
if (ix > 0) {
while (j > 0) {
j--;
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
if (test.b == 0) {
bic = test;
ix -= 1u << j;
}
}
}
// ix is the smallest value such that reduce(ix..th).b == 0
if (ix > 0) {
return ix - 1;
} else {
return ~0u - bic.a;
}
}
Bic load_bic(uint ix) {
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
return Bic(memory[base], memory[base + 1]);
}
ClipEl load_clip_el(uint ix) {
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
uint parent_ix = memory[base];
float x0 = uintBitsToFloat(memory[base + 1]);
float y0 = uintBitsToFloat(memory[base + 2]);
float x1 = uintBitsToFloat(memory[base + 3]);
float y1 = uintBitsToFloat(memory[base + 4]);
vec4 bbox = vec4(x0, y0, x1, y1);
return ClipEl(parent_ix, bbox);
}
uint load_path_ix(uint ix) {
// This is one approach to a partial final block. Another would be
// to do a memset to the padding in the command queue.
if (ix < conf.n_clip) {
return memory[(conf.clip_alloc.offset >> 2) + ix];
} else {
// EndClip tags don't implicate further loads.
return 0x80000000;
}
}
void store_clip_bbox(uint ix, vec4 bbox) {
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * ix;
memory[base] = floatBitsToUint(bbox.x);
memory[base + 1] = floatBitsToUint(bbox.y);
memory[base + 2] = floatBitsToUint(bbox.z);
memory[base + 3] = floatBitsToUint(bbox.w);
}
void main() {
// materialize stack up to the start of this partition. This
// is based on the pure stack monoid, but with two additions.
// First, (this only matters if the stack goes deeper than the
// partition size, which might be unlikely in practice), the
// topmost stack element from each partition is picked, then an
// exclusive scan of those. Also note that if this is skipped,
// a scan is not needed in the reduce stage.
// Second, after the stream compaction, do a scan of the retrieved
// bbox values.
uint th = gl_LocalInvocationID.x;
Bic bic = Bic(0, 0);
if (th < gl_WorkGroupID.x) {
bic = load_bic(th);
}
sh_bic[th] = bic;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (th + (1u << i) < WG_SIZE) {
Bic other = sh_bic[th + (1u << i)];
bic = bic_combine(bic, other);
}
barrier();
sh_bic[th] = bic;
}
barrier();
uint stack_size = sh_bic[0].b;
// TODO: do bbox scan here (to unlock greater stack depth)
// binary search in stack
uint sp = PARTITION_SIZE - 1 - th;
uint ix = 0;
for (uint i = 0; i < LG_WG_SIZE; i++) {
uint probe = ix + (uint(PARTITION_SIZE / 2) >> i);
if (sp < sh_bic[probe].b) {
ix = probe;
}
}
// ix is largest value such that sp < sh_bic[ix].b (if any)
uint b = sh_bic[ix].b;
vec4 bbox = vec4(-1e9, -1e9, 1e9, 1e9);
if (sp < b) {
// maybe store the index here for future use?
ClipEl el = load_clip_el(ix * PARTITION_SIZE + b - sp - 1);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
// other element values here?
}
// forward scan of bbox values of prefix stack
for (uint i = 0; i < LG_WG_SIZE; i++) {
sh_stack_bbox[th] = bbox;
barrier();
if (th >= (1u << i)) {
bbox = bbox_intersect(sh_stack_bbox[th - (1u << i)], bbox);
}
barrier();
}
sh_stack_bbox[th] = bbox;
// Read input and compute bicyclic semigroup binary tree
uint inp = load_path_ix(gl_GlobalInvocationID.x);
bool is_push = int(inp) >= 0;
bic = Bic(1 - uint(is_push), uint(is_push));
sh_bic[th] = bic;
if (is_push) {
bbox = load_path_bbox(inp);
} else {
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
}
uint inbase = 0;
for (uint i = 0; i < LG_WG_SIZE - 1; i++) {
uint outbase = 2 * WG_SIZE - (1u << (LG_WG_SIZE - i));
barrier();
if (th < (1u << (LG_WG_SIZE - 1 - i))) {
sh_bic[outbase + th] = bic_combine(sh_bic[inbase + th * 2], sh_bic[inbase + th * 2 + 1]);
}
inbase = outbase;
}
barrier();
// Search for predecessor node
bic = Bic(0, 0);
uint link = search_link(bic);
// we use N_SEQ > 1 convention here:
// link >= 0 is index within partition
// link < 0 is reference to stack
// We want grandparent bbox for pop nodes, so follow those links.
sh_link[th] = link;
barrier();
uint grandparent;
if (int(link) >= 0) {
grandparent = sh_link[link];
} else {
grandparent = link - 1;
}
// Resolve parent
uint parent;
if (int(link) >= 0) {
parent = gl_WorkGroupID.x * PARTITION_SIZE + link;
} else if (int(link + stack_size) >= 0) {
parent = sh_stack[PARTITION_SIZE + link];
} else {
parent = ~0u;
}
// bbox scan along parent links
for (uint i = 0; i < LG_WG_SIZE; i++) {
// sh_link was already stored for first iteration
if (i != 0) {
sh_link[th] = link;
}
sh_bbox[th] = bbox;
barrier();
if (int(link) >= 0) {
bbox = bbox_intersect(sh_bbox[link], bbox);
link = sh_link[link];
}
barrier();
}
if (int(link + stack_size) >= 0) {
bbox = bbox_intersect(sh_stack_bbox[PARTITION_SIZE + link], bbox);
}
// At this point, bbox is the reduction of bounding boxes along the tree.
sh_bbox[th] = bbox;
barrier();
uint path_ix = inp;
if (!is_push && gl_GlobalInvocationID.x < conf.n_clip) {
// Is this load expensive? If so, it's loaded earlier for in-partition
// and is in the ClipEl for cross-partition.
// If not, can probably get rid of it in the stack intermediate buf.
path_ix = load_path_ix(parent);
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp;
// Fix up drawmonoid so path_ix at EndClip matches BeginClip
memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0) {
bbox = sh_bbox[grandparent];
} else if (int(grandparent + stack_size) >= 0) {
bbox = sh_stack_bbox[PARTITION_SIZE + grandparent];
} else {
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
}
}
store_clip_bbox(gl_GlobalInvocationID.x, bbox);
}

View file

@ -0,0 +1,148 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduce pass for clip stack processing.
// The primary input is a sequence of path ids representing paths to
// push, with a special value of ~0 to represent pop.
// For each path, the bounding box is found in the anno stream
// (anno_alloc), though this may change.
// Output is a stack monoid reduction for the partition. The Bic
// is stored in the BicBuf, and the stack slice in StackBuf.
// Note: for this shader, only pushes are represented in the stack
// monoid reduction output, so we don't have to worry about the
// interpretation of pops.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE WG_SIZE
layout(local_size_x = WG_SIZE) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// The intermediate state for clip processing.
struct ClipEl {
// index of parent node
uint parent_ix;
// bounding box
vec4 bbox;
};
// The bicyclic monoid
struct Bic {
uint a;
uint b;
};
Bic bic_combine(Bic x, Bic y) {
uint m = min(x.b, y.a);
return Bic(x.a + y.a - m, x.b + y.b - m);
}
shared Bic sh_bic[WG_SIZE];
shared uint sh_parent[WG_SIZE];
shared uint sh_path_ix[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
float bbox_b = float(memory[base + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_bic(uint ix, Bic bic) {
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
memory[base] = bic.a;
memory[base + 1] = bic.b;
}
void store_clip_el(uint ix, ClipEl el) {
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
memory[base] = el.parent_ix;
memory[base + 1] = floatBitsToUint(el.bbox.x);
memory[base + 2] = floatBitsToUint(el.bbox.y);
memory[base + 3] = floatBitsToUint(el.bbox.z);
memory[base + 4] = floatBitsToUint(el.bbox.w);
}
void main() {
uint th = gl_LocalInvocationID.x;
uint inp = memory[(conf.clip_alloc.offset >> 2) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
// reverse scan of bicyclic semigroup
Bic bic = Bic(1 - uint(is_push), uint(is_push));
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (th + (1u << i) < WG_SIZE) {
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
bic = bic_combine(bic, other);
}
barrier();
sh_bic[th] = bic;
}
if (th == 0) {
store_bic(gl_WorkGroupID.x, bic);
}
barrier();
uint size = sh_bic[0].b;
bic = Bic(0, 0);
if (th + 1 < WG_SIZE) {
bic = sh_bic[th + 1];
}
if (is_push && bic.a == 0) {
uint local_ix = size - bic.b - 1;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
barrier();
// Do forward scan of bounding box intersection
vec4 bbox;
uint path_ix;
if (th < size) {
path_ix = sh_path_ix[th];
bbox = load_path_bbox(path_ix);
}
// Not necessary if depth is bounded by wg size
#if 0
for (uint i = 0; i < LG_WG_SIZE; i++) {
// We gate so we never access uninit data, but it might
// be more efficient to avoid the conditionals.
if (th < size) {
sh_bbox[th] = bbox;
}
barrier();
if (th < size && th >= (1u << i)) {
bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
}
barrier();
}
#endif
if (th < size) {
uint parent_ix = sh_parent[th] + gl_WorkGroupID.x * PARTITION_SIZE;
ClipEl el = ClipEl(parent_ix, bbox);
store_clip_el(gl_GlobalInvocationID.x, el);
}
}

View file

@ -136,9 +136,6 @@ void main() {
// currently in a clip for which the entire tile has an alpha of zero, and
// the value is the depth after the "begin clip" of that element.
uint clip_zero_depth = 0;
// State for the "clip one" optimization. If bit `i` is set, then that means
// that the clip pushed at depth `i` has an alpha of all one.
uint clip_one_mask = 0;
// I'm sure we can figure out how to do this with at least one fewer register...
// Items up to rd_ix have been read from sh_elements
@ -227,9 +224,8 @@ void main() {
case Annotated_LinGradient:
case Annotated_BeginClip:
case Annotated_EndClip:
// We have one "path" for each element, even if the element isn't
// actually a path (currently EndClip, but images etc in the future).
uint path_ix = element_ix;
uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
uint path_ix = memory[drawmonoid_base];
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride;
@ -283,15 +279,15 @@ void main() {
uint x = sh_tile_x0[el_ix] + seq_ix % width;
uint y = sh_tile_y0[el_ix] + seq_ix / width;
bool include_tile = false;
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
include_tile = true;
} else if (mem_ok) {
if (mem_ok) {
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
// Include the path in the tile if
// - the tile contains at least a segment (tile offset non-zero)
// - the tile is completely covered (backdrop non-zero)
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip;
// Always include the tile if it contains a path segment.
// For draws, include the tile if it is solid.
// For clips, include the tile if it is empty - this way, logic
// below will suppress the drawing of inner elements.
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip;
}
if (include_tile) {
uint el_slice = el_ix / 32;
@ -378,33 +374,26 @@ void main() {
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1u << clip_depth);
} else {
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (clip_depth < 32) {
clip_one_mask &= ~(1u << clip_depth);
}
}
clip_depth++;
break;
case Annotated_EndClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0);
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
break;
}
} else {

View file

@ -72,9 +72,14 @@ void main() {
}
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
uint clip_out_base = conf.clip_alloc.offset >> 2;
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_tag_monoid(row, local[i]);
Monoid m = row;
if (i > 0) {
m = combine_tag_monoid(m, local[i - 1]);
}
// m now holds exclusive scan of draw monoid
memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix;
@ -83,8 +88,9 @@ void main() {
// later stages read scene + bbox etc.
ElementRef this_ref = Element_index(ref, i);
tag_word = Element_tag(this_ref).tag;
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) {
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1);
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage ||
tag_word == Element_BeginClip) {
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
float bbox_l = float(memory[bbox_offset]) - 32768.0;
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
@ -142,21 +148,27 @@ void main() {
anno_img.offset = fill_img.offset;
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
break;
case Element_BeginClip:
AnnoBeginClip anno_begin_clip;
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
break;
}
} else if (tag_word == Element_BeginClip) {
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoBeginClip anno_begin_clip;
// This is the absolute bbox, it's been transformed during encoding.
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
} else if (tag_word == Element_EndClip) {
Clip end_clip = Element_EndClip_read(this_ref);
AnnoEndClip anno_end_clip;
// This bbox is expected to be the same as the begin one.
anno_end_clip.bbox = end_clip.bbox;
// The actual bbox will be reconstructed from clip stream output.
anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
}
// Generate clip stream.
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) {
uint path_ix = ~(out_ix + i);
if (tag_word == Element_BeginClip) {
path_ix = m.path_ix;
}
memory[clip_out_base + m.clip_ix] = path_ix;
}
out_ref.offset += Annotated_size;
}
}

View file

@ -44,8 +44,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

View file

@ -63,8 +63,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

View file

@ -44,8 +44,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

View file

@ -63,8 +63,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -17,8 +17,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -39,7 +44,7 @@ struct SPIRV_Cross_Input
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(52))
if (ix < _21.Load(68))
{
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u);

View file

@ -22,8 +22,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -9,16 +9,6 @@ struct MallocResult
bool failed;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
};
struct AnnotatedRef
{
uint offset;
@ -40,6 +30,12 @@ struct BinInstance
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Config
{
uint n_elements;
@ -54,8 +50,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -64,8 +65,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _84 : register(u0, space0);
ByteAddressBuffer _253 : register(t1, space0);
RWByteAddressBuffer _94 : register(u0, space0);
ByteAddressBuffer _202 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -93,7 +94,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _84.Load(offset * 4 + 8);
uint v = _94.Load(offset * 4 + 8);
return v;
}
@ -102,36 +103,53 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _221 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _221;
AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _181;
}
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref)
DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix);
uint path_ix = _94.Load(base * 4 + 8);
uint clip_ix = _94.Load((base + 1u) * 4 + 8);
DrawMonoid _222 = { path_ix, clip_ix };
return _222;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_94.Load(base * 4 + 8));
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_path_bbox(AnnotatedRef ref, float4 bbox)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
AnnoEndClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
return s;
}
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
{
AnnoEndClipRef _228 = { ref.offset + 4u };
Alloc param = a;
AnnoEndClipRef param_1 = _228;
return AnnoEndClip_read(param, param_1);
_94.Store((ix + 1u) * 4 + 8, asuint(bbox.x));
_94.Store((ix + 2u) * 4 + 8, asuint(bbox.y));
_94.Store((ix + 3u) * 4 + 8, asuint(bbox.z));
_94.Store((ix + 4u) * 4 + 8, asuint(bbox.w));
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -143,22 +161,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size)
{
uint _90;
_84.InterlockedAdd(0, size, _90);
uint offset = _90;
uint _97;
_84.GetDimensions(_97);
_97 = (_97 - 8) / 4;
uint _100;
_94.InterlockedAdd(0, size, _100);
uint offset = _100;
uint _107;
_94.GetDimensions(_107);
_107 = (_107 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_97) * 4);
r.failed = (offset + size) > uint(int(_107) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _119;
_84.InterlockedMax(4, 1u, _119);
uint _129;
_94.InterlockedMax(4, 1u, _129);
return r;
}
return r;
@ -172,7 +190,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_84.Store(offset * 4 + 8, val);
_94.Store(offset * 4 + 8, val);
}
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
@ -186,7 +204,7 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
void comp_main()
{
uint my_n_elements = _253.Load(0);
uint my_n_elements = _202.Load(0);
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
@ -198,15 +216,15 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef _308 = { _253.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _308;
AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _415;
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc _318;
_318.offset = _253.Load(32);
Alloc _425;
_425.offset = _202.Load(32);
Alloc param;
param.offset = _318.offset;
param.offset = _425.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag;
}
@ -222,21 +240,38 @@ void comp_main()
case 4u:
case 5u:
{
Alloc _336;
_336.offset = _253.Load(32);
Alloc param_2;
param_2.offset = _336.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3);
x0 = int(floor(clip.bbox.x * 0.00390625f));
y0 = int(floor(clip.bbox.y * 0.00390625f));
x1 = int(ceil(clip.bbox.z * 0.00390625f));
y1 = int(ceil(clip.bbox.w * 0.00390625f));
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_3 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3);
}
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8);
x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
break;
}
}
uint width_in_bins = ((_253.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_253.Load(12) + 16u) - 1u) / 16u;
uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -251,8 +286,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _437;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437);
uint _581;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581);
x++;
if (x == x1)
{
@ -267,15 +302,15 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_4 = 0u;
uint param_5 = 0u;
bool param_6 = true;
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
uint param_9 = 0u;
uint param_10 = 0u;
bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
if (element_count != 0u)
{
uint param_7 = element_count * 4u;
MallocResult _487 = malloc(param_7);
MallocResult chunk = _487;
uint param_12 = element_count * 4u;
MallocResult _631 = malloc(param_12);
MallocResult chunk = _631;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
@ -283,32 +318,32 @@ void comp_main()
sh_alloc_failed = true;
}
}
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _516;
_516.offset = _253.Load(20);
Alloc param_8;
param_8.offset = _516.offset;
uint param_9 = out_ix;
uint param_10 = element_count;
write_mem(param_8, param_9, param_10);
Alloc _528;
_528.offset = _253.Load(20);
Alloc param_11;
param_11.offset = _528.offset;
uint param_12 = out_ix + 1u;
uint param_13 = chunk_alloc.offset;
write_mem(param_11, param_12, param_13);
uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _660;
_660.offset = _202.Load(20);
Alloc param_13;
param_13.offset = _660.offset;
uint param_14 = out_ix;
uint param_15 = element_count;
write_mem(param_13, param_14, param_15);
Alloc _672;
_672.offset = _202.Load(20);
Alloc param_16;
param_16.offset = _672.offset;
uint param_17 = out_ix + 1u;
uint param_18 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18);
GroupMemoryBarrierWithGroupSync();
bool _543;
bool _687;
if (!sh_alloc_failed)
{
_543 = _84.Load(4) != 0u;
_687 = _94.Load(4) != 0u;
}
else
{
_543 = sh_alloc_failed;
_687 = sh_alloc_failed;
}
if (_543)
if (_687)
{
return;
}
@ -327,12 +362,12 @@ void comp_main()
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _605 = { out_offset };
BinInstance _607 = { element_ix };
Alloc param_14 = out_alloc;
BinInstanceRef param_15 = _605;
BinInstance param_16 = _607;
BinInstance_write(param_14, param_15, param_16);
BinInstanceRef _749 = { out_offset };
BinInstance _751 = { element_ix };
Alloc param_19 = out_alloc;
BinInstanceRef param_20 = _749;
BinInstance param_21 = _751;
BinInstance_write(param_19, param_20, param_21);
}
x++;
if (x == x1)

View file

@ -18,16 +18,6 @@ struct MallocResult
bool failed;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
};
struct AnnotatedRef
{
uint offset;
@ -49,6 +39,12 @@ struct BinInstance
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Memory
{
uint mem_offset;
@ -75,8 +71,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -97,7 +98,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_84, constant uint& v_84BufferSize)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
@ -105,46 +106,66 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_84.memory[offset];
uint v = v_94.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_84, v_84BufferSize);
uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_84, v_84BufferSize);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_84, v_84BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_84, v_84BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_84, v_84BufferSize);
AnnoEndClip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
return s;
uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
uint path_ix = v_94.memory[base];
uint clip_ix = v_94.memory[base + 1u];
return DrawMonoid{ path_ix, clip_ix };
}
static inline __attribute__((always_inline))
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
Alloc param = a;
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u };
return AnnoEndClip_read(param, param_1, v_84, v_84BufferSize);
uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_94.memory[base]);
float y0 = as_type<float>(v_94.memory[base + 1u]);
float x1 = as_type<float>(v_94.memory[base + 2u]);
float y1 = as_type<float>(v_94.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_94.memory[base]) - 32768.0;
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize)
{
uint ix = ref.offset >> uint(2);
v_94.memory[ix + 1u] = as_type<uint>(bbox.x);
v_94.memory[ix + 2u] = as_type<uint>(bbox.y);
v_94.memory[ix + 3u] = as_type<uint>(bbox.z);
v_94.memory[ix + 4u] = as_type<uint>(bbox.w);
}
static inline __attribute__((always_inline))
@ -156,26 +177,26 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_84, constant uint& v_84BufferSize)
MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize)
{
uint _90 = atomic_fetch_add_explicit((device atomic_uint*)&v_84.mem_offset, size, memory_order_relaxed);
uint offset = _90;
uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed);
uint offset = _100;
MallocResult r;
r.failed = (offset + size) > uint(int((v_84BufferSize - 8) / 4) * 4);
r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _119 = atomic_fetch_max_explicit((device atomic_uint*)&v_84.mem_error, 1u, memory_order_relaxed);
uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_84, constant uint& v_84BufferSize)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
@ -183,27 +204,27 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_84.memory[offset] = val;
v_94.memory[offset] = val;
}
static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_84, constant uint& v_84BufferSize)
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_84, v_84BufferSize);
write_mem(param, param_1, param_2, v_94, v_94BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_84 [[buffer(0)]], const device ConfigBuf& _253 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed;
threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_84BufferSize = spvBufferSizeConstants[0];
uint my_n_elements = _253.conf.n_elements;
constant uint& v_94BufferSize = spvBufferSizeConstants[0];
uint my_n_elements = v_202.conf.n_elements;
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
@ -215,14 +236,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _253.conf.anno_alloc.offset + (element_ix * 40u) };
AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc param;
param.offset = _253.conf.anno_alloc.offset;
param.offset = v_202.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_84, v_84BufferSize).tag;
tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
}
int x0 = 0;
int y0 = 0;
@ -236,19 +257,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
case 4u:
case 5u:
{
Alloc param_2;
param_2.offset = _253.conf.anno_alloc.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_84, v_84BufferSize);
x0 = int(floor(clip.bbox.x * 0.00390625));
y0 = int(floor(clip.bbox.y * 0.00390625));
x1 = int(ceil(clip.bbox.z * 0.00390625));
y1 = int(ceil(clip.bbox.w * 0.00390625));
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_3 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202);
}
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = fast::max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8, v_94, v_94BufferSize);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
break;
}
}
uint width_in_bins = ((_253.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((_253.conf.height_in_tiles + 16u) - 1u) / 16u;
uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -263,7 +303,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++;
if (x == x1)
{
@ -278,15 +318,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_4 = 0u;
uint param_5 = 0u;
bool param_6 = true;
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
uint param_9 = 0u;
uint param_10 = 0u;
bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
if (element_count != 0u)
{
uint param_7 = element_count * 4u;
MallocResult _487 = malloc(param_7, v_84, v_84BufferSize);
MallocResult chunk = _487;
uint param_12 = element_count * 4u;
MallocResult _631 = malloc(param_12, v_94, v_94BufferSize);
MallocResult chunk = _631;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
@ -294,28 +334,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
sh_alloc_failed = short(true);
}
}
uint out_ix = (_253.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_8;
param_8.offset = _253.conf.bin_alloc.offset;
uint param_9 = out_ix;
uint param_10 = element_count;
write_mem(param_8, param_9, param_10, v_84, v_84BufferSize);
Alloc param_11;
param_11.offset = _253.conf.bin_alloc.offset;
uint param_12 = out_ix + 1u;
uint param_13 = chunk_alloc.offset;
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize);
uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_13;
param_13.offset = v_202.conf.bin_alloc.offset;
uint param_14 = out_ix;
uint param_15 = element_count;
write_mem(param_13, param_14, param_15, v_94, v_94BufferSize);
Alloc param_16;
param_16.offset = v_202.conf.bin_alloc.offset;
uint param_17 = out_ix + 1u;
uint param_18 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18, v_94, v_94BufferSize);
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _543;
bool _687;
if (!bool(sh_alloc_failed))
{
_543 = v_84.mem_error != 0u;
_687 = v_94.mem_error != 0u;
}
else
{
_543 = bool(sh_alloc_failed);
_687 = bool(sh_alloc_failed);
}
if (_543)
if (_687)
{
return;
}
@ -334,10 +374,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_14 = out_alloc;
BinInstanceRef param_15 = BinInstanceRef{ out_offset };
BinInstance param_16 = BinInstance{ element_ix };
BinInstance_write(param_14, param_15, param_16, v_84, v_84BufferSize);
Alloc param_19 = out_alloc;
BinInstanceRef param_20 = BinInstanceRef{ out_offset };
BinInstance param_21 = BinInstance{ element_ix };
BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize);
}
x++;
if (x == x1)

Binary file not shown.

BIN
piet-gpu/shader/gen/clip_leaf.dxil generated Normal file

Binary file not shown.

367
piet-gpu/shader/gen/clip_leaf.hlsl generated Normal file
View file

@ -0,0 +1,367 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _393 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[510];
groupshared uint sh_stack[256];
groupshared float4 sh_stack_bbox[256];
groupshared uint sh_link[256];
groupshared float4 sh_bbox[256];
Bic load_bic(uint ix)
{
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
return _286;
}
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _72;
}
ClipEl load_clip_el(uint ix)
{
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 8);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
ClipEl _335 = { parent_ix, bbox };
return _335;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
uint load_path_ix(uint ix)
{
if (ix < _80.Load(72))
{
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
}
else
{
return 2147483648u;
}
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
uint search_link(inout Bic bic)
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
void store_clip_bbox(uint ix, float4 bbox)
{
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 8, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
Bic bic = _393;
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
GroupMemoryBarrierWithGroupSync();
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0;
Bic _559 = { 1u - uint(is_push), uint(is_push) };
bic = _559;
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7);
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
GroupMemoryBarrierWithGroupSync();
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
GroupMemoryBarrierWithGroupSync();
bic = _393;
Bic param_10 = bic;
uint _618 = search_link(param_10);
bic = param_10;
uint link = _618;
sh_link[th] = link;
GroupMemoryBarrierWithGroupSync();
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
GroupMemoryBarrierWithGroupSync();
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < _80.Load(72);
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17);
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

366
piet-gpu/shader/gen/clip_leaf.msl generated Normal file
View file

@ -0,0 +1,366 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
return Bic{ v_96.memory[base], v_96.memory[base + 1u] };
}
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
uint parent_ix = v_96.memory[base];
float x0 = as_type<float>(v_96.memory[base + 1u]);
float y0 = as_type<float>(v_96.memory[base + 2u]);
float x1 = as_type<float>(v_96.memory[base + 3u]);
float y1 = as_type<float>(v_96.memory[base + 4u]);
float4 bbox = float4(x0, y0, x1, y1);
return ClipEl{ parent_ix, bbox };
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
if (ix < v_80.conf.n_clip)
{
return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix];
}
else
{
return 2147483648u;
}
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_96.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
static inline __attribute__((always_inline))
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix);
v_96.memory[base] = as_type<uint>(bbox.x);
v_96.memory[base + 1u] = as_type<uint>(bbox.y);
v_96.memory[base + 2u] = as_type<uint>(bbox.z);
v_96.memory[base + 3u] = as_type<uint>(bbox.w);
}
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup Bic sh_bic[510];
threadgroup uint sh_stack[256];
threadgroup float4 sh_stack_bbox[256];
threadgroup uint sh_link[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
Bic bic = Bic{ 0u, 0u };
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param, v_80, v_96);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3, v_80, v_96);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6, v_80, v_96);
bool is_push = int(inp) >= 0;
bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7, v_80, v_96);
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u };
Bic param_10 = bic;
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10;
uint link = _618;
sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17, v_80, v_96);
}

BIN
piet-gpu/shader/gen/clip_leaf.spv generated Normal file

Binary file not shown.

BIN
piet-gpu/shader/gen/clip_reduce.dxil generated Normal file

Binary file not shown.

177
piet-gpu/shader/gen/clip_reduce.hlsl generated Normal file
View file

@ -0,0 +1,177 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _267 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[256];
groupshared uint sh_parent[256];
groupshared uint sh_path_ix[256];
groupshared float4 sh_bbox[256];
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _56;
}
void store_bic(uint ix, Bic bic)
{
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 8, bic.a);
_80.Store((base + 1u) * 4 + 8, bic.b);
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_clip_el(uint ix, ClipEl el)
{
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 8, el.parent_ix);
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
bool is_push = int(inp) >= 0;
Bic _207 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _207;
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b;
bic = _267;
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
GroupMemoryBarrierWithGroupSync();
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _331 = { parent_ix, bbox };
ClipEl el = _331;
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

173
piet-gpu/shader/gen/clip_reduce.msl generated Normal file
View file

@ -0,0 +1,173 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
v_80.memory[base] = bic.a;
v_80.memory[base + 1u] = bic.b;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_80.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
v_80.memory[base] = el.parent_ix;
v_80.memory[base + 1u] = as_type<uint>(el.bbox.x);
v_80.memory[base + 2u] = as_type<uint>(el.bbox.y);
v_80.memory[base + 3u] = as_type<uint>(el.bbox.z);
v_80.memory[base + 4u] = as_type<uint>(el.bbox.w);
}
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Bic sh_bic[256];
threadgroup uint sh_parent[256];
threadgroup uint sh_path_ix[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
Bic bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3, v_64, v_80);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint size = sh_bic[0].b;
bic = Bic{ 0u, 0u };
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4, v_64, v_80);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl el = ClipEl{ parent_ix, bbox };
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6, v_64, v_80);
}
}

BIN
piet-gpu/shader/gen/clip_reduce.spv generated Normal file

Binary file not shown.

Binary file not shown.

View file

@ -49,17 +49,6 @@ struct AnnoLinGradient
float line_c;
};
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
};
struct AnnotatedRef
{
uint offset;
@ -193,8 +182,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -203,8 +197,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _296 : register(u0, space0);
ByteAddressBuffer _1249 : register(t1, space0);
RWByteAddressBuffer _283 : register(u0, space0);
ByteAddressBuffer _1169 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -227,8 +221,8 @@ groupshared uint sh_tile_count[256];
Alloc slice_mem(Alloc a, uint offset, uint size)
{
Alloc _373 = { a.offset + offset };
return _373;
Alloc _360 = { a.offset + offset };
return _360;
}
bool touch_mem(Alloc alloc, uint offset)
@ -244,7 +238,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _296.Load(offset * 4 + 8);
uint v = _283.Load(offset * 4 + 8);
return v;
}
@ -257,8 +251,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
{
BinInstanceRef _754 = { ref.offset + (index * 4u) };
return _754;
BinInstanceRef _674 = { ref.offset + (index * 4u) };
return _674;
}
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
@ -277,8 +271,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _706 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _706;
AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _636;
}
Path Path_read(Alloc a, PathRef ref)
@ -295,8 +289,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _814 = { raw2 };
s.tiles = _814;
TileRef _734 = { raw2 };
s.tiles = _734;
return s;
}
@ -306,11 +300,11 @@ void write_tile_alloc(uint el_ix, Alloc a)
Alloc read_tile_alloc(uint el_ix, bool mem_ok)
{
uint _1135;
_296.GetDimensions(_1135);
_1135 = (_1135 - 8) / 4;
uint _1055;
_283.GetDimensions(_1055);
_1055 = (_1055 - 8) / 4;
uint param = 0u;
uint param_1 = uint(int(_1135) * 4);
uint param_1 = uint(int(_1055) * 4);
bool param_2 = mem_ok;
return new_alloc(param, param_1, param_2);
}
@ -324,9 +318,9 @@ Tile Tile_read(Alloc a, TileRef ref)
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
TileSegRef _839 = { raw0 };
TileSegRef _759 = { raw0 };
Tile s;
s.tile = _839;
s.tile = _759;
s.backdrop = int(raw1);
return s;
}
@ -361,30 +355,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)
{
AnnoColorRef _712 = { ref.offset + 4u };
AnnoColorRef _642 = { ref.offset + 4u };
Alloc param = a;
AnnoColorRef param_1 = _712;
AnnoColorRef param_1 = _642;
return AnnoColor_read(param, param_1);
}
MallocResult malloc(uint size)
{
uint _302;
_296.InterlockedAdd(0, size, _302);
uint offset = _302;
uint _309;
_296.GetDimensions(_309);
_309 = (_309 - 8) / 4;
uint _289;
_283.InterlockedAdd(0, size, _289);
uint offset = _289;
uint _296;
_283.GetDimensions(_296);
_296 = (_296 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_309) * 4);
r.failed = (offset + size) > uint(int(_296) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _331;
_296.InterlockedMax(4, 1u, _331);
uint _318;
_283.InterlockedMax(4, 1u, _318);
return r;
}
return r;
@ -398,7 +392,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_296.Store(offset * 4 + 8, val);
_283.Store(offset * 4 + 8, val);
}
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
@ -416,9 +410,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 10u;
write_mem(param, param_1, param_2);
CmdJumpRef _1128 = { ref.offset + 4u };
CmdJumpRef _1048 = { ref.offset + 4u };
Alloc param_3 = a;
CmdJumpRef param_4 = _1128;
CmdJumpRef param_4 = _1048;
CmdJump param_5 = s;
CmdJump_write(param_3, param_4, param_5);
}
@ -430,21 +424,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true;
}
uint param = 1024u;
MallocResult _1156 = malloc(param);
MallocResult new_cmd = _1156;
MallocResult _1076 = malloc(param);
MallocResult new_cmd = _1076;
if (new_cmd.failed)
{
return false;
}
CmdJump _1166 = { new_cmd.alloc.offset };
CmdJump jump = _1166;
CmdJump _1086 = { new_cmd.alloc.offset };
CmdJump jump = _1086;
Alloc param_1 = cmd_alloc;
CmdRef param_2 = cmd_ref;
CmdJump param_3 = jump;
Cmd_Jump_write(param_1, param_2, param_3);
cmd_alloc = new_cmd.alloc;
CmdRef _1178 = { cmd_alloc.offset };
cmd_ref = _1178;
CmdRef _1098 = { cmd_alloc.offset };
cmd_ref = _1098;
cmd_limit = (cmd_alloc.offset + 1024u) - 60u;
return true;
}
@ -473,9 +467,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 1u;
write_mem(param, param_1, param_2);
CmdFillRef _1012 = { ref.offset + 4u };
CmdFillRef _932 = { ref.offset + 4u };
Alloc param_3 = a;
CmdFillRef param_4 = _1012;
CmdFillRef param_4 = _932;
CmdFill param_5 = s;
CmdFill_write(param_3, param_4, param_5);
}
@ -507,9 +501,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 2u;
write_mem(param, param_1, param_2);
CmdStrokeRef _1030 = { ref.offset + 4u };
CmdStrokeRef _950 = { ref.offset + 4u };
Alloc param_3 = a;
CmdStrokeRef param_4 = _1030;
CmdStrokeRef param_4 = _950;
CmdStroke param_5 = s;
CmdStroke_write(param_3, param_4, param_5);
}
@ -521,8 +515,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
{
if (tile.tile.offset != 0u)
{
CmdFill _1202 = { tile.tile.offset, tile.backdrop };
CmdFill cmd_fill = _1202;
CmdFill _1122 = { tile.tile.offset, tile.backdrop };
CmdFill cmd_fill = _1122;
Alloc param_1 = alloc;
CmdRef param_2 = cmd_ref;
CmdFill param_3 = cmd_fill;
@ -539,8 +533,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
}
else
{
CmdStroke _1232 = { tile.tile.offset, 0.5f * linewidth };
CmdStroke cmd_stroke = _1232;
CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth };
CmdStroke cmd_stroke = _1152;
Alloc param_6 = alloc;
CmdRef param_7 = cmd_ref;
CmdStroke param_8 = cmd_stroke;
@ -564,9 +558,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u;
write_mem(param, param_1, param_2);
CmdColorRef _1056 = { ref.offset + 4u };
CmdColorRef _976 = { ref.offset + 4u };
Alloc param_3 = a;
CmdColorRef param_4 = _1056;
CmdColorRef param_4 = _976;
CmdColor param_5 = s;
CmdColor_write(param_3, param_4, param_5);
}
@ -613,9 +607,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref)
AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref)
{
AnnoLinGradientRef _722 = { ref.offset + 4u };
AnnoLinGradientRef _652 = { ref.offset + 4u };
Alloc param = a;
AnnoLinGradientRef param_1 = _722;
AnnoLinGradientRef param_1 = _652;
return AnnoLinGradient_read(param, param_1);
}
@ -646,9 +640,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 6u;
write_mem(param, param_1, param_2);
CmdLinGradRef _1074 = { ref.offset + 4u };
CmdLinGradRef _994 = { ref.offset + 4u };
Alloc param_3 = a;
CmdLinGradRef param_4 = _1074;
CmdLinGradRef param_4 = _994;
CmdLinGrad param_5 = s;
CmdLinGrad_write(param_3, param_4, param_5);
}
@ -687,9 +681,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)
{
AnnoImageRef _732 = { ref.offset + 4u };
AnnoImageRef _662 = { ref.offset + 4u };
Alloc param = a;
AnnoImageRef param_1 = _732;
AnnoImageRef param_1 = _662;
return AnnoImage_read(param, param_1);
}
@ -712,45 +706,13 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 7u;
write_mem(param, param_1, param_2);
CmdImageRef _1092 = { ref.offset + 4u };
CmdImageRef _1012 = { ref.offset + 4u };
Alloc param_3 = a;
CmdImageRef param_4 = _1092;
CmdImageRef param_4 = _1012;
CmdImage param_5 = s;
CmdImage_write(param_3, param_4, param_5);
}
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
AnnoBeginClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.linewidth = asfloat(raw4);
return s;
}
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)
{
AnnoBeginClipRef _742 = { ref.offset + 4u };
Alloc param = a;
AnnoBeginClipRef param_1 = _742;
return AnnoBeginClip_read(param, param_1);
}
void Cmd_BeginClip_write(Alloc a, CmdRef ref)
{
Alloc param = a;
@ -777,44 +739,43 @@ void Cmd_End_write(Alloc a, CmdRef ref)
void comp_main()
{
uint width_in_bins = ((_1249.Load(8) + 16u) - 1u) / 16u;
uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u;
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
uint partition_ix = 0u;
uint n_partitions = ((_1249.Load(0) + 256u) - 1u) / 256u;
uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u;
uint th_ix = gl_LocalInvocationID.x;
uint bin_tile_x = 16u * gl_WorkGroupID.x;
uint bin_tile_y = 16u * gl_WorkGroupID.y;
uint tile_x = gl_LocalInvocationID.x % 16u;
uint tile_y = gl_LocalInvocationID.x / 16u;
uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.Load(8)) + bin_tile_x) + tile_x;
Alloc _1314;
_1314.offset = _1249.Load(24);
uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x;
Alloc _1234;
_1234.offset = _1169.Load(24);
Alloc param;
param.offset = _1314.offset;
param.offset = _1234.offset;
uint param_1 = this_tile_ix * 1024u;
uint param_2 = 1024u;
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1323 = { cmd_alloc.offset };
CmdRef cmd_ref = _1323;
CmdRef _1243 = { cmd_alloc.offset };
CmdRef cmd_ref = _1243;
uint cmd_limit = (cmd_ref.offset + 1024u) - 60u;
uint clip_depth = 0u;
uint clip_zero_depth = 0u;
uint clip_one_mask = 0u;
uint rd_ix = 0u;
uint wr_ix = 0u;
uint part_start_ix = 0u;
uint ready_ix = 0u;
bool mem_ok = _296.Load(4) == 0u;
bool mem_ok = _283.Load(4) == 0u;
Alloc param_3;
Alloc param_5;
uint _1529;
uint _1448;
uint element_ix;
AnnotatedRef ref;
Alloc param_14;
Alloc param_16;
uint tile_count;
Alloc param_23;
uint _1841;
uint _1770;
Alloc param_29;
Tile tile_1;
AnnoColor fill;
@ -822,41 +783,40 @@ void comp_main()
Alloc param_52;
CmdLinGrad cmd_lin;
Alloc param_69;
Alloc param_86;
while (true)
{
for (uint i = 0u; i < 8u; i++)
{
sh_bitmaps[i][th_ix] = 0u;
}
bool _1581;
bool _1500;
for (;;)
{
if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
{
part_start_ix = ready_ix;
uint count = 0u;
bool _1379 = th_ix < 256u;
bool _1387;
if (_1379)
bool _1298 = th_ix < 256u;
bool _1306;
if (_1298)
{
_1387 = (partition_ix + th_ix) < n_partitions;
_1306 = (partition_ix + th_ix) < n_partitions;
}
else
{
_1387 = _1379;
_1306 = _1298;
}
if (_1387)
if (_1306)
{
uint in_ix = (_1249.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
Alloc _1404;
_1404.offset = _1249.Load(20);
param_3.offset = _1404.offset;
uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
Alloc _1323;
_1323.offset = _1169.Load(20);
param_3.offset = _1323.offset;
uint param_4 = in_ix;
count = read_mem(param_3, param_4);
Alloc _1415;
_1415.offset = _1249.Load(20);
param_5.offset = _1415.offset;
Alloc _1334;
_1334.offset = _1169.Load(20);
param_5.offset = _1334.offset;
uint param_6 = in_ix + 1u;
uint offset = read_mem(param_5, param_6);
uint param_7 = offset;
@ -902,16 +862,16 @@ void comp_main()
}
if (part_ix > 0u)
{
_1529 = sh_part_count[part_ix - 1u];
_1448 = sh_part_count[part_ix - 1u];
}
else
{
_1529 = part_start_ix;
_1448 = part_start_ix;
}
ix -= _1529;
ix -= _1448;
Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef _1548 = { bin_alloc.offset };
BinInstanceRef inst_ref = _1548;
BinInstanceRef _1467 = { bin_alloc.offset };
BinInstanceRef inst_ref = _1467;
BinInstanceRef param_10 = inst_ref;
uint param_11 = ix;
Alloc param_12 = bin_alloc;
@ -921,16 +881,16 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
wr_ix = min((rd_ix + 256u), ready_ix);
bool _1571 = (wr_ix - rd_ix) < 256u;
if (_1571)
bool _1490 = (wr_ix - rd_ix) < 256u;
if (_1490)
{
_1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
_1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
}
else
{
_1581 = _1571;
_1500 = _1490;
}
if (_1581)
if (_1500)
{
continue;
}
@ -943,11 +903,11 @@ void comp_main()
if ((th_ix + rd_ix) < wr_ix)
{
element_ix = sh_elements[th_ix];
AnnotatedRef _1602 = { _1249.Load(32) + (element_ix * 40u) };
ref = _1602;
Alloc _1605;
_1605.offset = _1249.Load(32);
param_14.offset = _1605.offset;
AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) };
ref = _1521;
Alloc _1524;
_1524.offset = _1169.Load(32);
param_14.offset = _1524.offset;
AnnotatedRef param_15 = ref;
tag = Annotated_tag(param_14, param_15).tag;
}
@ -959,12 +919,13 @@ void comp_main()
case 4u:
case 5u:
{
uint path_ix = element_ix;
PathRef _1624 = { _1249.Load(16) + (path_ix * 12u) };
Alloc _1627;
_1627.offset = _1249.Load(16);
param_16.offset = _1627.offset;
PathRef param_17 = _1624;
uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix);
uint path_ix = _283.Load(drawmonoid_base * 4 + 8);
PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) };
Alloc _1556;
_1556.offset = _1169.Load(16);
param_16.offset = _1556.offset;
PathRef param_17 = _1553;
Path path = Path_read(param_16, param_17);
uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride;
@ -1019,59 +980,53 @@ void comp_main()
el_ix = probe_1;
}
}
AnnotatedRef _1826 = { _1249.Load(32) + (sh_elements[el_ix] * 40u) };
AnnotatedRef ref_1 = _1826;
Alloc _1830;
_1830.offset = _1249.Load(32);
param_23.offset = _1830.offset;
AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) };
AnnotatedRef ref_1 = _1755;
Alloc _1759;
_1759.offset = _1169.Load(32);
param_23.offset = _1759.offset;
AnnotatedRef param_24 = ref_1;
uint tag_1 = Annotated_tag(param_23, param_24).tag;
if (el_ix > 0u)
{
_1841 = sh_tile_count[el_ix - 1u];
_1770 = sh_tile_count[el_ix - 1u];
}
else
{
_1841 = 0u;
_1770 = 0u;
}
uint seq_ix = ix_1 - _1841;
uint seq_ix = ix_1 - _1770;
uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + (seq_ix % width);
uint y = sh_tile_y0[el_ix] + (seq_ix / width);
bool include_tile = false;
if ((tag_1 == 4u) || (tag_1 == 5u))
if (mem_ok)
{
include_tile = true;
}
else
{
if (mem_ok)
uint param_25 = el_ix;
bool param_26 = mem_ok;
TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
Alloc param_27 = read_tile_alloc(param_25, param_26);
TileRef param_28 = _1822;
Tile tile = Tile_read(param_27, param_28);
bool is_clip = (tag_1 == 4u) || (tag_1 == 5u);
bool _1834 = tile.tile.offset != 0u;
bool _1843;
if (!_1834)
{
uint param_25 = el_ix;
bool param_26 = mem_ok;
TileRef _1901 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
Alloc param_27 = read_tile_alloc(param_25, param_26);
TileRef param_28 = _1901;
Tile tile = Tile_read(param_27, param_28);
bool _1907 = tile.tile.offset != 0u;
bool _1914;
if (!_1907)
{
_1914 = tile.backdrop != 0;
}
else
{
_1914 = _1907;
}
include_tile = _1914;
_1843 = (tile.backdrop == 0) == is_clip;
}
else
{
_1843 = _1834;
}
include_tile = _1843;
}
if (include_tile)
{
uint el_slice = el_ix / 32u;
uint el_mask = 1u << (el_ix & 31u);
uint _1934;
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1934);
uint _1863;
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863);
}
}
GroupMemoryBarrierWithGroupSync();
@ -1095,11 +1050,11 @@ void comp_main()
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
uint element_ix_1 = sh_elements[element_ref_ix];
bitmap &= (bitmap - 1u);
AnnotatedRef _1988 = { _1249.Load(32) + (element_ix_1 * 40u) };
ref = _1988;
Alloc _1993;
_1993.offset = _1249.Load(32);
param_29.offset = _1993.offset;
AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) };
ref = _1917;
Alloc _1922;
_1922.offset = _1169.Load(32);
param_29.offset = _1922.offset;
AnnotatedRef param_30 = ref;
AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);
if (clip_zero_depth == 0u)
@ -1110,23 +1065,23 @@ void comp_main()
{
uint param_31 = element_ref_ix;
bool param_32 = mem_ok;
TileRef _2029 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_33 = read_tile_alloc(param_31, param_32);
TileRef param_34 = _2029;
TileRef param_34 = _1958;
tile_1 = Tile_read(param_33, param_34);
Alloc _2036;
_2036.offset = _1249.Load(32);
param_35.offset = _2036.offset;
Alloc _1965;
_1965.offset = _1169.Load(32);
param_35.offset = _1965.offset;
AnnotatedRef param_36 = ref;
fill = Annotated_Color_read(param_35, param_36);
Alloc param_37 = cmd_alloc;
CmdRef param_38 = cmd_ref;
uint param_39 = cmd_limit;
bool _2048 = alloc_cmd(param_37, param_38, param_39);
bool _1977 = alloc_cmd(param_37, param_38, param_39);
cmd_alloc = param_37;
cmd_ref = param_38;
cmd_limit = param_39;
if (!_2048)
if (!_1977)
{
break;
}
@ -1137,10 +1092,10 @@ void comp_main()
float param_44 = fill.linewidth;
write_fill(param_40, param_41, param_42, param_43, param_44);
cmd_ref = param_41;
CmdColor _2072 = { fill.rgba_color };
CmdColor _2001 = { fill.rgba_color };
Alloc param_45 = cmd_alloc;
CmdRef param_46 = cmd_ref;
CmdColor param_47 = _2072;
CmdColor param_47 = _2001;
Cmd_Color_write(param_45, param_46, param_47);
cmd_ref.offset += 8u;
break;
@ -1149,23 +1104,23 @@ void comp_main()
{
uint param_48 = element_ref_ix;
bool param_49 = mem_ok;
TileRef _2101 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_50 = read_tile_alloc(param_48, param_49);
TileRef param_51 = _2101;
TileRef param_51 = _2030;
tile_1 = Tile_read(param_50, param_51);
Alloc _2108;
_2108.offset = _1249.Load(32);
param_52.offset = _2108.offset;
Alloc _2037;
_2037.offset = _1169.Load(32);
param_52.offset = _2037.offset;
AnnotatedRef param_53 = ref;
AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53);
Alloc param_54 = cmd_alloc;
CmdRef param_55 = cmd_ref;
uint param_56 = cmd_limit;
bool _2120 = alloc_cmd(param_54, param_55, param_56);
bool _2049 = alloc_cmd(param_54, param_55, param_56);
cmd_alloc = param_54;
cmd_ref = param_55;
cmd_limit = param_56;
if (!_2120)
if (!_2049)
{
break;
}
@ -1191,23 +1146,23 @@ void comp_main()
{
uint param_65 = element_ref_ix;
bool param_66 = mem_ok;
TileRef _2185 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_67 = read_tile_alloc(param_65, param_66);
TileRef param_68 = _2185;
TileRef param_68 = _2114;
tile_1 = Tile_read(param_67, param_68);
Alloc _2192;
_2192.offset = _1249.Load(32);
param_69.offset = _2192.offset;
Alloc _2121;
_2121.offset = _1169.Load(32);
param_69.offset = _2121.offset;
AnnotatedRef param_70 = ref;
AnnoImage fill_img = Annotated_Image_read(param_69, param_70);
Alloc param_71 = cmd_alloc;
CmdRef param_72 = cmd_ref;
uint param_73 = cmd_limit;
bool _2204 = alloc_cmd(param_71, param_72, param_73);
bool _2133 = alloc_cmd(param_71, param_72, param_73);
cmd_alloc = param_71;
cmd_ref = param_72;
cmd_limit = param_73;
if (!_2204)
if (!_2133)
{
break;
}
@ -1218,10 +1173,10 @@ void comp_main()
float param_78 = fill_img.linewidth;
write_fill(param_74, param_75, param_76, param_77, param_78);
cmd_ref = param_75;
CmdImage _2230 = { fill_img.index, fill_img.offset };
CmdImage _2159 = { fill_img.index, fill_img.offset };
Alloc param_79 = cmd_alloc;
CmdRef param_80 = cmd_ref;
CmdImage param_81 = _2230;
CmdImage param_81 = _2159;
Cmd_Image_write(param_79, param_80, param_81);
cmd_ref.offset += 12u;
break;
@ -1230,103 +1185,76 @@ void comp_main()
{
uint param_82 = element_ref_ix;
bool param_83 = mem_ok;
TileRef _2259 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_84 = read_tile_alloc(param_82, param_83);
TileRef param_85 = _2259;
TileRef param_85 = _2188;
tile_1 = Tile_read(param_84, param_85);
bool _2265 = tile_1.tile.offset == 0u;
bool _2271;
if (_2265)
bool _2194 = tile_1.tile.offset == 0u;
bool _2200;
if (_2194)
{
_2271 = tile_1.backdrop == 0;
_2200 = tile_1.backdrop == 0;
}
else
{
_2271 = _2265;
_2200 = _2194;
}
if (_2271)
if (_2200)
{
clip_zero_depth = clip_depth + 1u;
}
else
{
if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))
Alloc param_86 = cmd_alloc;
CmdRef param_87 = cmd_ref;
uint param_88 = cmd_limit;
bool _2212 = alloc_cmd(param_86, param_87, param_88);
cmd_alloc = param_86;
cmd_ref = param_87;
cmd_limit = param_88;
if (!_2212)
{
clip_one_mask |= (1u << clip_depth);
}
else
{
Alloc _2293;
_2293.offset = _1249.Load(32);
param_86.offset = _2293.offset;
AnnotatedRef param_87 = ref;
AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87);
Alloc param_88 = cmd_alloc;
CmdRef param_89 = cmd_ref;
uint param_90 = cmd_limit;
bool _2305 = alloc_cmd(param_88, param_89, param_90);
cmd_alloc = param_88;
cmd_ref = param_89;
cmd_limit = param_90;
if (!_2305)
{
break;
}
Alloc param_91 = cmd_alloc;
CmdRef param_92 = cmd_ref;
uint param_93 = tag_2.flags;
Tile param_94 = tile_1;
float param_95 = begin_clip.linewidth;
write_fill(param_91, param_92, param_93, param_94, param_95);
cmd_ref = param_92;
Alloc param_96 = cmd_alloc;
CmdRef param_97 = cmd_ref;
Cmd_BeginClip_write(param_96, param_97);
cmd_ref.offset += 4u;
if (clip_depth < 32u)
{
clip_one_mask &= (~(1u << clip_depth));
}
break;
}
Alloc param_89 = cmd_alloc;
CmdRef param_90 = cmd_ref;
Cmd_BeginClip_write(param_89, param_90);
cmd_ref.offset += 4u;
}
clip_depth++;
break;
}
case 5u:
{
uint param_91 = element_ref_ix;
bool param_92 = mem_ok;
TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_93 = read_tile_alloc(param_91, param_92);
TileRef param_94 = _2249;
tile_1 = Tile_read(param_93, param_94);
clip_depth--;
bool _2351 = clip_depth >= 32u;
bool _2360;
if (!_2351)
Alloc param_95 = cmd_alloc;
CmdRef param_96 = cmd_ref;
uint param_97 = cmd_limit;
bool _2261 = alloc_cmd(param_95, param_96, param_97);
cmd_alloc = param_95;
cmd_ref = param_96;
cmd_limit = param_97;
if (!_2261)
{
_2360 = (clip_one_mask & (1u << clip_depth)) == 0u;
}
else
{
_2360 = _2351;
}
if (_2360)
{
Alloc param_98 = cmd_alloc;
CmdRef param_99 = cmd_ref;
uint param_100 = cmd_limit;
bool _2369 = alloc_cmd(param_98, param_99, param_100);
cmd_alloc = param_98;
cmd_ref = param_99;
cmd_limit = param_100;
if (!_2369)
{
break;
}
Alloc param_101 = cmd_alloc;
CmdRef param_102 = cmd_ref;
Cmd_Solid_write(param_101, param_102);
cmd_ref.offset += 4u;
Alloc param_103 = cmd_alloc;
CmdRef param_104 = cmd_ref;
Cmd_EndClip_write(param_103, param_104);
cmd_ref.offset += 4u;
break;
}
Alloc param_98 = cmd_alloc;
CmdRef param_99 = cmd_ref;
uint param_100 = 0u;
Tile param_101 = tile_1;
float param_102 = 0.0f;
write_fill(param_98, param_99, param_100, param_101, param_102);
cmd_ref = param_99;
Alloc param_103 = cmd_alloc;
CmdRef param_104 = cmd_ref;
Cmd_EndClip_write(param_103, param_104);
cmd_ref.offset += 4u;
break;
}
}
@ -1359,17 +1287,17 @@ void comp_main()
break;
}
}
bool _2432 = (bin_tile_x + tile_x) < _1249.Load(8);
bool _2441;
if (_2432)
bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8);
bool _2335;
if (_2326)
{
_2441 = (bin_tile_y + tile_y) < _1249.Load(12);
_2335 = (bin_tile_y + tile_y) < _1169.Load(12);
}
else
{
_2441 = _2432;
_2335 = _2326;
}
if (_2441)
if (_2335)
{
Alloc param_105 = cmd_alloc;
CmdRef param_106 = cmd_ref;

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -41,16 +41,6 @@ struct FillImage
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
};
struct ElementTag
{
uint tag;
@ -143,8 +133,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -153,14 +148,14 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _418 = { 0u, 0u };
static const DrawMonoid _442 = { 1u, 0u };
static const DrawMonoid _444 = { 1u, 1u };
static const DrawMonoid _348 = { 0u, 0u };
static const DrawMonoid _372 = { 1u, 0u };
static const DrawMonoid _374 = { 1u, 1u };
RWByteAddressBuffer _201 : register(u0, space0);
ByteAddressBuffer _225 : register(t2, space0);
ByteAddressBuffer _1004 : register(t3, space0);
ByteAddressBuffer _1038 : register(t1, space0);
RWByteAddressBuffer _187 : register(u0, space0);
ByteAddressBuffer _211 : register(t2, space0);
ByteAddressBuffer _934 : register(t3, space0);
ByteAddressBuffer _968 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -176,9 +171,9 @@ groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _375;
uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _321;
}
DrawMonoid map_tag(uint tag_word)
@ -189,24 +184,24 @@ DrawMonoid map_tag(uint tag_word)
case 5u:
case 6u:
{
return _442;
return _372;
}
case 9u:
case 10u:
{
return _444;
return _374;
}
default:
{
return _418;
return _348;
}
}
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _214 = { ref.offset + (index * 36u) };
return _214;
ElementRef _200 = { ref.offset + (index * 36u) };
return _200;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
@ -219,13 +214,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
DrawMonoid tag_monoid_identity()
{
return _418;
return _348;
}
FillColor FillColor_read(FillColorRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
FillColor s;
s.rgba_color = raw0;
return s;
@ -233,8 +228,8 @@ FillColor FillColor_read(FillColorRef ref)
FillColor Element_FillColor_read(ElementRef ref)
{
FillColorRef _381 = { ref.offset + 4u };
FillColorRef param = _381;
FillColorRef _327 = { ref.offset + 4u };
FillColorRef param = _327;
return FillColor_read(param);
}
@ -251,7 +246,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_201.Store(offset * 4 + 8, val);
_187.Store(offset * 4 + 8, val);
}
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
@ -289,9 +284,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2);
AnnoColorRef _805 = { ref.offset + 4u };
AnnoColorRef _735 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoColorRef param_4 = _805;
AnnoColorRef param_4 = _735;
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5);
}
@ -299,11 +294,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
uint raw4 = _225.Load((ix + 4u) * 4 + 0);
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
uint raw2 = _211.Load((ix + 2u) * 4 + 0);
uint raw3 = _211.Load((ix + 3u) * 4 + 0);
uint raw4 = _211.Load((ix + 4u) * 4 + 0);
FillLinGradient s;
s.index = raw0;
s.p0 = float2(asfloat(raw1), asfloat(raw2));
@ -313,8 +308,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
{
FillLinGradientRef _389 = { ref.offset + 4u };
FillLinGradientRef param = _389;
FillLinGradientRef _335 = { ref.offset + 4u };
FillLinGradientRef param = _335;
return FillLinGradient_read(param);
}
@ -365,9 +360,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2);
AnnoLinGradientRef _826 = { ref.offset + 4u };
AnnoLinGradientRef _756 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoLinGradientRef param_4 = _826;
AnnoLinGradientRef param_4 = _756;
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5);
}
@ -375,8 +370,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
FillImage FillImage_read(FillImageRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
@ -385,8 +380,8 @@ FillImage FillImage_read(FillImageRef ref)
FillImage Element_FillImage_read(ElementRef ref)
{
FillImageRef _397 = { ref.offset + 4u };
FillImageRef param = _397;
FillImageRef _343 = { ref.offset + 4u };
FillImageRef param = _343;
return FillImage_read(param);
}
@ -429,32 +424,13 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2);
AnnoImageRef _847 = { ref.offset + 4u };
AnnoImageRef _777 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoImageRef param_4 = _847;
AnnoImageRef param_4 = _777;
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5);
}
Clip Clip_read(ClipRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
Clip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
return s;
}
Clip Element_BeginClip_read(ElementRef ref)
{
ClipRef _405 = { ref.offset + 4u };
ClipRef param = _405;
return Clip_read(param);
}
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
{
uint ix = ref.offset >> uint(2);
@ -486,20 +462,13 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2);
AnnoBeginClipRef _868 = { ref.offset + 4u };
AnnoBeginClipRef _798 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoBeginClipRef param_4 = _868;
AnnoBeginClipRef param_4 = _798;
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5);
}
Clip Element_EndClip_read(ElementRef ref)
{
ClipRef _413 = { ref.offset + 4u };
ClipRef param = _413;
return Clip_read(param);
}
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
{
uint ix = ref.offset >> uint(2);
@ -527,9 +496,9 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u;
write_mem(param, param_1, param_2);
AnnoEndClipRef _886 = { ref.offset + 4u };
AnnoEndClipRef _816 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoEndClipRef param_4 = _886;
AnnoEndClipRef param_4 = _816;
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5);
}
@ -537,8 +506,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _904 = { ix * 36u };
ElementRef ref = _904;
ElementRef _834 = { ix * 36u };
ElementRef ref = _834;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
@ -575,11 +544,11 @@ void comp_main()
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _1010;
_1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _1010.path_ix;
row.clip_ix = _1010.clip_ix;
DrawMonoid _940;
_940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _940.path_ix;
row.clip_ix = _940.clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -588,9 +557,10 @@ void comp_main()
row = combine_tag_monoid(param_10, param_11);
}
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u);
AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) };
AnnotatedRef out_ref = _1054;
uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u);
uint clip_out_base = _968.Load(48) >> uint(2);
AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) };
AnnotatedRef out_ref = _989;
float4 mat;
float2 translate;
AnnoColor anno_fill;
@ -600,39 +570,43 @@ void comp_main()
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
Alloc param_32;
AnnoEndClip anno_end_clip;
Alloc param_38;
Alloc param_36;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_12 = m;
DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
}
_187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
ElementRef param_14 = ref;
uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16).tag;
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
{
uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u));
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8));
float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8));
uint fill_mode = uint(linewidth >= 0.0f);
if ((linewidth >= 0.0f) || (tag_word == 5u))
{
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8)));
uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8)));
if (tag_word == 5u)
{
translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8)));
translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8)));
}
}
if (linewidth >= 0.0f)
@ -649,9 +623,9 @@ void comp_main()
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Alloc _1257;
_1257.offset = _1038.Load(32);
param_18.offset = _1257.offset;
Alloc _1203;
_1203.offset = _968.Load(32);
param_18.offset = _1203.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
@ -674,9 +648,9 @@ void comp_main()
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
Alloc _1353;
_1353.offset = _1038.Load(32);
param_23.offset = _1353.offset;
Alloc _1299;
_1299.offset = _968.Load(32);
param_23.offset = _1299.offset;
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
@ -691,48 +665,51 @@ void comp_main()
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Alloc _1381;
_1381.offset = _1038.Load(32);
param_28.offset = _1381.offset;
Alloc _1327;
_1327.offset = _968.Load(32);
param_28.offset = _1327.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31);
break;
}
case 9u:
{
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0f;
Alloc _1344;
_1344.offset = _968.Load(32);
param_32.offset = _1344.offset;
AnnotatedRef param_33 = out_ref;
uint param_34 = 0u;
AnnoBeginClip param_35 = anno_begin_clip;
Annotated_BeginClip_write(param_32, param_33, param_34, param_35);
break;
}
}
}
else
{
if (tag_word == 10u)
{
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
Alloc _1368;
_1368.offset = _968.Load(32);
param_36.offset = _1368.offset;
AnnotatedRef param_37 = out_ref;
AnnoEndClip param_38 = anno_end_clip;
Annotated_EndClip_write(param_36, param_37, param_38);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u)
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32);
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0f;
Alloc _1410;
_1410.offset = _1038.Load(32);
param_33.offset = _1410.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = 0u;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_EndClip_read(param_37);
anno_end_clip.bbox = end_clip.bbox;
Alloc _1435;
_1435.offset = _1038.Load(32);
param_38.offset = _1435.offset;
AnnotatedRef param_39 = out_ref;
AnnoEndClip param_40 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40);
}
path_ix = m.path_ix;
}
_187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
}
out_ref.offset += 40u;
}

View file

@ -87,16 +87,6 @@ struct FillImage
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
};
struct ElementTag
{
uint tag;
@ -217,8 +207,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -233,9 +228,9 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225)
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211)
{
uint tag_and_flags = v_225.scene[ref.offset >> uint(2)];
uint tag_and_flags = v_211.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
@ -284,20 +279,20 @@ DrawMonoid tag_monoid_identity()
}
static inline __attribute__((always_inline))
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225)
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u];
uint raw0 = v_211.scene[ix + 0u];
FillColor s;
s.rgba_color = raw0;
return s;
}
static inline __attribute__((always_inline))
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225)
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{
FillColorRef param = FillColorRef{ ref.offset + 4u };
return FillColor_read(param, v_225);
return FillColor_read(param, v_211);
}
static inline __attribute__((always_inline))
@ -307,7 +302,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187)
{
Alloc param = alloc;
uint param_1 = offset;
@ -315,61 +310,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_201.memory[offset] = val;
v_187.memory[offset] = val;
}
static inline __attribute__((always_inline))
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201)
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201);
write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201);
write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201);
write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201);
write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17, v_201);
write_mem(param_15, param_16, param_17, v_187);
}
static inline __attribute__((always_inline))
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201)
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5, v_201);
AnnoColor_write(param_3, param_4, param_5, v_187);
}
static inline __attribute__((always_inline))
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225)
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u];
uint raw2 = v_225.scene[ix + 2u];
uint raw3 = v_225.scene[ix + 3u];
uint raw4 = v_225.scene[ix + 4u];
uint raw0 = v_211.scene[ix + 0u];
uint raw1 = v_211.scene[ix + 1u];
uint raw2 = v_211.scene[ix + 2u];
uint raw3 = v_211.scene[ix + 3u];
uint raw4 = v_211.scene[ix + 4u];
FillLinGradient s;
s.index = raw0;
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
@ -378,73 +373,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const
}
static inline __attribute__((always_inline))
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225)
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
return FillLinGradient_read(param, v_225);
return FillLinGradient_read(param, v_211);
}
static inline __attribute__((always_inline))
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201)
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201);
write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201);
write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201);
write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201);
write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_201);
write_mem(param_15, param_16, param_17, v_187);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.line_x);
write_mem(param_18, param_19, param_20, v_201);
write_mem(param_18, param_19, param_20, v_187);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.line_y);
write_mem(param_21, param_22, param_23, v_201);
write_mem(param_21, param_22, param_23, v_187);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = as_type<uint>(s.line_c);
write_mem(param_24, param_25, param_26, v_201);
write_mem(param_24, param_25, param_26, v_187);
}
static inline __attribute__((always_inline))
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201)
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5, v_201);
AnnoLinGradient_write(param_3, param_4, param_5, v_187);
}
static inline __attribute__((always_inline))
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225)
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u];
uint raw0 = v_211.scene[ix + 0u];
uint raw1 = v_211.scene[ix + 1u];
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
@ -452,167 +447,140 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf&
}
static inline __attribute__((always_inline))
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225)
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211)
{
FillImageRef param = FillImageRef{ ref.offset + 4u };
return FillImage_read(param, v_225);
return FillImage_read(param, v_211);
}
static inline __attribute__((always_inline))
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201)
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201);
write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201);
write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201);
write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201);
write_mem(param_12, param_13, param_14, v_187);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_201);
write_mem(param_15, param_16, param_17, v_187);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20, v_201);
write_mem(param_18, param_19, param_20, v_187);
}
static inline __attribute__((always_inline))
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201)
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5, v_201);
AnnoImage_write(param_3, param_4, param_5, v_187);
}
static inline __attribute__((always_inline))
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_225.scene[ix + 0u];
uint raw1 = v_225.scene[ix + 1u];
uint raw2 = v_225.scene[ix + 2u];
uint raw3 = v_225.scene[ix + 3u];
Clip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
return s;
}
static inline __attribute__((always_inline))
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_225);
}
static inline __attribute__((always_inline))
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201)
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201);
write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201);
write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201);
write_mem(param_9, param_10, param_11, v_187);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_201);
write_mem(param_12, param_13, param_14, v_187);
}
static inline __attribute__((always_inline))
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201)
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5, v_201);
AnnoBeginClip_write(param_3, param_4, param_5, v_187);
}
static inline __attribute__((always_inline))
Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_225);
}
static inline __attribute__((always_inline))
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_201);
write_mem(param_3, param_4, param_5, v_187);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_201);
write_mem(param_6, param_7, param_8, v_187);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_201);
write_mem(param_9, param_10, param_11, v_187);
}
static inline __attribute__((always_inline))
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u;
write_mem(param, param_1, param_2, v_201);
write_mem(param, param_1, param_2, v_187);
Alloc param_3 = a;
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5, v_201);
AnnoEndClip_write(param_3, param_4, param_5, v_187);
}
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_225).tag;
uint tag_word = Element_tag(param, v_211).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local;
@ -622,7 +590,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_225).tag;
tag_word = Element_tag(param_4, v_211).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
@ -647,9 +615,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _1007 = gl_WorkGroupID.x - 1u;
row.path_ix = _1004.parent[_1007].path_ix;
row.clip_ix = _1004.parent[_1007].clip_ix;
uint _937 = gl_WorkGroupID.x - 1u;
row.path_ix = _934.parent[_937].path_ix;
row.clip_ix = _934.parent[_937].clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -658,8 +626,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
row = combine_tag_monoid(param_10, param_11);
}
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) };
uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2);
AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) };
float4 mat;
float2 translate;
AnnoColor anno_fill;
@ -669,39 +638,43 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
Alloc param_32;
AnnoEndClip anno_end_clip;
Alloc param_38;
Alloc param_36;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
v_201.memory[out_base + (i_2 * 2u)] = m.path_ix;
v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_12 = m;
DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
}
v_187.memory[out_base + (i_2 * 2u)] = m.path_ix;
v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
ElementRef param_14 = ref;
uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16, v_225).tag;
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
tag_word = Element_tag(param_16, v_211).tag;
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
{
uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u));
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0;
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0;
uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0;
float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(v_201.memory[bbox_offset + 4u]);
float linewidth = as_type<float>(v_187.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0);
if ((linewidth >= 0.0) || (tag_word == 5u))
{
uint trans_ix = v_201.memory[bbox_offset + 5u];
uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u]));
uint trans_ix = v_187.memory[bbox_offset + 5u];
uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u]));
if (tag_word == 5u)
{
translate = as_type<float2>(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u]));
translate = as_type<float2>(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u]));
}
}
if (linewidth >= 0.0)
@ -714,21 +687,21 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
case 4u:
{
ElementRef param_17 = this_ref;
FillColor fill = Element_FillColor_read(param_17, v_225);
FillColor fill = Element_FillColor_read(param_17, v_211);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
param_18.offset = _1038.conf.anno_alloc.offset;
param_18.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21, v_201);
Annotated_Color_write(param_18, param_19, param_20, param_21, v_187);
break;
}
case 5u:
{
ElementRef param_22 = this_ref;
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225);
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211);
anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth;
anno_lin.index = lin.index;
@ -741,57 +714,60 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
param_23.offset = _1038.conf.anno_alloc.offset;
param_23.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201);
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187);
break;
}
case 6u:
{
ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27, v_225);
FillImage fill_img = Element_FillImage_read(param_27, v_211);
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
param_28.offset = _1038.conf.anno_alloc.offset;
param_28.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31, v_201);
Annotated_Image_write(param_28, param_29, param_30, param_31, v_187);
break;
}
case 9u:
{
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0;
param_32.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_33 = out_ref;
uint param_34 = 0u;
AnnoBeginClip param_35 = anno_begin_clip;
Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187);
break;
}
}
}
else
{
if (tag_word == 10u)
{
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
param_36.offset = _968.conf.anno_alloc.offset;
AnnotatedRef param_37 = out_ref;
AnnoEndClip param_38 = anno_end_clip;
Annotated_EndClip_write(param_36, param_37, param_38, v_187);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u)
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0;
param_33.offset = _1038.conf.anno_alloc.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = 0u;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201);
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_EndClip_read(param_37, v_225);
anno_end_clip.bbox = end_clip.bbox;
param_38.offset = _1038.conf.anno_alloc.offset;
AnnotatedRef param_39 = out_ref;
AnnoEndClip param_40 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
}
path_ix = m.path_ix;
}
v_187.memory[clip_out_base + m.clip_ix] = path_ix;
}
out_ref.offset += 40u;
}

Binary file not shown.

View file

@ -36,8 +36,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

View file

@ -66,8 +66,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -117,8 +117,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -457,7 +462,6 @@ void comp_main()
TileSegRef tile_seg_ref;
float area[8];
uint blend_stack[128][8];
float blend_alpha_stack[128][8];
while (mem_ok)
{
Alloc param_3 = cmd_alloc;
@ -640,7 +644,6 @@ void comp_main()
float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
rgba[k_11] = 0.0f.xxxx;
}
clip_depth++;
@ -655,7 +658,7 @@ void comp_main()
uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
}
cmd_ref.offset += 4u;
@ -665,8 +668,8 @@ void comp_main()
{
Alloc param_36 = cmd_alloc;
CmdRef param_37 = cmd_ref;
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1469;
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1453;
cmd_alloc.offset = cmd_ref.offset;
break;
}

View file

@ -175,8 +175,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
while (mem_ok)
{
Alloc param_3 = cmd_alloc;
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
rgba[k_11] = float4(0.0);
}
clip_depth++;
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
}
cmd_ref.offset += 4u;

Binary file not shown.

Binary file not shown.

View file

@ -117,8 +117,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -457,7 +462,6 @@ void comp_main()
TileSegRef tile_seg_ref;
float area[8];
uint blend_stack[128][8];
float blend_alpha_stack[128][8];
while (mem_ok)
{
Alloc param_3 = cmd_alloc;
@ -640,7 +644,6 @@ void comp_main()
float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
rgba[k_11] = 0.0f.xxxx;
}
clip_depth++;
@ -655,7 +658,7 @@ void comp_main()
uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
}
cmd_ref.offset += 4u;
@ -665,8 +668,8 @@ void comp_main()
{
Alloc param_36 = cmd_alloc;
CmdRef param_37 = cmd_ref;
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1469;
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
cmd_ref = _1453;
cmd_alloc.offset = cmd_ref.offset;
break;
}

View file

@ -175,8 +175,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
while (mem_ok)
{
Alloc param_3 = cmd_alloc;
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
float4 param_34 = float4(rgba[k_11]);
uint _1390 = packsRGB(param_34);
blend_stack[d_2][k_11] = _1390;
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
rgba[k_11] = float4(0.0);
}
clip_depth++;
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
uint d_3 = min(clip_depth, 127u);
uint param_35 = blend_stack[d_3][k_12];
float4 bg = unpacksRGB(param_35);
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
float4 fg_1 = rgba[k_12] * area[k_12];
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
}
cmd_ref.offset += 4u;

Binary file not shown.

View file

@ -86,8 +86,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

View file

@ -146,8 +146,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -64,8 +64,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -80,7 +85,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u };
RWByteAddressBuffer _111 : register(u0, space0);
ByteAddressBuffer _574 : register(t2, space0);
ByteAddressBuffer _639 : register(t1, space0);
ByteAddressBuffer _709 : register(t3, space0);
ByteAddressBuffer _710 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -356,7 +361,7 @@ uint round_up(float x)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -377,17 +382,17 @@ void comp_main()
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
TagMonoid _715;
_715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
_715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
_715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
_715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
_715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
tm.trans_ix = _715.trans_ix;
tm.linewidth_ix = _715.linewidth_ix;
tm.pathseg_ix = _715.pathseg_ix;
tm.path_ix = _715.path_ix;
tm.pathseg_offset = _715.pathseg_offset;
TagMonoid _716;
_716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
_716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
_716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
_716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
_716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
tm.trans_ix = _716.trans_ix;
tm.linewidth_ix = _716.linewidth_ix;
tm.pathseg_ix = _716.pathseg_ix;
tm.path_ix = _716.path_ix;
tm.pathseg_offset = _716.pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -395,14 +400,14 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix;
uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) };
TransformSegRef trans_ref = _770;
PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _780;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
TransformSegRef trans_ref = _771;
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _781;
float linewidth[4];
uint save_trans_ix[4];
float2 p0;
@ -455,9 +460,9 @@ void comp_main()
}
}
}
Alloc _876;
_876.offset = _639.Load(36);
param_13.offset = _876.offset;
Alloc _877;
_877.offset = _639.Load(36);
param_13.offset = _877.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -466,25 +471,25 @@ void comp_main()
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox;
float2 _949 = min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
float4 _947 = bbox;
float2 _950 = min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox;
float2 _985 = min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
float4 _983 = bbox;
float2 _986 = min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
}
else
{
@ -515,9 +520,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1088;
_1088.offset = _639.Load(28);
param_15.offset = _1088.offset;
Alloc _1089;
_1089.offset = _639.Load(28);
param_15.offset = _1089.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
@ -574,17 +579,17 @@ void comp_main()
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
{
_1269 = gl_LocalInvocationID.x == 255u;
_1270 = gl_LocalInvocationID.x == 255u;
}
else
{
_1269 = _1263;
_1270 = _1264;
}
if (_1269)
if (_1270)
{
do_atomic = true;
}
@ -612,30 +617,30 @@ void comp_main()
}
if (do_atomic)
{
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
{
_1343 = m.bbox.w > m.bbox.y;
_1344 = m.bbox.w > m.bbox.y;
}
else
{
_1343 = _1334;
_1344 = _1335;
}
if (_1343)
if (_1344)
{
float param_29 = m.bbox.x;
uint _1352;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352);
uint _1353;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
float param_30 = m.bbox.y;
uint _1360;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360);
uint _1361;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
float param_31 = m.bbox.z;
uint _1368;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368);
uint _1369;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
float param_32 = m.bbox.w;
uint _1376;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376);
uint _1377;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
}
bbox_out_ix += 6u;
}

View file

@ -129,8 +129,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -430,7 +435,7 @@ uint round_up(thread const float& x)
return uint(fast::min(65535.0, ceil(x) + 32768.0));
}
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_tag[256];
threadgroup Monoid sh_scratch[256];
@ -456,12 +461,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _712 = gl_WorkGroupID.x - 1u;
tm.trans_ix = _709.parent[_712].trans_ix;
tm.linewidth_ix = _709.parent[_712].linewidth_ix;
tm.pathseg_ix = _709.parent[_712].pathseg_ix;
tm.path_ix = _709.parent[_712].path_ix;
tm.pathseg_offset = _709.parent[_712].pathseg_offset;
uint _713 = gl_WorkGroupID.x - 1u;
tm.trans_ix = _710.parent[_713].trans_ix;
tm.linewidth_ix = _710.parent[_713].linewidth_ix;
tm.pathseg_ix = _710.parent[_713].pathseg_ix;
tm.path_ix = _710.parent[_713].path_ix;
tm.pathseg_offset = _710.parent[_713].pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -536,25 +541,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox;
float2 _949 = fast::min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = fast::max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
float4 _947 = bbox;
float2 _950 = fast::min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = fast::max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox;
float2 _985 = fast::min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = fast::max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
float4 _983 = bbox;
float2 _986 = fast::min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = fast::max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
}
else
{
@ -642,17 +647,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
{
_1269 = gl_LocalInvocationID.x == 255u;
_1270 = gl_LocalInvocationID.x == 255u;
}
else
{
_1269 = _1263;
_1270 = _1264;
}
if (_1269)
if (_1270)
{
do_atomic = true;
}
@ -680,26 +685,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
}
if (do_atomic)
{
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
{
_1343 = m.bbox.w > m.bbox.y;
_1344 = m.bbox.w > m.bbox.y;
}
else
{
_1343 = _1334;
_1344 = _1335;
}
if (_1343)
if (_1344)
{
float param_29 = m.bbox.x;
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y;
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z;
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w;
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
}
bbox_out_ix += 6u;
}

Binary file not shown.

Binary file not shown.

View file

@ -26,8 +26,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -37,9 +42,9 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
ByteAddressBuffer _139 : register(t1, space0);
ByteAddressBuffer _150 : register(t2, space0);
RWByteAddressBuffer _237 : register(u3, space0);
RWByteAddressBuffer _257 : register(u0, space0);
ByteAddressBuffer _151 : register(t2, space0);
RWByteAddressBuffer _238 : register(u3, space0);
RWByteAddressBuffer _258 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -83,13 +88,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(64) >> uint(2)) + ix;
uint tag_word = _150.Load(scene_ix * 4 + 0);
uint scene_ix = (_139.Load(84) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _150.Load((scene_ix + i) * 4 + 0);
tag_word = _151.Load((scene_ix + i) * 4 + 0);
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
@ -111,11 +116,11 @@ void comp_main()
}
if (gl_LocalInvocationID.x == 0u)
{
_237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
_237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
_237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
_237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
_237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
}
}

View file

@ -33,8 +33,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -103,17 +108,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid&
return c;
}
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_scratch[128];
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
uint tag_word = _150.scene[scene_ix];
uint tag_word = _151.scene[scene_ix];
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _150.scene[scene_ix + i];
tag_word = _151.scene[scene_ix + i];
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
@ -135,11 +140,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene
}
if (gl_LocalInvocationID.x == 0u)
{
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
}
}

Binary file not shown.

View file

@ -60,8 +60,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

View file

@ -81,8 +81,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -39,8 +39,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -150,7 +155,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(56) + (ix * 24u) };
TransformRef _285 = { _278.Load(76) + (ix * 24u) };
TransformRef ref = _285;
TransformRef param = ref;
Transform agg = Transform_read(param);

View file

@ -102,8 +102,13 @@ struct Config
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -28,8 +28,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
@ -87,7 +92,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(56) + (ix * 24u) };
TransformRef _168 = { _161.Load(76) + (ix * 24u) };
TransformRef ref = _168;
TransformRef param = ref;
Transform agg = Transform_read(param);

View file

@ -40,8 +40,13 @@ struct Config
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;

Binary file not shown.

View file

@ -91,7 +91,6 @@ void main() {
vec2 xy = vec2(xy_uint);
mediump vec4 rgba[CHUNK];
uint blend_stack[MAX_BLEND_STACK][CHUNK];
mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK];
for (uint i = 0; i < CHUNK; i++) {
rgba[i] = vec4(0.0);
}
@ -211,7 +210,6 @@ void main() {
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0);
rgba[k] = vec4(0.0);
}
clip_depth++;
@ -222,7 +220,7 @@ void main() {
for (uint k = 0; k < CHUNK; k++) {
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k];
mediump vec4 fg = rgba[k] * area[k];
rgba[k] = bg * (1.0 - fg.a) + fg;
}
cmd_ref.offset += 4;

View file

@ -46,11 +46,23 @@ struct Config {
// Monoid for draw objects
Alloc drawmonoid_alloc;
// BeginClip(path_ix) / EndClip
Alloc clip_alloc;
// Intermediate bicyclic semigroup
Alloc clip_bic_alloc;
// Intermediate stack
Alloc clip_stack_alloc;
// Clip processing results (path_ix + bbox)
Alloc clip_bbox_alloc;
// Number of transforms in scene
// This is probably not needed.
uint n_trans;
// This only counts actual paths, not EndClip.
// This *should* count only actual paths, but in the current
// implementation is redundant with n_elements.
uint n_path;
// Total number of BeginClip and EndClip draw objects.
uint n_clip;
// Offset (in bytes) of transform stream in scene buffer
uint trans_offset;
// Offset (in bytes) of linewidth stream in scene

View file

@ -20,7 +20,8 @@ use bytemuck::{Pod, Zeroable};
use piet_gpu_hal::BufWrite;
use crate::stages::{
self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
TRANSFORM_PART_SIZE,
};
pub struct Encoder {
@ -31,6 +32,7 @@ pub struct Encoder {
drawobj_stream: Vec<u8>,
n_path: u32,
n_pathseg: u32,
n_clip: u32,
}
/// A scene fragment encoding a glyph.
@ -98,6 +100,7 @@ impl Encoder {
drawobj_stream: Vec::new(),
n_path: 0,
n_pathseg: 0,
n_clip: 0,
}
}
@ -155,6 +158,7 @@ impl Encoder {
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.n_clip += 1;
saved
}
@ -170,6 +174,7 @@ impl Encoder {
// This is a dummy path, and will go away with the new clip impl.
self.tag_stream.push(0x10);
self.n_path += 1;
self.n_clip += 1;
}
/// Return a config for the element processing pipeline.
@ -203,6 +208,20 @@ impl Encoder {
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc;
alloc += n_drawobj * ANNOTATED_SIZE;
let clip_alloc = alloc;
let n_clip = self.n_clip as usize;
const CLIP_SIZE: usize = 4;
alloc += n_clip * CLIP_SIZE;
let clip_bic_alloc = alloc;
const CLIP_BIC_SIZE: usize = 8;
// This can round down, as we only reduce the prefix
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
let clip_stack_alloc = alloc;
const CLIP_EL_SIZE: usize = 20;
alloc += n_clip * CLIP_EL_SIZE;
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let config = Config {
n_elements: n_drawobj as u32,
@ -212,8 +231,13 @@ impl Encoder {
trans_alloc: trans_alloc as u32,
bbox_alloc: bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
n_trans: n_trans as u32,
n_path: self.n_path,
n_clip: self.n_clip,
trans_offset: trans_offset as u32,
linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32,
@ -261,6 +285,10 @@ impl Encoder {
self.tag_stream.len()
}
pub(crate) fn n_clip(&self) -> u32 {
self.n_clip
}
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
self.tag_stream.extend(&glyph.tag_stream);
self.pathseg_stream.extend(&glyph.pathseg_stream);

View file

@ -20,9 +20,9 @@ use piet_gpu_hal::{
};
use pico_svg::PicoSvg;
use stages::{ElementBinding, ElementCode};
use stages::{ClipBinding, ElementBinding, ElementCode};
use crate::stages::{Config, ElementStage};
use crate::stages::{ClipCode, Config, ElementStage};
const TILE_W: usize = 16;
const TILE_H: usize = 16;
@ -86,6 +86,9 @@ pub struct Renderer {
element_stage: ElementStage,
element_bindings: Vec<ElementBinding>,
clip_code: ClipCode,
clip_binding: ClipBinding,
tile_pipeline: Pipeline,
tile_ds: DescriptorSet,
@ -110,6 +113,7 @@ pub struct Renderer {
n_paths: usize,
n_pathseg: usize,
n_pathtag: usize,
n_clip: u32,
// Keep a reference to the image so that it is not destroyed.
_bg_image: Image,
@ -191,18 +195,20 @@ impl Renderer {
let element_stage = ElementStage::new(session, &element_code);
let element_bindings = scene_bufs
.iter()
.zip(&config_bufs)
.map(|(scene_buf, config_buf)| {
.map(|scene_buf| {
element_stage.bind(
session,
&element_code,
config_buf,
&config_buf,
scene_buf,
&memory_buf_dev,
)
})
.collect();
let clip_code = ClipCode::new(session);
let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
let tile_pipeline = session
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
@ -286,6 +292,8 @@ impl Renderer {
element_code,
element_stage,
element_bindings,
clip_code,
clip_binding,
tile_pipeline,
tile_ds,
path_pipeline,
@ -304,6 +312,7 @@ impl Renderer {
n_paths: 0,
n_pathseg: 0,
n_pathtag: 0,
n_clip: 0,
_bg_image: bg_image,
gradient_bufs,
gradients,
@ -329,6 +338,7 @@ impl Renderer {
self.n_drawobj = render_ctx.n_drawobj();
self.n_pathseg = render_ctx.n_pathseg() as usize;
self.n_pathtag = render_ctx.n_pathtag();
self.n_clip = render_ctx.n_clip();
// These constants depend on encoding and may need to be updated.
// Perhaps we can plumb these from piet-gpu-derive?
@ -342,6 +352,7 @@ impl Renderer {
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
config.width_in_tiles = width_in_tiles as u32;
config.height_in_tiles = height_in_tiles as u32;
config.tile_alloc = tile_base as u32;
@ -401,6 +412,19 @@ impl Renderer {
cmd_buf.end_debug_label();
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Clip bounding box calculation");
self.clip_binding
.record(cmd_buf, &self.clip_code, self.n_clip as u32);
cmd_buf.end_debug_label();
cmd_buf.begin_debug_label("Element binning");
cmd_buf.dispatch(
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.end_debug_label();
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Tile allocation");
cmd_buf.dispatch(
&self.tile_pipeline,
@ -430,18 +454,7 @@ impl Renderer {
);
cmd_buf.end_debug_label();
cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between
// pipeline stages, but I am keeping it in so that timer queries are
// easier to interpret.
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Element binning");
cmd_buf.dispatch(
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.end_debug_label();
// TODO: redo query accounting
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier();
cmd_buf.begin_debug_label("Coarse raster");

View file

@ -123,6 +123,10 @@ impl PietGpuRenderContext {
self.new_encoder.n_transform()
}
pub fn n_clip(&self) -> u32 {
self.new_encoder.n_clip()
}
pub fn write_scene(&self, buf: &mut BufWrite) {
self.new_encoder.write_scene(buf);
}

View file

@ -16,12 +16,14 @@
//! Stages for new element pipeline, exposed for testing.
mod clip;
mod draw;
mod path;
mod transform;
use bytemuck::{Pod, Zeroable};
pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE};
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
use piet_gpu_hal::{Buffer, CmdBuf, Session};
@ -47,8 +49,13 @@ pub struct Config {
pub trans_alloc: u32,
pub bbox_alloc: u32,
pub drawmonoid_alloc: u32,
pub clip_alloc: u32,
pub clip_bic_alloc: u32,
pub clip_stack_alloc: u32,
pub clip_bbox_alloc: u32,
pub n_trans: u32,
pub n_path: u32,
pub n_clip: u32,
pub trans_offset: u32,
pub linewidth_offset: u32,
pub pathtag_offset: u32,

View file

@ -0,0 +1,94 @@
// Copyright 2022 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! The clip processing stage (includes substages).
use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session};
// Note that this isn't the code/stage/binding pattern of most of the other stages
// in the new element processing pipeline. We want to move those temporary buffers
// into common memory and converge on this pattern.
pub struct ClipCode {
reduce_pipeline: Pipeline,
leaf_pipeline: Pipeline,
}
pub struct ClipBinding {
reduce_ds: DescriptorSet,
leaf_ds: DescriptorSet,
}
pub const CLIP_PART_SIZE: u32 = 256;
impl ClipCode {
pub unsafe fn new(session: &Session) -> ClipCode {
let reduce_code = include_shader!(session, "../../shader/gen/clip_reduce");
let reduce_pipeline = session
.create_compute_pipeline(reduce_code, &[BindType::Buffer, BindType::BufReadOnly])
.unwrap();
let leaf_code = include_shader!(session, "../../shader/gen/clip_leaf");
let leaf_pipeline = session
.create_compute_pipeline(leaf_code, &[BindType::Buffer, BindType::BufReadOnly])
.unwrap();
ClipCode {
reduce_pipeline,
leaf_pipeline,
}
}
}
impl ClipBinding {
pub unsafe fn new(
session: &Session,
code: &ClipCode,
config: &Buffer,
memory: &Buffer,
) -> ClipBinding {
let reduce_ds = session
.create_simple_descriptor_set(&code.reduce_pipeline, &[memory, config])
.unwrap();
let leaf_ds = session
.create_simple_descriptor_set(&code.leaf_pipeline, &[memory, config])
.unwrap();
ClipBinding { reduce_ds, leaf_ds }
}
/// Record the clip dispatches.
///
/// Assumes memory barrier on entry. Provides memory barrier on exit.
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) {
let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE;
if n_wg_reduce > 0 {
cmd_buf.dispatch(
&code.reduce_pipeline,
&self.reduce_ds,
(n_wg_reduce, 1, 1),
(CLIP_PART_SIZE, 1, 1),
);
cmd_buf.memory_barrier();
}
let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE;
if n_wg > 0 {
cmd_buf.dispatch(
&code.leaf_pipeline,
&self.leaf_ds,
(n_wg, 1, 1),
(CLIP_PART_SIZE, 1, 1),
);
cmd_buf.memory_barrier();
}
}
}

237
tests/src/clip.rs Normal file
View file

@ -0,0 +1,237 @@
// Copyright 2022 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! Tests for the piet-gpu clip processing stage.
use bytemuck::{Pod, Zeroable};
use rand::Rng;
use piet_gpu::stages::{self, ClipBinding, ClipCode, DrawMonoid};
use piet_gpu_hal::{BufWrite, BufferUsage};
use crate::{Config, Runner, TestResult};
struct ClipData {
clip_stream: Vec<u32>,
// In the atomic-int friendly encoding
path_bbox_stream: Vec<PathBbox>,
}
#[derive(Copy, Clone, Debug, Pod, Zeroable, Default)]
#[repr(C)]
struct PathBbox {
bbox: [u32; 4],
linewidth: f32,
trans_ix: u32,
}
pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("clip");
let n_clip: u64 = config.size.choose(1 << 8, 1 << 12, 1 << 16);
let data = ClipData::new(n_clip);
let stage_config = data.get_config();
let config_buf = runner
.session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap();
// Need to actually get data uploaded
let mut memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
{
let mut buf_write = memory.map_write(..);
data.fill_memory(&mut buf_write);
}
let code = ClipCode::new(&runner.session);
let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf);
let mut commands = runner.commands();
commands.write_timestamp(0);
commands.upload(&memory);
binding.record(&mut commands.cmd_buf, &code, n_clip as u32);
commands.download(&memory);
commands.write_timestamp(1);
runner.submit(commands);
let dst = memory.map_read(..);
if let Some(failure) = data.verify(&dst) {
result.fail(failure);
}
result
}
fn rand_bbox() -> [u32; 4] {
let mut rng = rand::thread_rng();
const Y_MIN: u32 = 32768;
const Y_MAX: u32 = Y_MIN + 1000;
let mut x0 = rng.gen_range(Y_MIN, Y_MAX);
let mut y0 = rng.gen_range(Y_MIN, Y_MAX);
let mut x1 = rng.gen_range(Y_MIN, Y_MAX);
let mut y1 = rng.gen_range(Y_MIN, Y_MAX);
if x0 > x1 {
std::mem::swap(&mut x0, &mut x1);
}
if y0 > y1 {
std::mem::swap(&mut y0, &mut y1);
}
[x0, y0, x1, y1]
}
/// Convert from atomic-friendly to normal float bbox.
fn decode_bbox(raw: [u32; 4]) -> [f32; 4] {
fn decode(x: u32) -> f32 {
x as f32 - 32768.0
}
[
decode(raw[0]),
decode(raw[1]),
decode(raw[2]),
decode(raw[3]),
]
}
fn intersect_bbox(b0: [f32; 4], b1: [f32; 4]) -> [f32; 4] {
[
b0[0].max(b1[0]),
b0[1].max(b1[1]),
b0[2].min(b1[2]),
b0[3].min(b1[3]),
]
}
const INFTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9];
impl ClipData {
/// Generate a random clip sequence
fn new(n: u64) -> ClipData {
// Simple LCG random generator, for deterministic results
let mut z = 20170705u64;
let mut depth = 0;
let mut path_bbox_stream = Vec::new();
let clip_stream = (0..n)
.map(|i| {
let is_push = if depth == 0 {
true
} else if depth >= 255 {
false
} else {
z = z.wrapping_mul(742938285) % ((1 << 31) - 1);
(z % 2) != 0
};
if is_push {
depth += 1;
let path_ix = path_bbox_stream.len() as u32;
let bbox = rand_bbox();
let path_bbox = PathBbox {
bbox,
..Default::default()
};
path_bbox_stream.push(path_bbox);
path_ix
} else {
depth -= 1;
!(i as u32)
}
})
.collect();
ClipData {
clip_stream,
path_bbox_stream,
}
}
fn get_config(&self) -> stages::Config {
let n_clip = self.clip_stream.len();
let n_path = self.path_bbox_stream.len();
let clip_alloc = 0;
let path_bbox_alloc = clip_alloc + 4 * n_clip;
let drawmonoid_alloc = path_bbox_alloc + 24 * n_path;
let clip_bic_alloc = drawmonoid_alloc + 8 * n_clip;
// TODO: this is over-allocated, we only need one bic per wg
let clip_stack_alloc = clip_bic_alloc + 8 * n_clip;
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
stages::Config {
clip_alloc: clip_alloc as u32,
// TODO: this wants to be renamed to path_bbox_alloc
bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
n_clip: n_clip as u32,
..Default::default()
}
}
fn memory_size(&self) -> u64 {
(8 + self.clip_stream.len() * (4 + 8 + 8 + 20 + 16) + self.path_bbox_stream.len() * 24)
as u64
}
fn fill_memory(&self, buf: &mut BufWrite) {
// offset / header; no dynamic allocation
buf.fill_zero(8);
buf.extend_slice(&self.clip_stream);
buf.extend_slice(&self.path_bbox_stream);
// drawmonoid is left uninitialized
}
fn verify(&self, buf: &[u8]) -> Option<String> {
let n_clip = self.clip_stream.len();
let n_path = self.path_bbox_stream.len();
let clip_bbox_start = 8 + n_clip * (4 + 8 + 8 + 20) + n_path * 24;
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
let draw_start = 8 + n_clip * 4 + n_path * 24;
let draw_range = draw_start..(draw_start + n_clip * 8);
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
let mut bbox_stack = Vec::new();
let mut parent_stack = Vec::new();
for (i, path_ix) in self.clip_stream.iter().enumerate() {
let mut expected_path = None;
if *path_ix >= 0x8000_0000 {
let parent = parent_stack.pop().unwrap();
expected_path = Some(self.clip_stream[parent as usize]);
bbox_stack.pop().unwrap();
} else {
parent_stack.push(i);
let path_bbox_stream = self.path_bbox_stream[*path_ix as usize];
let bbox = decode_bbox(path_bbox_stream.bbox);
let new = match bbox_stack.last() {
None => bbox,
Some(old) => intersect_bbox(*old, bbox),
};
bbox_stack.push(new);
};
let expected = bbox_stack.last().copied().unwrap_or(INFTY_BBOX);
let clip_bbox = clip_result[i];
if clip_bbox != expected {
return Some(format!(
"{}: path_ix={}, expected bbox={:?}, clip_bbox={:?}",
i, path_ix, expected, clip_bbox
));
}
if let Some(expected_path) = expected_path {
let actual_path = draw_result[i].path_ix;
if expected_path != actual_path {
return Some(format!(
"{}: expected path {}, actual {}",
i, expected_path, actual_path
));
}
}
}
None
}
}

View file

@ -102,17 +102,21 @@ impl DrawTestData {
// Layout of memory
let drawmonoid_alloc = 0;
let anno_alloc = drawmonoid_alloc + 8 * n_tags;
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
let stage_config = stages::Config {
n_elements: n_tags as u32,
anno_alloc: anno_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
..Default::default()
};
stage_config
}
fn memory_size(&self) -> u64 {
(8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64
// Note: this overallocates the clip buf a bit - only needed for the
// total number of begin_clip and end_clip tags.
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64
}
fn fill_scene(&self, buf: &mut BufWrite) {
@ -128,14 +132,13 @@ impl DrawTestData {
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// We compute an inclusive prefix sum, but for this application
// exclusive would be slightly better. We can adapt though.
// Verify exclusive prefix sum.
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
if *actual != expected {
return Some(format!("draw mismatch at {}", i));
}
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
}
None
}

View file

@ -17,6 +17,7 @@
//! Tests for piet-gpu shaders and GPU capabilities.
mod clear;
mod clip;
mod config;
mod draw;
mod linkedlist;
@ -139,6 +140,7 @@ fn main() {
report(&transform::transform_test(&mut runner, &config));
report(&path::path_test(&mut runner, &config));
report(&draw::draw_test(&mut runner, &config));
report(&clip::clip_test(&mut runner, &config));
}
}
}

View file

@ -20,8 +20,8 @@ use std::ops::RangeBounds;
use bytemuck::Pod;
use piet_gpu_hal::{
BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool,
Session,
BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags,
QueryPool, Session,
};
pub struct Runner {
@ -37,15 +37,8 @@ pub struct Commands {
query_pool: QueryPool,
}
/// Buffer for uploading data to GPU.
#[allow(unused)]
pub struct BufUp {
pub stage_buf: Buffer,
pub dev_buf: Buffer,
}
/// Buffer for downloading data from GPU.
pub struct BufDown {
/// Buffer for both uploading and downloading
pub struct BufStage {
pub stage_buf: Buffer,
pub dev_buf: Buffer,
}
@ -92,7 +85,7 @@ impl Runner {
}
#[allow(unused)]
pub fn buf_up(&self, size: u64) -> BufUp {
pub fn buf_up(&self, size: u64) -> BufStage {
let stage_buf = self
.session
.create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC)
@ -101,13 +94,13 @@ impl Runner {
.session
.create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE)
.unwrap();
BufUp { stage_buf, dev_buf }
BufStage { stage_buf, dev_buf }
}
/// Create a buffer for download (readback).
///
/// The `usage` parameter need not include COPY_SRC and STORAGE.
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown {
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufStage {
let stage_buf = self
.session
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
@ -116,7 +109,7 @@ impl Runner {
.session
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
.unwrap();
BufDown { stage_buf, dev_buf }
BufStage { stage_buf, dev_buf }
}
pub fn backend_type(&self) -> BackendType {
@ -129,17 +122,16 @@ impl Commands {
self.cmd_buf.write_timestamp(&self.query_pool, query);
}
#[allow(unused)]
pub unsafe fn upload(&mut self, buf: &BufUp) {
pub unsafe fn upload(&mut self, buf: &BufStage) {
self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf);
}
pub unsafe fn download(&mut self, buf: &BufDown) {
pub unsafe fn download(&mut self, buf: &BufStage) {
self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf);
}
}
impl BufDown {
impl BufStage {
pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) {
self.stage_buf.read(dst).unwrap()
}
@ -147,4 +139,8 @@ impl BufDown {
pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> {
self.stage_buf.map_read(range).unwrap()
}
pub unsafe fn map_write<'a>(&'a mut self, range: impl RangeBounds<usize>) -> BufWriteGuard {
self.stage_buf.map_write(range).unwrap()
}
}