mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
New clip implementation
This PR reworks the clip implementation. The highlight is that clip bounding box accounting is now done on GPU rather than CPU. The clip mask is also rasterized on EndClip rather than BeginClip, which decreases memory traffic needed for the clip stack. This is a pretty good working state, but not all cleanup has been applied. An important next step is to remove the CPU clip accounting (it is computed and encoded, but that result is not used). Another step is to remove the Annotated structure entirely. Fixes #88. Also relevant to #119
This commit is contained in:
parent
a968f13382
commit
3b67a4e7c1
|
@ -20,6 +20,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
|||
|
||||
#include "annotated.h"
|
||||
#include "bins.h"
|
||||
#include "drawtag.h"
|
||||
|
||||
// scale factors useful for converting coordinates to bins
|
||||
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||
|
@ -35,6 +36,47 @@ shared uint count[N_SLICE][N_TILE];
|
|||
shared Alloc sh_chunk_alloc[N_TILE];
|
||||
shared bool sh_alloc_failed;
|
||||
|
||||
// Fetch the draw monoid of a draw element. Each element owns two
// consecutive words at drawmonoid_alloc: path_ix followed by clip_ix.
DrawMonoid load_draw_monoid(uint element_ix) {
    // The alloc offset is in bytes; shift to get a word index into memory[].
    uint word = 2 * element_ix + (conf.drawmonoid_alloc.offset >> 2);
    return DrawMonoid(memory[word], memory[word + 1]);
}
|
||||
|
||||
// Read the bounding box that clip processing computed for clip-stream
// entry `clip_ix`. The box is four floats (x0, y0, x1, y1) stored as raw
// bit patterns in consecutive words at clip_bbox_alloc.
vec4 load_clip_bbox(uint clip_ix) {
    uint word = 4 * clip_ix + (conf.clip_bbox_alloc.offset >> 2);
    return vec4(
        uintBitsToFloat(memory[word]),
        uintBitsToFloat(memory[word + 1]),
        uintBitsToFloat(memory[word + 2]),
        uintBitsToFloat(memory[word + 3]));
}
|
||||
|
||||
// Intersect two boxes laid out as (x0, y0, x1, y1): take the larger of the
// two min corners and the smaller of the two max corners. No clamping is
// done, so disjoint inputs yield a box with x1 < x0 and/or y1 < y0.
vec4 bbox_intersect(vec4 a, vec4 b) {
    vec2 lo = max(a.xy, b.xy);
    vec2 hi = min(a.zw, b.zw);
    return vec4(lo, hi);
}
|
||||
|
||||
// Load the bounding box of path `path_ix` from the bbox buffer (as written
// by pathseg). Every path has a 6-word record at bbox_alloc; the first four
// words hold left, top, right, bottom as unsigned integers biased by 32768.
vec4 load_path_bbox(uint path_ix) {
    uint word = 6 * path_ix + (conf.bbox_alloc.offset >> 2);
    vec4 biased = vec4(
        float(memory[word]),
        float(memory[word + 1]),
        float(memory[word + 2]),
        float(memory[word + 3]));
    // Remove the bias to recover signed coordinates.
    return biased - 32768.0;
}
|
||||
|
||||
// Overwrite the bounding box of the annotated element at `ref` with `bbox`.
// The four floats are written as raw bits into words 1..4 of the element;
// word 0 is left untouched.
void store_path_bbox(AnnotatedRef ref, vec4 bbox) {
    uvec4 bits = floatBitsToUint(bbox);
    uint word = ref.offset >> 2;
    memory[word + 1] = bits.x;
    memory[word + 2] = bits.y;
    memory[word + 3] = bits.z;
    memory[word + 4] = bits.w;
}
|
||||
|
||||
void main() {
|
||||
uint my_n_elements = conf.n_elements;
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
|
@ -61,13 +103,27 @@ void main() {
|
|||
case Annotated_Image:
|
||||
case Annotated_BeginClip:
|
||||
case Annotated_EndClip:
|
||||
// Note: we take advantage of the fact that these drawing elements
|
||||
// have the bbox at the same place in their layout.
|
||||
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
|
||||
x0 = int(floor(clip.bbox.x * SX));
|
||||
y0 = int(floor(clip.bbox.y * SY));
|
||||
x1 = int(ceil(clip.bbox.z * SX));
|
||||
y1 = int(ceil(clip.bbox.w * SY));
|
||||
DrawMonoid draw_monoid = load_draw_monoid(element_ix);
|
||||
uint path_ix = draw_monoid.path_ix;
|
||||
vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||
uint clip_ix = draw_monoid.clip_ix;
|
||||
if (clip_ix > 0) {
|
||||
clip_bbox = load_clip_bbox(clip_ix - 1);
|
||||
}
|
||||
// For clip elements, clip_bbox is the bbox of the clip path, intersected
|
||||
// with enclosing clips.
|
||||
// For other elements, it is the bbox of the enclosing clips.
|
||||
|
||||
vec4 path_bbox = load_path_bbox(path_ix);
|
||||
vec4 bbox = bbox_intersect(path_bbox, clip_bbox);
|
||||
// Avoid negative-size bbox (is this necessary)?
|
||||
bbox.zw = max(bbox.xy, bbox.zw);
|
||||
// Store clip-intersected bbox for tile_alloc.
|
||||
store_path_bbox(ref, bbox);
|
||||
x0 = int(floor(bbox.x * SX));
|
||||
y0 = int(floor(bbox.y * SY));
|
||||
x1 = int(ceil(bbox.z * SX));
|
||||
y1 = int(ceil(bbox.w * SY));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ rule dxil
|
|||
rule msl
|
||||
command = $spirv_cross --msl $in --output $out $msl_flags
|
||||
|
||||
build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h
|
||||
build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h
|
||||
build gen/binning.hlsl: hlsl gen/binning.spv
|
||||
build gen/binning.dxil: dxil gen/binning.hlsl
|
||||
build gen/binning.msl: msl gen/binning.spv
|
||||
|
@ -119,6 +119,16 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
|
|||
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
|
||||
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
|
||||
|
||||
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
|
||||
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
|
||||
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
|
||||
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h
|
||||
build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
|
||||
build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
|
||||
build gen/clip_reduce.msl: msl gen/clip_reduce.spv
|
||||
|
||||
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h
|
||||
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
|
||||
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
|
||||
build gen/clip_leaf.msl: msl gen/clip_leaf.spv
|
||||
|
||||
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
|
||||
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
|
||||
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
|
||||
|
|
287
piet-gpu/shader/clip_leaf.comp
Normal file
287
piet-gpu/shader/clip_leaf.comp
Normal file
|
@ -0,0 +1,287 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The second dispatch of clip stack processing.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||
#define PARTITION_SIZE WG_SIZE
|
||||
|
||||
layout(local_size_x = WG_SIZE) in;
|
||||
|
||||
layout(binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
|
||||
// Some of this is cut'n'paste duplication with the reduce pass, and
|
||||
// arguably should be moved to a common .h file.
|
||||
// The bicyclic monoid
|
||||
|
||||
// One entry of the cross-partition clip stack, as read by load_clip_el
// (five words per entry at clip_stack_alloc).
struct ClipEl {
|
||||
// Clip-stream index of the push that created this entry; main() passes
// it to load_path_ix to recover the path index.
|
||||
uint parent_ix;
|
||||
// Bounding box stored with the entry, laid out as (x0, y0, x1, y1).
|
||||
vec4 bbox;
|
||||
};
|
||||
|
||||
// Element of the bicyclic monoid used to summarize a run of clip pushes
// and pops. main() encodes a single pop as Bic(1, 0) and a single push as
// Bic(0, 1); bic_combine cancels pushes against following pops.
struct Bic {
|
||||
// Number of pops not matched by a push within the summarized run.
uint a;
|
||||
// Number of pushes not matched by a pop within the summarized run.
uint b;
|
||||
};
|
||||
|
||||
// Combine two bicyclic-monoid elements, `x` preceding `y` in the stream.
// Unmatched pushes of `x` (x.b) cancel against unmatched pops of `y` (y.a);
// whatever is left over on either side is carried into the result.
Bic bic_combine(Bic x, Bic y) {
    uint cancelled = min(x.b, y.a);
    Bic combined;
    combined.a = x.a + y.a - cancelled;
    combined.b = x.b + y.b - cancelled;
    return combined;
}
|
||||
|
||||
// Load the bounding box of path `path_ix` from the bbox buffer (as written
// by pathseg). Every path has a 6-word record at bbox_alloc; the first four
// words hold left, top, right, bottom as unsigned integers biased by 32768.
vec4 load_path_bbox(uint path_ix) {
    uint word = 6 * path_ix + (conf.bbox_alloc.offset >> 2);
    vec4 biased = vec4(
        float(memory[word]),
        float(memory[word + 1]),
        float(memory[word + 2]),
        float(memory[word + 3]));
    // Remove the bias to recover signed coordinates.
    return biased - 32768.0;
}
|
||||
|
||||
// Intersect two boxes laid out as (x0, y0, x1, y1): take the larger of the
// two min corners and the smaller of the two max corners. No clamping is
// done, so disjoint inputs yield a box with x1 < x0 and/or y1 < y0.
vec4 bbox_intersect(vec4 a, vec4 b) {
    vec2 lo = max(a.xy, b.xy);
    vec2 hi = min(a.zw, b.zw);
    return vec4(lo, hi);
}
|
||||
|
||||
shared Bic sh_bic[WG_SIZE * 2 - 2];
|
||||
shared uint sh_stack[PARTITION_SIZE];
|
||||
shared vec4 sh_stack_bbox[PARTITION_SIZE];
|
||||
shared uint sh_link[PARTITION_SIZE];
|
||||
shared vec4 sh_bbox[PARTITION_SIZE];
|
||||
|
||||
// This is adapted directly from the stack monoid impl.
|
||||
// Return value is reference within partition if >= 0,
|
||||
// otherwise reference to stack.
|
||||
// Search backwards from this invocation for the nearest preceding push that
// is still unmatched, using the binary tree of Bic reductions in sh_bic.
//
// `bic` is an accumulator: the caller passes the reduction to start from
// (main() passes Bic(0, 0)); on return it holds the reduction of the entries
// that were skipped over.
//
// Returns the in-partition index of the push when one is found. Otherwise
// returns ~0u - bic.a, which is negative when viewed as int and identifies a
// slot on the stack of preceding partitions.
uint search_link(inout Bic bic) {
|
||||
uint ix = gl_LocalInvocationID.x;
|
||||
uint j = 0;
|
||||
// Upward pass: walk from the leaves towards the root, absorbing whole
// left-sibling subtrees as long as they contain no unmatched push.
while (j < LG_WG_SIZE) {
|
||||
// Word offset in sh_bic of tree level j (level 0 starts at 0).
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
|
||||
// Only when bit j is set is there a left sibling at this level.
if (((ix >> j) & 1) != 0) {
|
||||
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
|
||||
// An unmatched push lies inside this subtree; stop climbing.
if (test.b > 0) {
|
||||
break;
|
||||
}
|
||||
bic = test;
|
||||
ix -= 1u << j;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
// Downward pass: descend into the subtree found above, absorbing every
// node that still leaves no unmatched push, to pin down the exact entry.
if (ix > 0) {
|
||||
while (j > 0) {
|
||||
j--;
|
||||
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
|
||||
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
|
||||
if (test.b == 0) {
|
||||
bic = test;
|
||||
ix -= 1u << j;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
// ix is the smallest value such that reduce(ix..th).b == 0
|
||||
if (ix > 0) {
|
||||
// The entry just before ix is the unmatched push.
return ix - 1;
|
||||
} else {
|
||||
// Nothing found in this partition: encode the number of pops still
// unmatched (bic.a) as a negative-as-int stack reference.
return ~0u - bic.a;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the Bic reduction of partition `ix`: two consecutive words (a, b)
// at clip_bic_alloc.
Bic load_bic(uint ix) {
    uint word = 2 * ix + (conf.clip_bic_alloc.offset >> 2);
    Bic result;
    result.a = memory[word];
    result.b = memory[word + 1];
    return result;
}
|
||||
|
||||
// Read clip stack element `ix` from clip_stack_alloc. Each element is five
// words: parent_ix, then the bbox (x0, y0, x1, y1) as raw float bits.
ClipEl load_clip_el(uint ix) {
    uint word = 5 * ix + (conf.clip_stack_alloc.offset >> 2);
    ClipEl el;
    el.parent_ix = memory[word];
    el.bbox = vec4(
        uintBitsToFloat(memory[word + 1]),
        uintBitsToFloat(memory[word + 2]),
        uintBitsToFloat(memory[word + 3]),
        uintBitsToFloat(memory[word + 4]));
    return el;
}
|
||||
|
||||
// Read entry `ix` of the clip stream at clip_alloc.
//
// Indices at or beyond conf.n_clip (e.g. the padding of a partial final
// partition) are not read from memory. They are reported as 0x80000000,
// whose sign bit makes callers that test `int(inp) >= 0` treat the entry as
// a pop rather than a push, so no further loads follow from it. An
// alternative would be to memset the padding in the command queue.
uint load_path_ix(uint ix) {
    if (ix >= conf.n_clip) {
        return 0x80000000;
    }
    return memory[(conf.clip_alloc.offset >> 2) + ix];
}
|
||||
|
||||
// Store the computed bounding box for clip-stream entry `ix` as four raw
// float words (x0, y0, x1, y1) at clip_bbox_alloc.
//
// Entries at or beyond conf.n_clip are ignored. main() calls this for every
// invocation, including the padding invocations of a partial final
// partition; load_path_ix already guards its reads the same way, and this
// keeps those invocations from writing past the last real clip entry.
void store_clip_bbox(uint ix, vec4 bbox) {
    if (ix >= conf.n_clip) {
        return;
    }
    uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * ix;
    memory[base] = floatBitsToUint(bbox.x);
    memory[base + 1] = floatBitsToUint(bbox.y);
    memory[base + 2] = floatBitsToUint(bbox.z);
    memory[base + 3] = floatBitsToUint(bbox.w);
}
|
||||
|
||||
// Entry point of the clip leaf pass; each invocation handles one
// clip-stream entry (index gl_GlobalInvocationID.x).
//
// For every entry it computes a bounding box and stores it with
// store_clip_bbox: for a push, the path's bbox intersected along the chain
// of enclosing clips; for a pop, the bbox of the "grandparent" (the clip
// that encloses the one being popped). For pops it also patches the draw
// monoid so that path_ix at the EndClip matches its BeginClip.
void main() {
|
||||
// materialize stack up to the start of this partition. This
|
||||
// is based on the pure stack monoid, but with two additions.
|
||||
|
||||
// First, (this only matters if the stack goes deeper than the
|
||||
// partition size, which might be unlikely in practice), the
|
||||
// topmost stack element from each partition is picked, then an
|
||||
// exclusive scan of those. Also note that if this is skipped,
|
||||
// a scan is not needed in the reduce stage.
|
||||
|
||||
// Second, after the stream compaction, do a scan of the retrieved
|
||||
// bbox values.
|
||||
uint th = gl_LocalInvocationID.x;
|
||||
Bic bic = Bic(0, 0);
|
||||
// Thread th loads the reduction of partition th, but only for partitions
// that precede this workgroup; the remaining slots stay at the identity.
if (th < gl_WorkGroupID.x) {
|
||||
bic = load_bic(th);
|
||||
}
|
||||
sh_bic[th] = bic;
|
||||
// Reverse scan: afterwards sh_bic[i] is the combination of slots
// i..WG_SIZE-1.
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
if (th + (1u << i) < WG_SIZE) {
|
||||
Bic other = sh_bic[th + (1u << i)];
|
||||
bic = bic_combine(bic, other);
|
||||
}
|
||||
barrier();
|
||||
sh_bic[th] = bic;
|
||||
}
|
||||
barrier();
|
||||
// Number of pushes from preceding partitions that are still open at the
// start of this partition.
uint stack_size = sh_bic[0].b;
|
||||
|
||||
// TODO: do bbox scan here (to unlock greater stack depth)
|
||||
|
||||
// binary search in stack
|
||||
// sp is the stack slot this thread materializes, counted from the top of
// the stack (the last thread gets sp == 0).
uint sp = PARTITION_SIZE - 1 - th;
|
||||
uint ix = 0;
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
uint probe = ix + (uint(PARTITION_SIZE / 2) >> i);
|
||||
if (sp < sh_bic[probe].b) {
|
||||
ix = probe;
|
||||
}
|
||||
}
|
||||
// ix is largest value such that sp < sh_bic[ix].b (if any)
|
||||
uint b = sh_bic[ix].b;
|
||||
// Default to an effectively infinite box, the identity for
// bbox_intersect.
vec4 bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||
if (sp < b) {
|
||||
// maybe store the index here for future use?
|
||||
ClipEl el = load_clip_el(ix * PARTITION_SIZE + b - sp - 1);
|
||||
sh_stack[th] = el.parent_ix;
|
||||
bbox = el.bbox;
|
||||
// other element values here?
|
||||
}
|
||||
|
||||
// forward scan of bbox values of prefix stack
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
sh_stack_bbox[th] = bbox;
|
||||
barrier();
|
||||
if (th >= (1u << i)) {
|
||||
bbox = bbox_intersect(sh_stack_bbox[th - (1u << i)], bbox);
|
||||
}
|
||||
barrier();
|
||||
}
|
||||
sh_stack_bbox[th] = bbox;
|
||||
|
||||
// Read input and compute bicyclic semigroup binary tree
|
||||
uint inp = load_path_ix(gl_GlobalInvocationID.x);
|
||||
// A push is a non-negative path index; anything with the sign bit set is
// a pop.
bool is_push = int(inp) >= 0;
|
||||
bic = Bic(1 - uint(is_push), uint(is_push));
|
||||
sh_bic[th] = bic;
|
||||
if (is_push) {
|
||||
bbox = load_path_bbox(inp);
|
||||
} else {
|
||||
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||
}
|
||||
// Level 0 of the tree (the leaves) starts at sh_bic[0]; each iteration
// builds the next level up at outbase from pairs of the level below.
uint inbase = 0;
|
||||
for (uint i = 0; i < LG_WG_SIZE - 1; i++) {
|
||||
uint outbase = 2 * WG_SIZE - (1u << (LG_WG_SIZE - i));
|
||||
barrier();
|
||||
if (th < (1u << (LG_WG_SIZE - 1 - i))) {
|
||||
sh_bic[outbase + th] = bic_combine(sh_bic[inbase + th * 2], sh_bic[inbase + th * 2 + 1]);
|
||||
}
|
||||
inbase = outbase;
|
||||
}
|
||||
barrier();
|
||||
// Search for predecessor node
|
||||
bic = Bic(0, 0);
|
||||
uint link = search_link(bic);
|
||||
// we use N_SEQ > 1 convention here:
|
||||
// link >= 0 is index within partition
|
||||
// link < 0 is reference to stack
|
||||
|
||||
// We want grandparent bbox for pop nodes, so follow those links.
|
||||
sh_link[th] = link;
|
||||
barrier();
|
||||
uint grandparent;
|
||||
if (int(link) >= 0) {
|
||||
grandparent = sh_link[link];
|
||||
} else {
|
||||
// Stack references count downwards, so the next enclosing entry is
// one slot further from the top.
grandparent = link - 1;
|
||||
}
|
||||
|
||||
// Resolve parent
|
||||
// parent is a global clip-stream index, or ~0u when there is none.
uint parent;
|
||||
if (int(link) >= 0) {
|
||||
parent = gl_WorkGroupID.x * PARTITION_SIZE + link;
|
||||
} else if (int(link + stack_size) >= 0) {
|
||||
parent = sh_stack[PARTITION_SIZE + link];
|
||||
} else {
|
||||
parent = ~0u;
|
||||
}
|
||||
|
||||
// bbox scan along parent links
|
||||
// Each iteration intersects with the box of the current link target and
// then jumps to that target's own link.
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
// sh_link was already stored for first iteration
|
||||
if (i != 0) {
|
||||
sh_link[th] = link;
|
||||
}
|
||||
sh_bbox[th] = bbox;
|
||||
barrier();
|
||||
if (int(link) >= 0) {
|
||||
bbox = bbox_intersect(sh_bbox[link], bbox);
|
||||
link = sh_link[link];
|
||||
}
|
||||
barrier();
|
||||
}
|
||||
// Finish with the scanned bbox of the stack slot the chain ends on, if
// that slot exists.
if (int(link + stack_size) >= 0) {
|
||||
bbox = bbox_intersect(sh_stack_bbox[PARTITION_SIZE + link], bbox);
|
||||
}
|
||||
// At this point, bbox is the reduction of bounding boxes along the tree.
|
||||
sh_bbox[th] = bbox;
|
||||
barrier();
|
||||
|
||||
uint path_ix = inp;
|
||||
if (!is_push && gl_GlobalInvocationID.x < conf.n_clip) {
|
||||
// Is this load expensive? If so, it's loaded earlier for in-partition
|
||||
// and is in the ClipEl for cross-partition.
|
||||
// If not, can probably get rid of it in the stack intermediate buf.
|
||||
path_ix = load_path_ix(parent);
|
||||
// For a pop, ~inp is used as the index of the draw monoid entry to
// patch.
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp;
|
||||
// Fix up drawmonoid so path_ix at EndClip matches BeginClip
|
||||
memory[drawmonoid_out_base] = path_ix;
|
||||
|
||||
// Replace the pop's bbox with that of the grandparent, taken from
// the partition, from the stack, or left infinite if there is none.
if (int(grandparent) >= 0) {
|
||||
bbox = sh_bbox[grandparent];
|
||||
} else if (int(grandparent + stack_size) >= 0) {
|
||||
bbox = sh_stack_bbox[PARTITION_SIZE + grandparent];
|
||||
} else {
|
||||
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||
}
|
||||
}
|
||||
store_clip_bbox(gl_GlobalInvocationID.x, bbox);
|
||||
}
|
148
piet-gpu/shader/clip_reduce.comp
Normal file
148
piet-gpu/shader/clip_reduce.comp
Normal file
|
@ -0,0 +1,148 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The reduce pass for clip stack processing.
|
||||
|
||||
// The primary input is a sequence of path ids representing paths to
|
||||
// push. A pop is encoded as the bitwise complement of the EndClip's draw element index, so its sign bit is set.
|
||||
|
||||
// For each path, the bounding box is loaded from the path bbox buffer
|
||||
// (bbox_alloc, as written by pathseg).
|
||||
|
||||
// Output is a stack monoid reduction for the partition. The Bic
|
||||
// is stored at clip_bic_alloc, and the stack slice at clip_stack_alloc.
|
||||
|
||||
// Note: for this shader, only pushes are represented in the stack
|
||||
// monoid reduction output, so we don't have to worry about the
|
||||
// interpretation of pops.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||
#define PARTITION_SIZE WG_SIZE
|
||||
|
||||
layout(local_size_x = WG_SIZE) in;
|
||||
|
||||
layout(binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
|
||||
// The intermediate state for clip processing: one entry per push that is
// still unmatched at the end of its partition. Written by store_clip_el as
// five words per entry at clip_stack_alloc.
|
||||
struct ClipEl {
|
||||
// Global clip-stream index of the push (local index within the partition
// plus gl_WorkGroupID.x * PARTITION_SIZE, see main()).
|
||||
uint parent_ix;
|
||||
// Bounding box of the pushed path, laid out as (x0, y0, x1, y1).
|
||||
vec4 bbox;
|
||||
};
|
||||
|
||||
// The bicyclic monoid, used to summarize a run of clip pushes and pops.
// main() encodes a single pop as Bic(1, 0) and a single push as Bic(0, 1);
// bic_combine cancels pushes against following pops.
|
||||
struct Bic {
|
||||
// Number of pops not matched by a push within the summarized run.
uint a;
|
||||
// Number of pushes not matched by a pop within the summarized run.
uint b;
|
||||
};
|
||||
|
||||
// Combine two bicyclic-monoid elements, `x` preceding `y` in the stream.
// Unmatched pushes of `x` (x.b) cancel against unmatched pops of `y` (y.a);
// whatever is left over on either side is carried into the result.
Bic bic_combine(Bic x, Bic y) {
    uint cancelled = min(x.b, y.a);
    Bic combined;
    combined.a = x.a + y.a - cancelled;
    combined.b = x.b + y.b - cancelled;
    return combined;
}
|
||||
|
||||
shared Bic sh_bic[WG_SIZE];
|
||||
shared uint sh_parent[WG_SIZE];
|
||||
shared uint sh_path_ix[WG_SIZE];
|
||||
shared vec4 sh_bbox[WG_SIZE];
|
||||
|
||||
// Load the bounding box of path `path_ix` from the bbox buffer (as written
// by pathseg). Every path has a 6-word record at bbox_alloc; the first four
// words hold left, top, right, bottom as unsigned integers biased by 32768.
vec4 load_path_bbox(uint path_ix) {
    uint word = 6 * path_ix + (conf.bbox_alloc.offset >> 2);
    vec4 biased = vec4(
        float(memory[word]),
        float(memory[word + 1]),
        float(memory[word + 2]),
        float(memory[word + 3]));
    // Remove the bias to recover signed coordinates.
    return biased - 32768.0;
}
|
||||
|
||||
// Intersect two boxes laid out as (x0, y0, x1, y1): take the larger of the
// two min corners and the smaller of the two max corners. No clamping is
// done, so disjoint inputs yield a box with x1 < x0 and/or y1 < y0.
vec4 bbox_intersect(vec4 a, vec4 b) {
    vec2 lo = max(a.xy, b.xy);
    vec2 hi = min(a.zw, b.zw);
    return vec4(lo, hi);
}
|
||||
|
||||
// Write the Bic reduction of partition `ix` as two consecutive words
// (a, then b) at clip_bic_alloc.
void store_bic(uint ix, Bic bic) {
    uint word = 2 * ix + (conf.clip_bic_alloc.offset >> 2);
    memory[word] = bic.a;
    memory[word + 1] = bic.b;
}
|
||||
|
||||
// Write clip stack element `ix` to clip_stack_alloc. Each element takes five
// words: parent_ix, then the bbox (x0, y0, x1, y1) as raw float bits.
void store_clip_el(uint ix, ClipEl el) {
    uint word = 5 * ix + (conf.clip_stack_alloc.offset >> 2);
    uvec4 bits = floatBitsToUint(el.bbox);
    memory[word] = el.parent_ix;
    memory[word + 1] = bits.x;
    memory[word + 2] = bits.y;
    memory[word + 3] = bits.z;
    memory[word + 4] = bits.w;
}
|
||||
|
||||
// Entry point of the clip reduce pass; each invocation handles one
// clip-stream entry (index gl_GlobalInvocationID.x).
//
// Per workgroup it writes the Bic reduction of the whole partition
// (store_bic) and, for every push that is still unmatched at the end of the
// partition, one ClipEl (store_clip_el) holding the push's global
// clip-stream index and the bbox of its path.
void main() {
|
||||
uint th = gl_LocalInvocationID.x;
|
||||
// NOTE(review): unlike the leaf pass, this read is not guarded by
// conf.n_clip; presumably the dispatch covers only full partitions.
// Confirm against the host code.
uint inp = memory[(conf.clip_alloc.offset >> 2) + gl_GlobalInvocationID.x];
|
||||
// A push is a non-negative path index; anything with the sign bit set is
// a pop.
bool is_push = int(inp) >= 0;
|
||||
// reverse scan of bicyclic semigroup
|
||||
Bic bic = Bic(1 - uint(is_push), uint(is_push));
|
||||
sh_bic[gl_LocalInvocationID.x] = bic;
|
||||
// Afterwards sh_bic[i] is the combination of entries i..WG_SIZE-1.
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
if (th + (1u << i) < WG_SIZE) {
|
||||
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
|
||||
bic = bic_combine(bic, other);
|
||||
}
|
||||
barrier();
|
||||
sh_bic[th] = bic;
|
||||
}
|
||||
// Thread 0 holds the reduction of the whole partition.
if (th == 0) {
|
||||
store_bic(gl_WorkGroupID.x, bic);
|
||||
}
|
||||
barrier();
|
||||
// Number of pushes left unmatched at the end of this partition.
uint size = sh_bic[0].b;
|
||||
// Reduction of all entries after this one (identity for the last thread).
bic = Bic(0, 0);
|
||||
if (th + 1 < WG_SIZE) {
|
||||
bic = sh_bic[th + 1];
|
||||
}
|
||||
// A push survives only if no unmatched pop follows it in the partition.
// Surviving pushes are compacted in stream order into slots 0..size-1.
if (is_push && bic.a == 0) {
|
||||
uint local_ix = size - bic.b - 1;
|
||||
sh_parent[local_ix] = th;
|
||||
sh_path_ix[local_ix] = inp;
|
||||
}
|
||||
barrier();
|
||||
// Do forward scan of bounding box intersection
|
||||
// bbox and path_ix are only assigned (and only used) by threads with
// th < size.
vec4 bbox;
|
||||
uint path_ix;
|
||||
if (th < size) {
|
||||
path_ix = sh_path_ix[th];
|
||||
bbox = load_path_bbox(path_ix);
|
||||
}
|
||||
// Not necessary if depth is bounded by wg size
|
||||
#if 0
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
// We gate so we never access uninit data, but it might
|
||||
// be more efficient to avoid the conditionals.
|
||||
if (th < size) {
|
||||
sh_bbox[th] = bbox;
|
||||
}
|
||||
barrier();
|
||||
if (th < size && th >= (1u << i)) {
|
||||
bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
|
||||
}
|
||||
barrier();
|
||||
}
|
||||
#endif
|
||||
if (th < size) {
|
||||
// Convert the push's local thread index to a global clip-stream index.
uint parent_ix = sh_parent[th] + gl_WorkGroupID.x * PARTITION_SIZE;
|
||||
ClipEl el = ClipEl(parent_ix, bbox);
|
||||
store_clip_el(gl_GlobalInvocationID.x, el);
|
||||
}
|
||||
}
|
|
@ -136,9 +136,6 @@ void main() {
|
|||
// currently in a clip for which the entire tile has an alpha of zero, and
|
||||
// the value is the depth after the "begin clip" of that element.
|
||||
uint clip_zero_depth = 0;
|
||||
// State for the "clip one" optimization. If bit `i` is set, then that means
|
||||
// that the clip pushed at depth `i` has an alpha of all one.
|
||||
uint clip_one_mask = 0;
|
||||
|
||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||
// Items up to rd_ix have been read from sh_elements
|
||||
|
@ -227,9 +224,8 @@ void main() {
|
|||
case Annotated_LinGradient:
|
||||
case Annotated_BeginClip:
|
||||
case Annotated_EndClip:
|
||||
// We have one "path" for each element, even if the element isn't
|
||||
// actually a path (currently EndClip, but images etc in the future).
|
||||
uint path_ix = element_ix;
|
||||
uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
|
||||
uint path_ix = memory[drawmonoid_base];
|
||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||
uint stride = path.bbox.z - path.bbox.x;
|
||||
sh_tile_stride[th_ix] = stride;
|
||||
|
@ -283,15 +279,15 @@ void main() {
|
|||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||
bool include_tile = false;
|
||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
||||
include_tile = true;
|
||||
} else if (mem_ok) {
|
||||
if (mem_ok) {
|
||||
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
|
||||
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||
// Include the path in the tile if
|
||||
// - the tile contains at least a segment (tile offset non-zero)
|
||||
// - the tile is completely covered (backdrop non-zero)
|
||||
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
|
||||
bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip;
|
||||
// Always include the tile if it contains a path segment.
|
||||
// For draws, include the tile if it is solid.
|
||||
// For clips, include the tile if it is empty - this way, logic
|
||||
// below will suppress the drawing of inner elements.
|
||||
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip;
|
||||
}
|
||||
if (include_tile) {
|
||||
uint el_slice = el_ix / 32;
|
||||
|
@ -378,33 +374,26 @@ void main() {
|
|||
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||
clip_zero_depth = clip_depth + 1;
|
||||
} else if (tile.tile.offset == 0 && clip_depth < 32) {
|
||||
clip_one_mask |= (1u << clip_depth);
|
||||
} else {
|
||||
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
|
||||
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
if (clip_depth < 32) {
|
||||
clip_one_mask &= ~(1u << clip_depth);
|
||||
}
|
||||
}
|
||||
clip_depth++;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
|
||||
TileRef(sh_tile_base[element_ref_ix] +
|
||||
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
clip_depth--;
|
||||
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) {
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0);
|
||||
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -72,9 +72,14 @@ void main() {
|
|||
}
|
||||
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
|
||||
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
|
||||
uint clip_out_base = conf.clip_alloc.offset >> 2;
|
||||
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
|
||||
for (uint i = 0; i < N_ROWS; i++) {
|
||||
Monoid m = combine_tag_monoid(row, local[i]);
|
||||
Monoid m = row;
|
||||
if (i > 0) {
|
||||
m = combine_tag_monoid(m, local[i - 1]);
|
||||
}
|
||||
// m now holds exclusive scan of draw monoid
|
||||
memory[out_base + i * 2] = m.path_ix;
|
||||
memory[out_base + i * 2 + 1] = m.clip_ix;
|
||||
|
||||
|
@ -83,8 +88,9 @@ void main() {
|
|||
// later stages read scene + bbox etc.
|
||||
ElementRef this_ref = Element_index(ref, i);
|
||||
tag_word = Element_tag(this_ref).tag;
|
||||
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) {
|
||||
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1);
|
||||
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage ||
|
||||
tag_word == Element_BeginClip) {
|
||||
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
|
||||
float bbox_l = float(memory[bbox_offset]) - 32768.0;
|
||||
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
|
||||
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
|
||||
|
@ -142,21 +148,27 @@ void main() {
|
|||
anno_img.offset = fill_img.offset;
|
||||
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
|
||||
break;
|
||||
}
|
||||
} else if (tag_word == Element_BeginClip) {
|
||||
Clip begin_clip = Element_BeginClip_read(this_ref);
|
||||
case Element_BeginClip:
|
||||
AnnoBeginClip anno_begin_clip;
|
||||
// This is the absolute bbox, it's been transformed during encoding.
|
||||
anno_begin_clip.bbox = begin_clip.bbox;
|
||||
anno_begin_clip.bbox = bbox;
|
||||
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
|
||||
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
|
||||
break;
|
||||
}
|
||||
} else if (tag_word == Element_EndClip) {
|
||||
Clip end_clip = Element_EndClip_read(this_ref);
|
||||
AnnoEndClip anno_end_clip;
|
||||
// This bbox is expected to be the same as the begin one.
|
||||
anno_end_clip.bbox = end_clip.bbox;
|
||||
// The actual bbox will be reconstructed from clip stream output.
|
||||
anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
|
||||
}
|
||||
// Generate clip stream.
|
||||
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) {
|
||||
uint path_ix = ~(out_ix + i);
|
||||
if (tag_word == Element_BeginClip) {
|
||||
path_ix = m.path_ix;
|
||||
}
|
||||
memory[clip_out_base + m.clip_ix] = path_ix;
|
||||
}
|
||||
out_ref.offset += Annotated_size;
|
||||
}
|
||||
}
|
||||
|
|
5
piet-gpu/shader/gen/backdrop.hlsl
generated
5
piet-gpu/shader/gen/backdrop.hlsl
generated
|
@ -44,8 +44,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
5
piet-gpu/shader/gen/backdrop.msl
generated
5
piet-gpu/shader/gen/backdrop.msl
generated
|
@ -63,8 +63,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/backdrop.spv
generated
BIN
piet-gpu/shader/gen/backdrop.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
5
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
|
@ -44,8 +44,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
5
piet-gpu/shader/gen/backdrop_lg.msl
generated
5
piet-gpu/shader/gen/backdrop_lg.msl
generated
|
@ -63,8 +63,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/bbox_clear.hlsl
generated
7
piet-gpu/shader/gen/bbox_clear.hlsl
generated
|
@ -17,8 +17,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -39,7 +44,7 @@ struct SPIRV_Cross_Input
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x;
|
||||
if (ix < _21.Load(52))
|
||||
if (ix < _21.Load(68))
|
||||
{
|
||||
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
|
||||
_45.Store(out_ix * 4 + 8, 65535u);
|
||||
|
|
5
piet-gpu/shader/gen/bbox_clear.msl
generated
5
piet-gpu/shader/gen/bbox_clear.msl
generated
|
@ -22,8 +22,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/binning.dxil
generated
BIN
piet-gpu/shader/gen/binning.dxil
generated
Binary file not shown.
237
piet-gpu/shader/gen/binning.hlsl
generated
237
piet-gpu/shader/gen/binning.hlsl
generated
|
@ -9,16 +9,6 @@ struct MallocResult
|
|||
bool failed;
|
||||
};
|
||||
|
||||
struct AnnoEndClipRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoEndClip
|
||||
{
|
||||
float4 bbox;
|
||||
};
|
||||
|
||||
struct AnnotatedRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -40,6 +30,12 @@ struct BinInstance
|
|||
uint element_ix;
|
||||
};
|
||||
|
||||
struct DrawMonoid
|
||||
{
|
||||
uint path_ix;
|
||||
uint clip_ix;
|
||||
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
uint n_elements;
|
||||
|
@ -54,8 +50,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -64,8 +65,8 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
RWByteAddressBuffer _84 : register(u0, space0);
|
||||
ByteAddressBuffer _253 : register(t1, space0);
|
||||
RWByteAddressBuffer _94 : register(u0, space0);
|
||||
ByteAddressBuffer _202 : register(t1, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -93,7 +94,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _84.Load(offset * 4 + 8);
|
||||
uint v = _94.Load(offset * 4 + 8);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -102,36 +103,53 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
|
|||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint tag_and_flags = read_mem(param, param_1);
|
||||
AnnotatedTag _221 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _221;
|
||||
AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _181;
|
||||
}
|
||||
|
||||
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref)
|
||||
DrawMonoid load_draw_monoid(uint element_ix)
|
||||
{
|
||||
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix);
|
||||
uint path_ix = _94.Load(base * 4 + 8);
|
||||
uint clip_ix = _94.Load((base + 1u) * 4 + 8);
|
||||
DrawMonoid _222 = { path_ix, clip_ix };
|
||||
return _222;
|
||||
}
|
||||
|
||||
float4 load_clip_bbox(uint clip_ix)
|
||||
{
|
||||
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = asfloat(_94.Load(base * 4 + 8));
|
||||
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8));
|
||||
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8));
|
||||
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8));
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
float4 load_path_bbox(uint path_ix)
|
||||
{
|
||||
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
float4 bbox_intersect(float4 a, float4 b)
|
||||
{
|
||||
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||
}
|
||||
|
||||
void store_path_bbox(AnnotatedRef ref, float4 bbox)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 3u;
|
||||
uint raw3 = read_mem(param_6, param_7);
|
||||
AnnoEndClip s;
|
||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
|
||||
{
|
||||
AnnoEndClipRef _228 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
AnnoEndClipRef param_1 = _228;
|
||||
return AnnoEndClip_read(param, param_1);
|
||||
_94.Store((ix + 1u) * 4 + 8, asuint(bbox.x));
|
||||
_94.Store((ix + 2u) * 4 + 8, asuint(bbox.y));
|
||||
_94.Store((ix + 3u) * 4 + 8, asuint(bbox.z));
|
||||
_94.Store((ix + 4u) * 4 + 8, asuint(bbox.w));
|
||||
}
|
||||
|
||||
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||
|
@ -143,22 +161,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
|||
|
||||
MallocResult malloc(uint size)
|
||||
{
|
||||
uint _90;
|
||||
_84.InterlockedAdd(0, size, _90);
|
||||
uint offset = _90;
|
||||
uint _97;
|
||||
_84.GetDimensions(_97);
|
||||
_97 = (_97 - 8) / 4;
|
||||
uint _100;
|
||||
_94.InterlockedAdd(0, size, _100);
|
||||
uint offset = _100;
|
||||
uint _107;
|
||||
_94.GetDimensions(_107);
|
||||
_107 = (_107 - 8) / 4;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int(_97) * 4);
|
||||
r.failed = (offset + size) > uint(int(_107) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
{
|
||||
uint _119;
|
||||
_84.InterlockedMax(4, 1u, _119);
|
||||
uint _129;
|
||||
_94.InterlockedMax(4, 1u, _129);
|
||||
return r;
|
||||
}
|
||||
return r;
|
||||
|
@ -172,7 +190,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_84.Store(offset * 4 + 8, val);
|
||||
_94.Store(offset * 4 + 8, val);
|
||||
}
|
||||
|
||||
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
||||
|
@ -186,7 +204,7 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint my_n_elements = _253.Load(0);
|
||||
uint my_n_elements = _202.Load(0);
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
|
@ -198,15 +216,15 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||
AnnotatedRef _308 = { _253.Load(32) + (element_ix * 40u) };
|
||||
AnnotatedRef ref = _308;
|
||||
AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
|
||||
AnnotatedRef ref = _415;
|
||||
uint tag = 0u;
|
||||
if (element_ix < my_n_elements)
|
||||
{
|
||||
Alloc _318;
|
||||
_318.offset = _253.Load(32);
|
||||
Alloc _425;
|
||||
_425.offset = _202.Load(32);
|
||||
Alloc param;
|
||||
param.offset = _318.offset;
|
||||
param.offset = _425.offset;
|
||||
AnnotatedRef param_1 = ref;
|
||||
tag = Annotated_tag(param, param_1).tag;
|
||||
}
|
||||
|
@ -222,21 +240,38 @@ void comp_main()
|
|||
case 4u:
|
||||
case 5u:
|
||||
{
|
||||
Alloc _336;
|
||||
_336.offset = _253.Load(32);
|
||||
Alloc param_2;
|
||||
param_2.offset = _336.offset;
|
||||
AnnotatedRef param_3 = ref;
|
||||
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3);
|
||||
x0 = int(floor(clip.bbox.x * 0.00390625f));
|
||||
y0 = int(floor(clip.bbox.y * 0.00390625f));
|
||||
x1 = int(ceil(clip.bbox.z * 0.00390625f));
|
||||
y1 = int(ceil(clip.bbox.w * 0.00390625f));
|
||||
uint param_2 = element_ix;
|
||||
DrawMonoid draw_monoid = load_draw_monoid(param_2);
|
||||
uint path_ix = draw_monoid.path_ix;
|
||||
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||
uint clip_ix = draw_monoid.clip_ix;
|
||||
if (clip_ix > 0u)
|
||||
{
|
||||
uint param_3 = clip_ix - 1u;
|
||||
clip_bbox = load_clip_bbox(param_3);
|
||||
}
|
||||
uint param_4 = path_ix;
|
||||
float4 path_bbox = load_path_bbox(param_4);
|
||||
float4 param_5 = path_bbox;
|
||||
float4 param_6 = clip_bbox;
|
||||
float4 bbox = bbox_intersect(param_5, param_6);
|
||||
float4 _473 = bbox;
|
||||
float4 _475 = bbox;
|
||||
float2 _477 = max(_473.xy, _475.zw);
|
||||
bbox.z = _477.x;
|
||||
bbox.w = _477.y;
|
||||
AnnotatedRef param_7 = ref;
|
||||
float4 param_8 = bbox;
|
||||
store_path_bbox(param_7, param_8);
|
||||
x0 = int(floor(bbox.x * 0.00390625f));
|
||||
y0 = int(floor(bbox.y * 0.00390625f));
|
||||
x1 = int(ceil(bbox.z * 0.00390625f));
|
||||
y1 = int(ceil(bbox.w * 0.00390625f));
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint width_in_bins = ((_253.Load(8) + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((_253.Load(12) + 16u) - 1u) / 16u;
|
||||
uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
|
||||
x0 = clamp(x0, 0, int(width_in_bins));
|
||||
x1 = clamp(x1, x0, int(width_in_bins));
|
||||
y0 = clamp(y0, 0, int(height_in_bins));
|
||||
|
@ -251,8 +286,8 @@ void comp_main()
|
|||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||
while (y < y1)
|
||||
{
|
||||
uint _437;
|
||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437);
|
||||
uint _581;
|
||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581);
|
||||
x++;
|
||||
if (x == x1)
|
||||
{
|
||||
|
@ -267,15 +302,15 @@ void comp_main()
|
|||
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||
}
|
||||
uint param_4 = 0u;
|
||||
uint param_5 = 0u;
|
||||
bool param_6 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
|
||||
uint param_9 = 0u;
|
||||
uint param_10 = 0u;
|
||||
bool param_11 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
|
||||
if (element_count != 0u)
|
||||
{
|
||||
uint param_7 = element_count * 4u;
|
||||
MallocResult _487 = malloc(param_7);
|
||||
MallocResult chunk = _487;
|
||||
uint param_12 = element_count * 4u;
|
||||
MallocResult _631 = malloc(param_12);
|
||||
MallocResult chunk = _631;
|
||||
chunk_alloc = chunk.alloc;
|
||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||
if (chunk.failed)
|
||||
|
@ -283,32 +318,32 @@ void comp_main()
|
|||
sh_alloc_failed = true;
|
||||
}
|
||||
}
|
||||
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc _516;
|
||||
_516.offset = _253.Load(20);
|
||||
Alloc param_8;
|
||||
param_8.offset = _516.offset;
|
||||
uint param_9 = out_ix;
|
||||
uint param_10 = element_count;
|
||||
write_mem(param_8, param_9, param_10);
|
||||
Alloc _528;
|
||||
_528.offset = _253.Load(20);
|
||||
Alloc param_11;
|
||||
param_11.offset = _528.offset;
|
||||
uint param_12 = out_ix + 1u;
|
||||
uint param_13 = chunk_alloc.offset;
|
||||
write_mem(param_11, param_12, param_13);
|
||||
uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc _660;
|
||||
_660.offset = _202.Load(20);
|
||||
Alloc param_13;
|
||||
param_13.offset = _660.offset;
|
||||
uint param_14 = out_ix;
|
||||
uint param_15 = element_count;
|
||||
write_mem(param_13, param_14, param_15);
|
||||
Alloc _672;
|
||||
_672.offset = _202.Load(20);
|
||||
Alloc param_16;
|
||||
param_16.offset = _672.offset;
|
||||
uint param_17 = out_ix + 1u;
|
||||
uint param_18 = chunk_alloc.offset;
|
||||
write_mem(param_16, param_17, param_18);
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
bool _543;
|
||||
bool _687;
|
||||
if (!sh_alloc_failed)
|
||||
{
|
||||
_543 = _84.Load(4) != 0u;
|
||||
_687 = _94.Load(4) != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_543 = sh_alloc_failed;
|
||||
_687 = sh_alloc_failed;
|
||||
}
|
||||
if (_543)
|
||||
if (_687)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -327,12 +362,12 @@ void comp_main()
|
|||
}
|
||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||
BinInstanceRef _605 = { out_offset };
|
||||
BinInstance _607 = { element_ix };
|
||||
Alloc param_14 = out_alloc;
|
||||
BinInstanceRef param_15 = _605;
|
||||
BinInstance param_16 = _607;
|
||||
BinInstance_write(param_14, param_15, param_16);
|
||||
BinInstanceRef _749 = { out_offset };
|
||||
BinInstance _751 = { element_ix };
|
||||
Alloc param_19 = out_alloc;
|
||||
BinInstanceRef param_20 = _749;
|
||||
BinInstance param_21 = _751;
|
||||
BinInstance_write(param_19, param_20, param_21);
|
||||
}
|
||||
x++;
|
||||
if (x == x1)
|
||||
|
|
214
piet-gpu/shader/gen/binning.msl
generated
214
piet-gpu/shader/gen/binning.msl
generated
|
@ -18,16 +18,6 @@ struct MallocResult
|
|||
bool failed;
|
||||
};
|
||||
|
||||
struct AnnoEndClipRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoEndClip
|
||||
{
|
||||
float4 bbox;
|
||||
};
|
||||
|
||||
struct AnnotatedRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -49,6 +39,12 @@ struct BinInstance
|
|||
uint element_ix;
|
||||
};
|
||||
|
||||
struct DrawMonoid
|
||||
{
|
||||
uint path_ix;
|
||||
uint clip_ix;
|
||||
};
|
||||
|
||||
struct Memory
|
||||
{
|
||||
uint mem_offset;
|
||||
|
@ -75,8 +71,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -97,7 +98,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -105,46 +106,66 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = v_84.memory[offset];
|
||||
uint v = v_94.memory[offset];
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint tag_and_flags = read_mem(param, param_1, v_84, v_84BufferSize);
|
||||
uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
|
||||
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1, v_84, v_84BufferSize);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3, v_84, v_84BufferSize);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5, v_84, v_84BufferSize);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 3u;
|
||||
uint raw3 = read_mem(param_6, param_7, v_84, v_84BufferSize);
|
||||
AnnoEndClip s;
|
||||
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
||||
return s;
|
||||
uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
|
||||
uint path_ix = v_94.memory[base];
|
||||
uint clip_ix = v_94.memory[base + 1u];
|
||||
return DrawMonoid{ path_ix, clip_ix };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||
{
|
||||
Alloc param = a;
|
||||
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u };
|
||||
return AnnoEndClip_read(param, param_1, v_84, v_84BufferSize);
|
||||
uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = as_type<float>(v_94.memory[base]);
|
||||
float y0 = as_type<float>(v_94.memory[base + 1u]);
|
||||
float x1 = as_type<float>(v_94.memory[base + 2u]);
|
||||
float y1 = as_type<float>(v_94.memory[base + 3u]);
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||
{
|
||||
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(v_94.memory[base]) - 32768.0;
|
||||
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0;
|
||||
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0;
|
||||
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 bbox_intersect(thread const float4& a, thread const float4& b)
|
||||
{
|
||||
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
v_94.memory[ix + 1u] = as_type<uint>(bbox.x);
|
||||
v_94.memory[ix + 2u] = as_type<uint>(bbox.y);
|
||||
v_94.memory[ix + 3u] = as_type<uint>(bbox.z);
|
||||
v_94.memory[ix + 4u] = as_type<uint>(bbox.w);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -156,26 +177,26 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
MallocResult malloc(thread const uint& size, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
uint _90 = atomic_fetch_add_explicit((device atomic_uint*)&v_84.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _90;
|
||||
uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _100;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int((v_84BufferSize - 8) / 4) * 4);
|
||||
r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
{
|
||||
uint _119 = atomic_fetch_max_explicit((device atomic_uint*)&v_84.mem_error, 1u, memory_order_relaxed);
|
||||
uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed);
|
||||
return r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -183,27 +204,27 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_84.memory[offset] = val;
|
||||
v_94.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_84, constant uint& v_84BufferSize)
|
||||
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = s.element_ix;
|
||||
write_mem(param, param_1, param_2, v_84, v_84BufferSize);
|
||||
write_mem(param, param_1, param_2, v_94, v_94BufferSize);
|
||||
}
|
||||
|
||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_84 [[buffer(0)]], const device ConfigBuf& _253 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
threadgroup uint bitmaps[8][256];
|
||||
threadgroup short sh_alloc_failed;
|
||||
threadgroup uint count[8][256];
|
||||
threadgroup Alloc sh_chunk_alloc[256];
|
||||
constant uint& v_84BufferSize = spvBufferSizeConstants[0];
|
||||
uint my_n_elements = _253.conf.n_elements;
|
||||
constant uint& v_94BufferSize = spvBufferSizeConstants[0];
|
||||
uint my_n_elements = v_202.conf.n_elements;
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
|
@ -215,14 +236,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||
AnnotatedRef ref = AnnotatedRef{ _253.conf.anno_alloc.offset + (element_ix * 40u) };
|
||||
AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
|
||||
uint tag = 0u;
|
||||
if (element_ix < my_n_elements)
|
||||
{
|
||||
Alloc param;
|
||||
param.offset = _253.conf.anno_alloc.offset;
|
||||
param.offset = v_202.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_1 = ref;
|
||||
tag = Annotated_tag(param, param_1, v_84, v_84BufferSize).tag;
|
||||
tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
|
||||
}
|
||||
int x0 = 0;
|
||||
int y0 = 0;
|
||||
|
@ -236,19 +257,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
case 4u:
|
||||
case 5u:
|
||||
{
|
||||
Alloc param_2;
|
||||
param_2.offset = _253.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_3 = ref;
|
||||
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_84, v_84BufferSize);
|
||||
x0 = int(floor(clip.bbox.x * 0.00390625));
|
||||
y0 = int(floor(clip.bbox.y * 0.00390625));
|
||||
x1 = int(ceil(clip.bbox.z * 0.00390625));
|
||||
y1 = int(ceil(clip.bbox.w * 0.00390625));
|
||||
uint param_2 = element_ix;
|
||||
DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
|
||||
uint path_ix = draw_monoid.path_ix;
|
||||
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||
uint clip_ix = draw_monoid.clip_ix;
|
||||
if (clip_ix > 0u)
|
||||
{
|
||||
uint param_3 = clip_ix - 1u;
|
||||
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202);
|
||||
}
|
||||
uint param_4 = path_ix;
|
||||
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202);
|
||||
float4 param_5 = path_bbox;
|
||||
float4 param_6 = clip_bbox;
|
||||
float4 bbox = bbox_intersect(param_5, param_6);
|
||||
float4 _473 = bbox;
|
||||
float4 _475 = bbox;
|
||||
float2 _477 = fast::max(_473.xy, _475.zw);
|
||||
bbox.z = _477.x;
|
||||
bbox.w = _477.y;
|
||||
AnnotatedRef param_7 = ref;
|
||||
float4 param_8 = bbox;
|
||||
store_path_bbox(param_7, param_8, v_94, v_94BufferSize);
|
||||
x0 = int(floor(bbox.x * 0.00390625));
|
||||
y0 = int(floor(bbox.y * 0.00390625));
|
||||
x1 = int(ceil(bbox.z * 0.00390625));
|
||||
y1 = int(ceil(bbox.w * 0.00390625));
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint width_in_bins = ((_253.conf.width_in_tiles + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((_253.conf.height_in_tiles + 16u) - 1u) / 16u;
|
||||
uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
|
||||
x0 = clamp(x0, 0, int(width_in_bins));
|
||||
x1 = clamp(x1, x0, int(width_in_bins));
|
||||
y0 = clamp(y0, 0, int(height_in_bins));
|
||||
|
@ -263,7 +303,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||
while (y < y1)
|
||||
{
|
||||
uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||
uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||
x++;
|
||||
if (x == x1)
|
||||
{
|
||||
|
@ -278,15 +318,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||
}
|
||||
uint param_4 = 0u;
|
||||
uint param_5 = 0u;
|
||||
bool param_6 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
|
||||
uint param_9 = 0u;
|
||||
uint param_10 = 0u;
|
||||
bool param_11 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
|
||||
if (element_count != 0u)
|
||||
{
|
||||
uint param_7 = element_count * 4u;
|
||||
MallocResult _487 = malloc(param_7, v_84, v_84BufferSize);
|
||||
MallocResult chunk = _487;
|
||||
uint param_12 = element_count * 4u;
|
||||
MallocResult _631 = malloc(param_12, v_94, v_94BufferSize);
|
||||
MallocResult chunk = _631;
|
||||
chunk_alloc = chunk.alloc;
|
||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||
if (chunk.failed)
|
||||
|
@ -294,28 +334,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
sh_alloc_failed = short(true);
|
||||
}
|
||||
}
|
||||
uint out_ix = (_253.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc param_8;
|
||||
param_8.offset = _253.conf.bin_alloc.offset;
|
||||
uint param_9 = out_ix;
|
||||
uint param_10 = element_count;
|
||||
write_mem(param_8, param_9, param_10, v_84, v_84BufferSize);
|
||||
Alloc param_11;
|
||||
param_11.offset = _253.conf.bin_alloc.offset;
|
||||
uint param_12 = out_ix + 1u;
|
||||
uint param_13 = chunk_alloc.offset;
|
||||
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize);
|
||||
uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc param_13;
|
||||
param_13.offset = v_202.conf.bin_alloc.offset;
|
||||
uint param_14 = out_ix;
|
||||
uint param_15 = element_count;
|
||||
write_mem(param_13, param_14, param_15, v_94, v_94BufferSize);
|
||||
Alloc param_16;
|
||||
param_16.offset = v_202.conf.bin_alloc.offset;
|
||||
uint param_17 = out_ix + 1u;
|
||||
uint param_18 = chunk_alloc.offset;
|
||||
write_mem(param_16, param_17, param_18, v_94, v_94BufferSize);
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
bool _543;
|
||||
bool _687;
|
||||
if (!bool(sh_alloc_failed))
|
||||
{
|
||||
_543 = v_84.mem_error != 0u;
|
||||
_687 = v_94.mem_error != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_543 = bool(sh_alloc_failed);
|
||||
_687 = bool(sh_alloc_failed);
|
||||
}
|
||||
if (_543)
|
||||
if (_687)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -334,10 +374,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
}
|
||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||
Alloc param_14 = out_alloc;
|
||||
BinInstanceRef param_15 = BinInstanceRef{ out_offset };
|
||||
BinInstance param_16 = BinInstance{ element_ix };
|
||||
BinInstance_write(param_14, param_15, param_16, v_84, v_84BufferSize);
|
||||
Alloc param_19 = out_alloc;
|
||||
BinInstanceRef param_20 = BinInstanceRef{ out_offset };
|
||||
BinInstance param_21 = BinInstance{ element_ix };
|
||||
BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize);
|
||||
}
|
||||
x++;
|
||||
if (x == x1)
|
||||
|
|
BIN
piet-gpu/shader/gen/binning.spv
generated
BIN
piet-gpu/shader/gen/binning.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
Normal file
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
Normal file
Binary file not shown.
367
piet-gpu/shader/gen/clip_leaf.hlsl
generated
Normal file
367
piet-gpu/shader/gen/clip_leaf.hlsl
generated
Normal file
|
@ -0,0 +1,367 @@
|
|||
// Pair of unsigned counters; two of them are merged with bic_combine().
struct Bic
{
    uint a, b;
};

// Record read by load_clip_el(): one index word followed by four bbox words.
struct ClipEl
{
    uint parent_ix;
    float4 bbox;
};

// Byte offset of a region; users shift it right by 2 to obtain a word index.
struct Alloc
{
    uint offset;
};

// Configuration layout. Every member occupies one 32-bit word, so the
// declaration order fixes the byte offsets used by the raw `_80.Load(n)`
// calls in this shader: bbox_alloc = 40, drawmonoid_alloc = 44,
// clip_alloc = 48, clip_bic_alloc = 52, clip_stack_alloc = 56,
// clip_bbox_alloc = 60, n_clip = 72. Do not reorder members.
struct Config
{
    uint n_elements, n_pathseg;
    uint width_in_tiles, height_in_tiles;
    Alloc tile_alloc, bin_alloc, ptcl_alloc, pathseg_alloc;
    Alloc anno_alloc, trans_alloc, bbox_alloc, drawmonoid_alloc;
    Alloc clip_alloc, clip_bic_alloc, clip_stack_alloc, clip_bbox_alloc;
    uint n_trans, n_path, n_clip;
    uint trans_offset, linewidth_offset, pathtag_offset, pathseg_offset;
};

// Matches the [numthreads(256, 1, 1)] attribute on main().
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);

// All-zero Bic used to reset accumulators.
static const Bic _393 = { 0u, 0u };

// Read-only buffer addressed with the Config byte offsets listed above.
ByteAddressBuffer _80 : register(t1, space0);
// Read/write buffer; word i is accessed at byte address i * 4 + 8.
RWByteAddressBuffer _96 : register(u0, space0);

// Copies of the dispatch IDs, filled in by main() before comp_main() runs.
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;

struct SPIRV_Cross_Input
{
    uint3 gl_WorkGroupID : SV_GroupID;
    uint3 gl_LocalInvocationID : SV_GroupThreadID;
    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};

// sh_bic holds 256 leaf entries plus the reduced levels written by comp_main().
groupshared Bic sh_bic[510];
groupshared uint sh_stack[256];
groupshared float4 sh_stack_bbox[256];
groupshared uint sh_link[256];
groupshared float4 sh_bbox[256];
|
||||
|
||||
// Reads the ix-th Bic (two consecutive words) from the region whose byte
// offset is stored at config byte 52 (clip_bic_alloc).
Bic load_bic(uint ix)
{
    // >> 2 turns the region's byte offset into a word index.
    uint first_word = (_80.Load(52) >> 2u) + (ix * 2u);

    Bic pair;
    // Word i of _96 is found at byte i * 4 + 8.
    pair.a = _96.Load(first_word * 4 + 8);
    pair.b = _96.Load((first_word + 1u) * 4 + 8);
    return pair;
}
|
||||
|
||||
// Merges two Bic values: the counters are summed and min(x.b, y.a) is
// removed from both sums.
Bic bic_combine(Bic x, Bic y)
{
    uint cancelled = min(x.b, y.a);

    Bic merged;
    merged.a = (x.a + y.a) - cancelled;
    merged.b = (x.b + y.b) - cancelled;
    return merged;
}
|
||||
|
||||
// Reads the ix-th five-word ClipEl record from the region whose byte offset
// is stored at config byte 56 (clip_stack_alloc).
ClipEl load_clip_el(uint ix)
{
    uint w = (_80.Load(56) >> 2u) + (5u * ix);

    ClipEl el;
    // Word 0 is the parent index; words 1..4 are the bbox as raw float bits.
    el.parent_ix = _96.Load(w * 4 + 8);
    el.bbox.x = asfloat(_96.Load((w + 1u) * 4 + 8));
    el.bbox.y = asfloat(_96.Load((w + 2u) * 4 + 8));
    el.bbox.z = asfloat(_96.Load((w + 3u) * 4 + 8));
    el.bbox.w = asfloat(_96.Load((w + 4u) * 4 + 8));
    return el;
}
|
||||
|
||||
// Intersection of two boxes: component-wise max of the .xy pair and
// component-wise min of the .zw pair.
float4 bbox_intersect(float4 a, float4 b)
{
    float2 lower = max(a.xy, b.xy);
    float2 upper = min(a.zw, b.zw);
    return float4(lower, upper);
}
|
||||
|
||||
// Returns word ix of the region at config byte 48 (clip_alloc), or
// 0x80000000 when ix is not below the count at config byte 72 (n_clip).
uint load_path_ix(uint ix)
{
    // The sentinel has its top bit set, so `int(value) >= 0` tests fail on it.
    if (ix >= _80.Load(72))
    {
        return 0x80000000u;
    }

    uint word = (_80.Load(48) >> 2u) + ix;
    return _96.Load(word * 4 + 8);
}
|
||||
|
||||
// Loads the bbox of path `path_ix` from the region at config byte 40
// (bbox_alloc). Records are six words apart; the first four hold the box
// edges as unsigned integers biased by 32768.
float4 load_path_bbox(uint path_ix)
{
    uint w = (_80.Load(40) >> 2u) + (6u * path_ix);

    float left   = float(_96.Load(w * 4 + 8));
    float top    = float(_96.Load((w + 1u) * 4 + 8));
    float right  = float(_96.Load((w + 2u) * 4 + 8));
    float bottom = float(_96.Load((w + 3u) * 4 + 8));

    // Remove the bias from every edge.
    return float4(left - 32768.0f, top - 32768.0f, right - 32768.0f, bottom - 32768.0f);
}
|
||||
|
||||
// Searches the reduced levels stored in sh_bic, starting from this thread's
// local index. Entries are folded into `bic` with bic_combine() as long as
// the combined `.b` stays zero: first climbing to coarser levels, then
// descending again. Returns `position - 1` if a non-zero position remains,
// otherwise `0xFFFFFFFF - bic.a` (top bit set for the small `a` values seen
// here, so callers can test it with `int(result) >= 0`).
uint search_link(inout Bic bic)
{
    uint pos = gl_LocalInvocationID.x;
    uint level;

    // Upward sweep: only levels whose bit is set in `pos` are examined.
    for (level = 0u; level < 8u; level++)
    {
        if (((pos >> level) & 1u) == 0u)
        {
            continue;
        }
        // Start index of this level inside sh_bic (level 0 starts at 0).
        uint row = 512u - (2u << (8u - level));
        Bic candidate = bic_combine(sh_bic[(row + (pos >> level)) - 1u], bic);
        if (candidate.b > 0u)
        {
            // Stop here; `level` keeps its value for the downward sweep.
            break;
        }
        bic = candidate;
        pos -= (1u << level);
    }

    // Downward sweep through the finer levels.
    if (pos > 0u)
    {
        while (level > 0u)
        {
            level--;
            uint row = 512u - (2u << (8u - level));
            Bic candidate = bic_combine(sh_bic[(row + (pos >> level)) - 1u], bic);
            if (candidate.b == 0u)
            {
                bic = candidate;
                pos -= (1u << level);
            }
        }
    }

    if (pos > 0u)
    {
        return pos - 1u;
    }
    return 0xFFFFFFFFu - bic.a;
}
|
||||
|
||||
// Writes `bbox` as four raw float words into slot ix of the region at
// config byte 60 (clip_bbox_alloc).
void store_clip_bbox(uint ix, float4 bbox)
{
    uint w = (_80.Load(60) >> 2u) + (4u * ix);
    uint4 bits = asuint(bbox);

    _96.Store(w * 4 + 8, bits.x);
    _96.Store((w + 1u) * 4 + 8, bits.y);
    _96.Store((w + 2u) * 4 + 8, bits.z);
    _96.Store((w + 3u) * 4 + 8, bits.w);
}
|
||||
|
||||
// Body of the clip_leaf kernel for one 256-thread workgroup.
//  1. Load one stored Bic per earlier workgroup and combine each with the
//     entries to its right (sh_bic).
//  2. Each thread locates the ClipEl for its stack slot and loads it; the
//     boxes are then intersected cumulatively (sh_stack, sh_stack_bbox).
//  3. Load this thread's own clip input, build reduced Bic levels, and find
//     its link with search_link().
//  4. Follow links for eight rounds, intersecting boxes along the way.
//  5. For in-range non-push inputs, write the parent's path index to the
//     drawmonoid region and take the grandparent's box; finally store the
//     box for this element.
// Every barrier is executed by all threads; the statement order around the
// barriers must not change.
void comp_main()
{
    const float4 unbounded = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
    uint tid = gl_LocalInvocationID.x;
    uint gid = gl_GlobalInvocationID.x;
    uint wg = gl_WorkGroupID.x;

    // --- 1. Bics of earlier workgroups, combined with right-hand neighbours.
    Bic acc = _393;
    if (tid < wg)
    {
        acc = load_bic(tid);
    }
    sh_bic[tid] = acc;
    for (uint step = 1u; step < 256u; step <<= 1u)
    {
        GroupMemoryBarrierWithGroupSync();
        if ((tid + step) < 256u)
        {
            acc = bic_combine(acc, sh_bic[tid + step]);
        }
        GroupMemoryBarrierWithGroupSync();
        sh_bic[tid] = acc;
    }
    GroupMemoryBarrierWithGroupSync();
    uint stack_size = sh_bic[0].b;

    // --- 2. Binary search for the entry covering this thread's stack slot.
    uint slot = 255u - tid;
    uint part = 0u;
    for (uint stride = 128u; stride > 0u; stride >>= 1u)
    {
        uint probe = part + stride;
        if (slot < sh_bic[probe].b)
        {
            part = probe;
        }
    }
    uint part_b = sh_bic[part].b;
    float4 bbox = unbounded;
    if (slot < part_b)
    {
        ClipEl el = load_clip_el((((part * 256u) + part_b) - slot) - 1u);
        sh_stack[tid] = el.parent_ix;
        bbox = el.bbox;
    }
    // Cumulative intersection with the boxes at lower thread indices.
    for (uint hop = 1u; hop < 256u; hop <<= 1u)
    {
        sh_stack_bbox[tid] = bbox;
        GroupMemoryBarrierWithGroupSync();
        if (tid >= hop)
        {
            bbox = bbox_intersect(sh_stack_bbox[tid - hop], bbox);
        }
        GroupMemoryBarrierWithGroupSync();
    }
    sh_stack_bbox[tid] = bbox;

    // --- 3. This thread's own input: non-negative (as int) means push.
    uint inp = load_path_ix(gid);
    bool is_push = int(inp) >= 0;
    acc.a = is_push ? 0u : 1u;
    acc.b = is_push ? 1u : 0u;
    sh_bic[tid] = acc;
    if (is_push)
    {
        bbox = load_path_bbox(inp);
    }
    else
    {
        bbox = unbounded;
    }
    // Reduced levels: level k is written at 512 - (1 << (8 - k)).
    uint src = 0u;
    for (uint lvl = 0u; lvl < 7u; lvl++)
    {
        uint dst = 512u - (1u << (8u - lvl));
        GroupMemoryBarrierWithGroupSync();
        if (tid < (1u << (7u - lvl)))
        {
            uint left = src + (tid * 2u);
            sh_bic[dst + tid] = bic_combine(sh_bic[left], sh_bic[left + 1u]);
        }
        src = dst;
    }
    GroupMemoryBarrierWithGroupSync();
    acc = _393;
    uint link = search_link(acc);
    sh_link[tid] = link;
    GroupMemoryBarrierWithGroupSync();

    // Non-negative link: index inside this workgroup. Negative link: offset
    // into the loaded stack, valid only if link + stack_size is non-negative.
    uint grandparent;
    uint parent;
    if (int(link) >= 0)
    {
        grandparent = sh_link[link];
        parent = (wg * 256u) + link;
    }
    else
    {
        grandparent = link - 1u;
        if (int(link + stack_size) >= 0)
        {
            parent = sh_stack[256u + link];
        }
        else
        {
            parent = 0xFFFFFFFFu;
        }
    }

    // --- 4. Follow links, intersecting boxes and doubling the hop each round.
    for (uint round = 0u; round < 8u; round++)
    {
        // sh_link[tid] already holds `link` on the first round.
        if (round != 0u)
        {
            sh_link[tid] = link;
        }
        sh_bbox[tid] = bbox;
        GroupMemoryBarrierWithGroupSync();
        if (int(link) >= 0)
        {
            bbox = bbox_intersect(sh_bbox[link], bbox);
            link = sh_link[link];
        }
        GroupMemoryBarrierWithGroupSync();
    }
    if (int(link + stack_size) >= 0)
    {
        bbox = bbox_intersect(sh_stack_bbox[256u + link], bbox);
    }
    sh_bbox[tid] = bbox;
    GroupMemoryBarrierWithGroupSync();

    // --- 5. In-range non-push inputs: record the parent path, use the grandparent box.
    bool live_pop = false;
    if (!is_push)
    {
        live_pop = gid < _80.Load(72);
    }
    if (live_pop)
    {
        uint parent_path = load_path_ix(parent);
        // ~inp selects the two-word drawmonoid slot; word 0 receives the path index.
        uint out_word = (_80.Load(44) >> 2u) + (2u * (~inp));
        _96.Store(out_word * 4 + 8, parent_path);
        if (int(grandparent) >= 0)
        {
            bbox = sh_bbox[grandparent];
        }
        else if (int(grandparent + stack_size) >= 0)
        {
            bbox = sh_stack_bbox[256u + grandparent];
        }
        else
        {
            bbox = unbounded;
        }
    }
    store_clip_bbox(gid, bbox);
}
|
||||
|
||||
// Entry point: publishes the system-value IDs to the file-scope statics
// that comp_main() and its helpers read, then runs the kernel body.
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
    gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
    gl_WorkGroupID = stage_input.gl_WorkGroupID;

    comp_main();
}
|
366
piet-gpu/shader/gen/clip_leaf.msl
generated
Normal file
366
piet-gpu/shader/gen/clip_leaf.msl
generated
Normal file
|
@ -0,0 +1,366 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Pair of unsigned counters; two of them are merged with bic_combine().
struct Bic
{
    uint a, b;
};

// Record read by load_clip_el(): a parent index and a bounding box.
struct ClipEl
{
    uint parent_ix;
    float4 bbox;
};

// Byte offset of a region; users shift it right by 2 to obtain a word index.
struct Alloc
{
    uint offset;
};

// Configuration block read through ConfigBuf. Member order defines the
// buffer layout and must not change.
struct Config
{
    uint n_elements, n_pathseg;
    uint width_in_tiles, height_in_tiles;
    Alloc tile_alloc, bin_alloc, ptcl_alloc, pathseg_alloc;
    Alloc anno_alloc, trans_alloc, bbox_alloc, drawmonoid_alloc;
    Alloc clip_alloc, clip_bic_alloc, clip_stack_alloc, clip_bbox_alloc;
    uint n_trans, n_path, n_clip;
    uint trans_offset, linewidth_offset, pathtag_offset, pathseg_offset;
};

// Buffer 1 of the kernel.
struct ConfigBuf
{
    Config conf;
};

// Buffer 0 of the kernel: two header words followed by the word array that
// all Alloc offsets index into.
struct Memory
{
    uint mem_offset;
    uint mem_error;
    uint memory[1];
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
// Reads the ix-th Bic (two consecutive words) from the clip_bic region.
static inline __attribute__((always_inline))
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
    // >> 2 turns the region's byte offset into a word index.
    const uint first_word = (v_80.conf.clip_bic_alloc.offset >> 2u) + (ix * 2u);

    Bic pair;
    pair.a = v_96.memory[first_word];
    pair.b = v_96.memory[first_word + 1u];
    return pair;
}
|
||||
|
||||
// Merges two Bic values: the counters are summed and min(x.b, y.a) is
// removed from both sums.
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
    const uint cancelled = min(x.b, y.a);

    Bic merged;
    merged.a = (x.a + y.a) - cancelled;
    merged.b = (x.b + y.b) - cancelled;
    return merged;
}
|
||||
|
||||
// Reads the ix-th five-word ClipEl record from the clip_stack region.
static inline __attribute__((always_inline))
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
    const uint w = (v_80.conf.clip_stack_alloc.offset >> 2u) + (5u * ix);

    ClipEl el;
    // Word 0 is the parent index; words 1..4 are the bbox as raw float bits.
    el.parent_ix = v_96.memory[w];
    el.bbox.x = as_type<float>(v_96.memory[w + 1u]);
    el.bbox.y = as_type<float>(v_96.memory[w + 2u]);
    el.bbox.z = as_type<float>(v_96.memory[w + 3u]);
    el.bbox.w = as_type<float>(v_96.memory[w + 4u]);
    return el;
}
|
||||
|
||||
// Intersection of two boxes: component-wise max of the .xy pair and
// component-wise min of the .zw pair (fast:: variants, as generated).
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
    const float2 lower = fast::max(a.xy, b.xy);
    const float2 upper = fast::min(a.zw, b.zw);
    return float4(lower, upper);
}
|
||||
|
||||
// Returns word ix of the clip region, or 0x80000000 when ix is not below
// n_clip.
static inline __attribute__((always_inline))
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
    // The sentinel has its top bit set, so `int(value) >= 0` tests fail on it.
    if (ix >= v_80.conf.n_clip)
    {
        return 0x80000000u;
    }
    return v_96.memory[(v_80.conf.clip_alloc.offset >> 2u) + ix];
}
|
||||
|
||||
// Loads the bbox of path `path_ix` from the bbox region. Records are six
// words apart; the first four hold the box edges as unsigned integers
// biased by 32768.
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
    const uint w = (v_80.conf.bbox_alloc.offset >> 2u) + (6u * path_ix);

    const float left   = float(v_96.memory[w]) - 32768.0;
    const float top    = float(v_96.memory[w + 1u]) - 32768.0;
    const float right  = float(v_96.memory[w + 2u]) - 32768.0;
    const float bottom = float(v_96.memory[w + 3u]) - 32768.0;

    return float4(left, top, right, bottom);
}
|
||||
|
||||
// Searches the reduced levels stored in sh_bic, starting from this thread's
// local index. Entries are folded into `bic` with bic_combine() as long as
// the combined `.b` stays zero: first climbing to coarser levels, then
// descending again. Returns `position - 1` if a non-zero position remains,
// otherwise `0xFFFFFFFF - bic.a` (top bit set for the small `a` values seen
// here, so callers can test it with `int(result) >= 0`).
static inline __attribute__((always_inline))
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
{
    uint pos = gl_LocalInvocationID.x;
    uint level;

    // Upward sweep: only levels whose bit is set in `pos` are examined.
    for (level = 0u; level < 8u; level++)
    {
        if (((pos >> level) & 1u) == 0u)
        {
            continue;
        }
        // Start index of this level inside sh_bic (level 0 starts at 0).
        const uint row = 512u - (2u << (8u - level));
        // Copy out of threadgroup memory before passing by thread reference.
        Bic stored = sh_bic[(row + (pos >> level)) - 1u];
        Bic current = bic;
        Bic candidate = bic_combine(stored, current);
        if (candidate.b > 0u)
        {
            // Stop here; `level` keeps its value for the downward sweep.
            break;
        }
        bic = candidate;
        pos -= (1u << level);
    }

    // Downward sweep through the finer levels.
    if (pos > 0u)
    {
        while (level > 0u)
        {
            level--;
            const uint row = 512u - (2u << (8u - level));
            Bic stored = sh_bic[(row + (pos >> level)) - 1u];
            Bic current = bic;
            Bic candidate = bic_combine(stored, current);
            if (candidate.b == 0u)
            {
                bic = candidate;
                pos -= (1u << level);
            }
        }
    }

    if (pos > 0u)
    {
        return pos - 1u;
    }
    return 0xFFFFFFFFu - bic.a;
}
|
||||
|
||||
// Writes `bbox` as four raw float words into slot ix of the clip_bbox region.
static inline __attribute__((always_inline))
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
{
    const uint w = (v_80.conf.clip_bbox_alloc.offset >> 2u) + (4u * ix);
    const uint4 bits = as_type<uint4>(bbox);

    v_96.memory[w] = bits.x;
    v_96.memory[w + 1u] = bits.y;
    v_96.memory[w + 2u] = bits.z;
    v_96.memory[w + 3u] = bits.w;
}
|
||||
|
||||
// clip_leaf kernel for one 256-thread threadgroup.
//  1. Load one stored Bic per earlier threadgroup and combine each with the
//     entries to its right (sh_bic).
//  2. Each thread locates the ClipEl for its stack slot and loads it; the
//     boxes are then intersected cumulatively (sh_stack, sh_stack_bbox).
//  3. Load this thread's own clip input, build reduced Bic levels, and find
//     its link with search_link().
//  4. Follow links for eight rounds, intersecting boxes along the way.
//  5. For in-range non-push inputs, write the parent's path index to the
//     drawmonoid region and take the grandparent's box; finally store the
//     box for this element.
// Every barrier is executed by all threads; the statement order around the
// barriers must not change. Threadgroup values are copied into thread
// locals before being handed to helpers that take thread references.
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    threadgroup Bic sh_bic[510];
    threadgroup uint sh_stack[256];
    threadgroup float4 sh_stack_bbox[256];
    threadgroup uint sh_link[256];
    threadgroup float4 sh_bbox[256];

    const float4 unbounded = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
    const uint tid = gl_LocalInvocationID.x;
    const uint gid = gl_GlobalInvocationID.x;
    const uint wg = gl_WorkGroupID.x;

    // --- 1. Bics of earlier threadgroups, combined with right-hand neighbours.
    Bic acc = Bic{ 0u, 0u };
    if (tid < wg)
    {
        acc = load_bic(tid, v_80, v_96);
    }
    sh_bic[tid] = acc;
    for (uint step = 1u; step < 256u; step <<= 1u)
    {
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if ((tid + step) < 256u)
        {
            Bic right = sh_bic[tid + step];
            Bic left = acc;
            acc = bic_combine(left, right);
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
        sh_bic[tid] = acc;
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    const uint stack_size = sh_bic[0].b;

    // --- 2. Binary search for the entry covering this thread's stack slot.
    const uint slot = 255u - tid;
    uint part = 0u;
    for (uint stride = 128u; stride > 0u; stride >>= 1u)
    {
        const uint probe = part + stride;
        if (slot < sh_bic[probe].b)
        {
            part = probe;
        }
    }
    const uint part_b = sh_bic[part].b;
    float4 bbox = unbounded;
    if (slot < part_b)
    {
        uint el_ix = (((part * 256u) + part_b) - slot) - 1u;
        ClipEl el = load_clip_el(el_ix, v_80, v_96);
        sh_stack[tid] = el.parent_ix;
        bbox = el.bbox;
    }
    // Cumulative intersection with the boxes at lower thread indices.
    for (uint hop = 1u; hop < 256u; hop <<= 1u)
    {
        sh_stack_bbox[tid] = bbox;
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if (tid >= hop)
        {
            float4 below = sh_stack_bbox[tid - hop];
            float4 mine = bbox;
            bbox = bbox_intersect(below, mine);
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
    }
    sh_stack_bbox[tid] = bbox;

    // --- 3. This thread's own input: non-negative (as int) means push.
    uint inp = load_path_ix(gid, v_80, v_96);
    const bool is_push = int(inp) >= 0;
    acc = Bic{ is_push ? 0u : 1u, is_push ? 1u : 0u };
    sh_bic[tid] = acc;
    if (is_push)
    {
        bbox = load_path_bbox(inp, v_80, v_96);
    }
    else
    {
        bbox = unbounded;
    }
    // Reduced levels: level k is written at 512 - (1 << (8 - k)).
    uint src = 0u;
    for (uint lvl = 0u; lvl < 7u; lvl++)
    {
        const uint dst = 512u - (1u << (8u - lvl));
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if (tid < (1u << (7u - lvl)))
        {
            Bic first = sh_bic[src + (tid * 2u)];
            Bic second = sh_bic[(src + (tid * 2u)) + 1u];
            sh_bic[dst + tid] = bic_combine(first, second);
        }
        src = dst;
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    acc = Bic{ 0u, 0u };
    uint link = search_link(acc, gl_LocalInvocationID, sh_bic);
    sh_link[tid] = link;
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Non-negative link: index inside this threadgroup. Negative link: offset
    // into the loaded stack, valid only if link + stack_size is non-negative.
    uint grandparent;
    uint parent;
    if (int(link) >= 0)
    {
        grandparent = sh_link[link];
        parent = (wg * 256u) + link;
    }
    else
    {
        grandparent = link - 1u;
        if (int(link + stack_size) >= 0)
        {
            parent = sh_stack[256u + link];
        }
        else
        {
            parent = 0xFFFFFFFFu;
        }
    }

    // --- 4. Follow links, intersecting boxes and doubling the hop each round.
    for (uint round = 0u; round < 8u; round++)
    {
        // sh_link[tid] already holds `link` on the first round.
        if (round != 0u)
        {
            sh_link[tid] = link;
        }
        sh_bbox[tid] = bbox;
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if (int(link) >= 0)
        {
            float4 linked = sh_bbox[link];
            float4 mine = bbox;
            bbox = bbox_intersect(linked, mine);
            link = sh_link[link];
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
    }
    if (int(link + stack_size) >= 0)
    {
        float4 stacked = sh_stack_bbox[256u + link];
        float4 mine = bbox;
        bbox = bbox_intersect(stacked, mine);
    }
    sh_bbox[tid] = bbox;
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // --- 5. In-range non-push inputs: record the parent path, use the grandparent box.
    if (!is_push && gid < v_80.conf.n_clip)
    {
        const uint parent_path = load_path_ix(parent, v_80, v_96);
        // ~inp selects the two-word drawmonoid slot; word 0 receives the path index.
        const uint out_word = (v_80.conf.drawmonoid_alloc.offset >> 2u) + (2u * (~inp));
        v_96.memory[out_word] = parent_path;
        if (int(grandparent) >= 0)
        {
            bbox = sh_bbox[grandparent];
        }
        else if (int(grandparent + stack_size) >= 0)
        {
            bbox = sh_stack_bbox[256u + grandparent];
        }
        else
        {
            bbox = unbounded;
        }
    }
    store_clip_bbox(gid, bbox, v_80, v_96);
}
|
||||
|
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
Normal file
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
Normal file
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
Normal file
Binary file not shown.
177
piet-gpu/shader/gen/clip_reduce.hlsl
generated
Normal file
177
piet-gpu/shader/gen/clip_reduce.hlsl
generated
Normal file
|
@ -0,0 +1,177 @@
|
|||
// Pair of unsigned counters; two of them are merged with bic_combine().
struct Bic
{
    uint a, b;
};

// Record written by store_clip_el(): one index word followed by four bbox words.
struct ClipEl
{
    uint parent_ix;
    float4 bbox;
};

// Byte offset of a region; users shift it right by 2 to obtain a word index.
struct Alloc
{
    uint offset;
};

// Configuration layout. Every member occupies one 32-bit word, so the
// declaration order fixes the byte offsets used by the raw `_64.Load(n)`
// calls in this shader: bbox_alloc = 40, clip_alloc = 48,
// clip_bic_alloc = 52, clip_stack_alloc = 56. Do not reorder members.
struct Config
{
    uint n_elements, n_pathseg;
    uint width_in_tiles, height_in_tiles;
    Alloc tile_alloc, bin_alloc, ptcl_alloc, pathseg_alloc;
    Alloc anno_alloc, trans_alloc, bbox_alloc, drawmonoid_alloc;
    Alloc clip_alloc, clip_bic_alloc, clip_stack_alloc, clip_bbox_alloc;
    uint n_trans, n_path, n_clip;
    uint trans_offset, linewidth_offset, pathtag_offset, pathseg_offset;
};

// Matches the [numthreads(256, 1, 1)] attribute on main().
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);

// All-zero Bic used to reset accumulators.
static const Bic _267 = { 0u, 0u };

// Read-only buffer addressed with the Config byte offsets listed above.
ByteAddressBuffer _64 : register(t1, space0);
// Read/write buffer; word i is accessed at byte address i * 4 + 8.
RWByteAddressBuffer _80 : register(u0, space0);

// Copies of the dispatch IDs, filled in by main() before comp_main() runs.
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;

struct SPIRV_Cross_Input
{
    uint3 gl_WorkGroupID : SV_GroupID;
    uint3 gl_LocalInvocationID : SV_GroupThreadID;
    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};

groupshared Bic sh_bic[256];
groupshared uint sh_parent[256];
groupshared uint sh_path_ix[256];
// NOTE(review): sh_bbox is not referenced by the code visible in this shader.
groupshared float4 sh_bbox[256];
|
||||
|
||||
// Merges two Bic values: the counters are summed and min(x.b, y.a) is
// removed from both sums.
Bic bic_combine(Bic x, Bic y)
{
    uint cancelled = min(x.b, y.a);

    Bic merged;
    merged.a = (x.a + y.a) - cancelled;
    merged.b = (x.b + y.b) - cancelled;
    return merged;
}
|
||||
|
||||
// Writes `bic` into the ix-th two-word slot of the region whose byte offset
// is stored at config byte 52 (clip_bic_alloc).
void store_bic(uint ix, Bic bic)
{
    // >> 2 turns the region's byte offset into a word index.
    uint first_word = (_64.Load(52) >> 2u) + (ix * 2u);

    // Word i of _80 is found at byte i * 4 + 8.
    _80.Store(first_word * 4 + 8, bic.a);
    _80.Store((first_word + 1u) * 4 + 8, bic.b);
}
|
||||
|
||||
// Loads the bbox of path `path_ix` from the region at config byte 40
// (bbox_alloc). Records are six words apart; the first four hold the box
// edges as unsigned integers biased by 32768.
float4 load_path_bbox(uint path_ix)
{
    uint w = (_64.Load(40) >> 2u) + (6u * path_ix);

    float left   = float(_80.Load(w * 4 + 8));
    float top    = float(_80.Load((w + 1u) * 4 + 8));
    float right  = float(_80.Load((w + 2u) * 4 + 8));
    float bottom = float(_80.Load((w + 3u) * 4 + 8));

    // Remove the bias from every edge.
    return float4(left - 32768.0f, top - 32768.0f, right - 32768.0f, bottom - 32768.0f);
}
|
||||
|
||||
// Writes `el` into the ix-th five-word slot of the region at config byte 56
// (clip_stack_alloc): parent index first, then the bbox as raw float bits.
void store_clip_el(uint ix, ClipEl el)
{
    uint w = (_64.Load(56) >> 2u) + (5u * ix);
    uint4 bits = asuint(el.bbox);

    _80.Store(w * 4 + 8, el.parent_ix);
    _80.Store((w + 1u) * 4 + 8, bits.x);
    _80.Store((w + 2u) * 4 + 8, bits.y);
    _80.Store((w + 3u) * 4 + 8, bits.z);
    _80.Store((w + 4u) * 4 + 8, bits.w);
}
|
||||
|
||||
// Body of the clip_reduce kernel for one 256-thread workgroup.
//  1. Turn each clip input into a Bic ({0,1} for a push, {1,0} otherwise)
//     and combine it with all entries to its right.
//  2. Thread 0 stores the workgroup's combined Bic.
//  3. Each push whose right-hand suffix has a == 0 records its thread index
//     and path index at a slot derived from the suffix's b.
//  4. The first `size` threads emit one ClipEl each (parent index + bbox).
// NOTE(review): the input is read without a bounds check against n_clip —
// presumably the dispatch covers only full partitions; confirm at the caller.
void comp_main()
{
    uint tid = gl_LocalInvocationID.x;
    uint gid = gl_GlobalInvocationID.x;
    uint wg = gl_WorkGroupID.x;

    // Config byte 48 is clip_alloc; a non-negative value (as int) is a push.
    uint inp = _80.Load(((_64.Load(48) >> 2u) + gid) * 4 + 8);
    bool is_push = int(inp) >= 0;

    Bic acc;
    acc.a = is_push ? 0u : 1u;
    acc.b = is_push ? 1u : 0u;
    sh_bic[tid] = acc;
    for (uint step = 1u; step < 256u; step <<= 1u)
    {
        GroupMemoryBarrierWithGroupSync();
        if ((tid + step) < 256u)
        {
            acc = bic_combine(acc, sh_bic[tid + step]);
        }
        GroupMemoryBarrierWithGroupSync();
        sh_bic[tid] = acc;
    }

    if (tid == 0u)
    {
        store_bic(wg, acc);
    }
    GroupMemoryBarrierWithGroupSync();
    uint size = sh_bic[0].b;

    // Combined Bic of everything to the right of this thread (zero for the last one).
    acc = _267;
    if ((tid + 1u) < 256u)
    {
        acc = sh_bic[tid + 1u];
    }
    if (is_push && (acc.a == 0u))
    {
        uint local_ix = (size - acc.b) - 1u;
        sh_parent[local_ix] = tid;
        sh_path_ix[local_ix] = inp;
    }
    GroupMemoryBarrierWithGroupSync();

    if (tid < size)
    {
        ClipEl el;
        el.bbox = load_path_bbox(sh_path_ix[tid]);
        el.parent_ix = sh_parent[tid] + (wg * 256u);
        store_clip_el(gid, el);
    }
}
|
||||
|
||||
// Entry point: publishes the system-value IDs to the file-scope statics
// that comp_main() and its helpers read, then runs the kernel body.
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
    gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
    gl_WorkGroupID = stage_input.gl_WorkGroupID;

    comp_main();
}
|
173
piet-gpu/shader/gen/clip_reduce.msl
generated
Normal file
173
piet-gpu/shader/gen/clip_reduce.msl
generated
Normal file
|
@ -0,0 +1,173 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Pair of unsigned counters; two of them are merged with bic_combine().
struct Bic
{
    uint a, b;
};

// Record written by store_clip_el(): a parent index and a bounding box.
struct ClipEl
{
    uint parent_ix;
    float4 bbox;
};

// Byte offset of a region; users shift it right by 2 to obtain a word index.
struct Alloc
{
    uint offset;
};

// Configuration block read through ConfigBuf. Member order defines the
// buffer layout and must not change.
struct Config
{
    uint n_elements, n_pathseg;
    uint width_in_tiles, height_in_tiles;
    Alloc tile_alloc, bin_alloc, ptcl_alloc, pathseg_alloc;
    Alloc anno_alloc, trans_alloc, bbox_alloc, drawmonoid_alloc;
    Alloc clip_alloc, clip_bic_alloc, clip_stack_alloc, clip_bbox_alloc;
    uint n_trans, n_path, n_clip;
    uint trans_offset, linewidth_offset, pathtag_offset, pathseg_offset;
};

// Buffer 1 of the kernel.
struct ConfigBuf
{
    Config conf;
};

// Buffer 0 of the kernel: two header words followed by the word array that
// all Alloc offsets index into.
struct Memory
{
    uint mem_offset;
    uint mem_error;
    uint memory[1];
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
// Merges two Bic values: the counters are summed and min(x.b, y.a) is
// removed from both sums.
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
    const uint cancelled = min(x.b, y.a);

    Bic merged;
    merged.a = (x.a + y.a) - cancelled;
    merged.b = (x.b + y.b) - cancelled;
    return merged;
}
|
||||
|
||||
// Writes `bic` into the ix-th two-word slot of the clip_bic region.
static inline __attribute__((always_inline))
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
{
    // >> 2 turns the region's byte offset into a word index.
    const uint first_word = (v_64.conf.clip_bic_alloc.offset >> 2u) + (ix * 2u);

    v_80.memory[first_word] = bic.a;
    v_80.memory[first_word + 1u] = bic.b;
}
|
||||
|
||||
// Loads the bbox of path `path_ix` from the bbox region. Records are six
// words apart; the first four hold the box edges as unsigned integers
// biased by 32768.
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
    const uint w = (v_64.conf.bbox_alloc.offset >> 2u) + (6u * path_ix);

    const float left   = float(v_80.memory[w]) - 32768.0;
    const float top    = float(v_80.memory[w + 1u]) - 32768.0;
    const float right  = float(v_80.memory[w + 2u]) - 32768.0;
    const float bottom = float(v_80.memory[w + 3u]) - 32768.0;

    return float4(left, top, right, bottom);
}
|
||||
|
||||
// Writes `el` into the ix-th five-word slot of the clip_stack region:
// parent index first, then the bbox as raw float bits.
static inline __attribute__((always_inline))
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
{
    const uint w = (v_64.conf.clip_stack_alloc.offset >> 2u) + (5u * ix);
    const uint4 bits = as_type<uint4>(el.bbox);

    v_80.memory[w] = el.parent_ix;
    v_80.memory[w + 1u] = bits.x;
    v_80.memory[w + 2u] = bits.y;
    v_80.memory[w + 3u] = bits.z;
    v_80.memory[w + 4u] = bits.w;
}
|
||||
|
||||
// clip_reduce kernel for one 256-thread threadgroup.
//  1. Turn each clip input into a Bic ({0,1} for a push, {1,0} otherwise)
//     and combine it with all entries to its right.
//  2. Thread 0 stores the threadgroup's combined Bic.
//  3. Each push whose right-hand suffix has a == 0 records its thread index
//     and path index at a slot derived from the suffix's b.
//  4. The first `size` threads emit one ClipEl each (parent index + bbox).
// NOTE(review): the input is read without a bounds check against n_clip —
// presumably the dispatch covers only full partitions; confirm at the caller.
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
    threadgroup Bic sh_bic[256];
    threadgroup uint sh_parent[256];
    threadgroup uint sh_path_ix[256];
    // NOTE(review): sh_bbox is declared but never referenced in this kernel.
    threadgroup float4 sh_bbox[256];

    const uint tid = gl_LocalInvocationID.x;
    const uint gid = gl_GlobalInvocationID.x;
    const uint wg = gl_WorkGroupID.x;

    // A non-negative value (as int) is a push.
    const uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> 2u) + gid];
    const bool is_push = int(inp) >= 0;

    Bic acc = Bic{ is_push ? 0u : 1u, is_push ? 1u : 0u };
    sh_bic[tid] = acc;
    for (uint step = 1u; step < 256u; step <<= 1u)
    {
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if ((tid + step) < 256u)
        {
            // Copy out of threadgroup memory before passing by thread reference.
            Bic right = sh_bic[tid + step];
            Bic left = acc;
            acc = bic_combine(left, right);
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
        sh_bic[tid] = acc;
    }

    if (tid == 0u)
    {
        store_bic(wg, acc, v_64, v_80);
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);
    const uint size = sh_bic[0].b;

    // Combined Bic of everything to the right of this thread (zero for the last one).
    acc = Bic{ 0u, 0u };
    if ((tid + 1u) < 256u)
    {
        acc = sh_bic[tid + 1u];
    }
    if (is_push && (acc.a == 0u))
    {
        const uint local_ix = (size - acc.b) - 1u;
        sh_parent[local_ix] = tid;
        sh_path_ix[local_ix] = inp;
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);

    if (tid < size)
    {
        uint path_ix = sh_path_ix[tid];
        ClipEl el;
        el.bbox = load_path_bbox(path_ix, v_64, v_80);
        el.parent_ix = sh_parent[tid] + (wg * 256u);
        store_clip_el(gid, el, v_64, v_80);
    }
}
|
||||
|
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
Normal file
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/coarse.dxil
generated
BIN
piet-gpu/shader/gen/coarse.dxil
generated
Binary file not shown.
476
piet-gpu/shader/gen/coarse.hlsl
generated
476
piet-gpu/shader/gen/coarse.hlsl
generated
|
@ -49,17 +49,6 @@ struct AnnoLinGradient
|
|||
float line_c;
|
||||
};
|
||||
|
||||
struct AnnoBeginClipRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoBeginClip
|
||||
{
|
||||
float4 bbox;
|
||||
float linewidth;
|
||||
};
|
||||
|
||||
struct AnnotatedRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -193,8 +182,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -203,8 +197,8 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
RWByteAddressBuffer _296 : register(u0, space0);
|
||||
ByteAddressBuffer _1249 : register(t1, space0);
|
||||
RWByteAddressBuffer _283 : register(u0, space0);
|
||||
ByteAddressBuffer _1169 : register(t1, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -227,8 +221,8 @@ groupshared uint sh_tile_count[256];
|
|||
|
||||
Alloc slice_mem(Alloc a, uint offset, uint size)
|
||||
{
|
||||
Alloc _373 = { a.offset + offset };
|
||||
return _373;
|
||||
Alloc _360 = { a.offset + offset };
|
||||
return _360;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
|
@ -244,7 +238,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _296.Load(offset * 4 + 8);
|
||||
uint v = _283.Load(offset * 4 + 8);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -257,8 +251,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
|||
|
||||
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
|
||||
{
|
||||
BinInstanceRef _754 = { ref.offset + (index * 4u) };
|
||||
return _754;
|
||||
BinInstanceRef _674 = { ref.offset + (index * 4u) };
|
||||
return _674;
|
||||
}
|
||||
|
||||
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
|
||||
|
@ -277,8 +271,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
|
|||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint tag_and_flags = read_mem(param, param_1);
|
||||
AnnotatedTag _706 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _706;
|
||||
AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _636;
|
||||
}
|
||||
|
||||
Path Path_read(Alloc a, PathRef ref)
|
||||
|
@ -295,8 +289,8 @@ Path Path_read(Alloc a, PathRef ref)
|
|||
uint raw2 = read_mem(param_4, param_5);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
TileRef _814 = { raw2 };
|
||||
s.tiles = _814;
|
||||
TileRef _734 = { raw2 };
|
||||
s.tiles = _734;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -306,11 +300,11 @@ void write_tile_alloc(uint el_ix, Alloc a)
|
|||
|
||||
Alloc read_tile_alloc(uint el_ix, bool mem_ok)
|
||||
{
|
||||
uint _1135;
|
||||
_296.GetDimensions(_1135);
|
||||
_1135 = (_1135 - 8) / 4;
|
||||
uint _1055;
|
||||
_283.GetDimensions(_1055);
|
||||
_1055 = (_1055 - 8) / 4;
|
||||
uint param = 0u;
|
||||
uint param_1 = uint(int(_1135) * 4);
|
||||
uint param_1 = uint(int(_1055) * 4);
|
||||
bool param_2 = mem_ok;
|
||||
return new_alloc(param, param_1, param_2);
|
||||
}
|
||||
|
@ -324,9 +318,9 @@ Tile Tile_read(Alloc a, TileRef ref)
|
|||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3);
|
||||
TileSegRef _839 = { raw0 };
|
||||
TileSegRef _759 = { raw0 };
|
||||
Tile s;
|
||||
s.tile = _839;
|
||||
s.tile = _759;
|
||||
s.backdrop = int(raw1);
|
||||
return s;
|
||||
}
|
||||
|
@ -361,30 +355,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)
|
|||
|
||||
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)
|
||||
{
|
||||
AnnoColorRef _712 = { ref.offset + 4u };
|
||||
AnnoColorRef _642 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
AnnoColorRef param_1 = _712;
|
||||
AnnoColorRef param_1 = _642;
|
||||
return AnnoColor_read(param, param_1);
|
||||
}
|
||||
|
||||
MallocResult malloc(uint size)
|
||||
{
|
||||
uint _302;
|
||||
_296.InterlockedAdd(0, size, _302);
|
||||
uint offset = _302;
|
||||
uint _309;
|
||||
_296.GetDimensions(_309);
|
||||
_309 = (_309 - 8) / 4;
|
||||
uint _289;
|
||||
_283.InterlockedAdd(0, size, _289);
|
||||
uint offset = _289;
|
||||
uint _296;
|
||||
_283.GetDimensions(_296);
|
||||
_296 = (_296 - 8) / 4;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int(_309) * 4);
|
||||
r.failed = (offset + size) > uint(int(_296) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
{
|
||||
uint _331;
|
||||
_296.InterlockedMax(4, 1u, _331);
|
||||
uint _318;
|
||||
_283.InterlockedMax(4, 1u, _318);
|
||||
return r;
|
||||
}
|
||||
return r;
|
||||
|
@ -398,7 +392,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_296.Store(offset * 4 + 8, val);
|
||||
_283.Store(offset * 4 + 8, val);
|
||||
}
|
||||
|
||||
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
|
||||
|
@ -416,9 +410,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 10u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdJumpRef _1128 = { ref.offset + 4u };
|
||||
CmdJumpRef _1048 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdJumpRef param_4 = _1128;
|
||||
CmdJumpRef param_4 = _1048;
|
||||
CmdJump param_5 = s;
|
||||
CmdJump_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -430,21 +424,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
|
|||
return true;
|
||||
}
|
||||
uint param = 1024u;
|
||||
MallocResult _1156 = malloc(param);
|
||||
MallocResult new_cmd = _1156;
|
||||
MallocResult _1076 = malloc(param);
|
||||
MallocResult new_cmd = _1076;
|
||||
if (new_cmd.failed)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
CmdJump _1166 = { new_cmd.alloc.offset };
|
||||
CmdJump jump = _1166;
|
||||
CmdJump _1086 = { new_cmd.alloc.offset };
|
||||
CmdJump jump = _1086;
|
||||
Alloc param_1 = cmd_alloc;
|
||||
CmdRef param_2 = cmd_ref;
|
||||
CmdJump param_3 = jump;
|
||||
Cmd_Jump_write(param_1, param_2, param_3);
|
||||
cmd_alloc = new_cmd.alloc;
|
||||
CmdRef _1178 = { cmd_alloc.offset };
|
||||
cmd_ref = _1178;
|
||||
CmdRef _1098 = { cmd_alloc.offset };
|
||||
cmd_ref = _1098;
|
||||
cmd_limit = (cmd_alloc.offset + 1024u) - 60u;
|
||||
return true;
|
||||
}
|
||||
|
@ -473,9 +467,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 1u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdFillRef _1012 = { ref.offset + 4u };
|
||||
CmdFillRef _932 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdFillRef param_4 = _1012;
|
||||
CmdFillRef param_4 = _932;
|
||||
CmdFill param_5 = s;
|
||||
CmdFill_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -507,9 +501,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 2u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdStrokeRef _1030 = { ref.offset + 4u };
|
||||
CmdStrokeRef _950 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdStrokeRef param_4 = _1030;
|
||||
CmdStrokeRef param_4 = _950;
|
||||
CmdStroke param_5 = s;
|
||||
CmdStroke_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -521,8 +515,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
|||
{
|
||||
if (tile.tile.offset != 0u)
|
||||
{
|
||||
CmdFill _1202 = { tile.tile.offset, tile.backdrop };
|
||||
CmdFill cmd_fill = _1202;
|
||||
CmdFill _1122 = { tile.tile.offset, tile.backdrop };
|
||||
CmdFill cmd_fill = _1122;
|
||||
Alloc param_1 = alloc;
|
||||
CmdRef param_2 = cmd_ref;
|
||||
CmdFill param_3 = cmd_fill;
|
||||
|
@ -539,8 +533,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
|||
}
|
||||
else
|
||||
{
|
||||
CmdStroke _1232 = { tile.tile.offset, 0.5f * linewidth };
|
||||
CmdStroke cmd_stroke = _1232;
|
||||
CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth };
|
||||
CmdStroke cmd_stroke = _1152;
|
||||
Alloc param_6 = alloc;
|
||||
CmdRef param_7 = cmd_ref;
|
||||
CmdStroke param_8 = cmd_stroke;
|
||||
|
@ -564,9 +558,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 5u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdColorRef _1056 = { ref.offset + 4u };
|
||||
CmdColorRef _976 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdColorRef param_4 = _1056;
|
||||
CmdColorRef param_4 = _976;
|
||||
CmdColor param_5 = s;
|
||||
CmdColor_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -613,9 +607,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref)
|
|||
|
||||
AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref)
|
||||
{
|
||||
AnnoLinGradientRef _722 = { ref.offset + 4u };
|
||||
AnnoLinGradientRef _652 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
AnnoLinGradientRef param_1 = _722;
|
||||
AnnoLinGradientRef param_1 = _652;
|
||||
return AnnoLinGradient_read(param, param_1);
|
||||
}
|
||||
|
||||
|
@ -646,9 +640,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 6u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdLinGradRef _1074 = { ref.offset + 4u };
|
||||
CmdLinGradRef _994 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdLinGradRef param_4 = _1074;
|
||||
CmdLinGradRef param_4 = _994;
|
||||
CmdLinGrad param_5 = s;
|
||||
CmdLinGrad_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -687,9 +681,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)
|
|||
|
||||
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)
|
||||
{
|
||||
AnnoImageRef _732 = { ref.offset + 4u };
|
||||
AnnoImageRef _662 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
AnnoImageRef param_1 = _732;
|
||||
AnnoImageRef param_1 = _662;
|
||||
return AnnoImage_read(param, param_1);
|
||||
}
|
||||
|
||||
|
@ -712,45 +706,13 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 7u;
|
||||
write_mem(param, param_1, param_2);
|
||||
CmdImageRef _1092 = { ref.offset + 4u };
|
||||
CmdImageRef _1012 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
CmdImageRef param_4 = _1092;
|
||||
CmdImageRef param_4 = _1012;
|
||||
CmdImage param_5 = s;
|
||||
CmdImage_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
||||
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 3u;
|
||||
uint raw3 = read_mem(param_6, param_7);
|
||||
Alloc param_8 = a;
|
||||
uint param_9 = ix + 4u;
|
||||
uint raw4 = read_mem(param_8, param_9);
|
||||
AnnoBeginClip s;
|
||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
s.linewidth = asfloat(raw4);
|
||||
return s;
|
||||
}
|
||||
|
||||
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)
|
||||
{
|
||||
AnnoBeginClipRef _742 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
AnnoBeginClipRef param_1 = _742;
|
||||
return AnnoBeginClip_read(param, param_1);
|
||||
}
|
||||
|
||||
void Cmd_BeginClip_write(Alloc a, CmdRef ref)
|
||||
{
|
||||
Alloc param = a;
|
||||
|
@ -777,44 +739,43 @@ void Cmd_End_write(Alloc a, CmdRef ref)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint width_in_bins = ((_1249.Load(8) + 16u) - 1u) / 16u;
|
||||
uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u;
|
||||
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
|
||||
uint partition_ix = 0u;
|
||||
uint n_partitions = ((_1249.Load(0) + 256u) - 1u) / 256u;
|
||||
uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u;
|
||||
uint th_ix = gl_LocalInvocationID.x;
|
||||
uint bin_tile_x = 16u * gl_WorkGroupID.x;
|
||||
uint bin_tile_y = 16u * gl_WorkGroupID.y;
|
||||
uint tile_x = gl_LocalInvocationID.x % 16u;
|
||||
uint tile_y = gl_LocalInvocationID.x / 16u;
|
||||
uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.Load(8)) + bin_tile_x) + tile_x;
|
||||
Alloc _1314;
|
||||
_1314.offset = _1249.Load(24);
|
||||
uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x;
|
||||
Alloc _1234;
|
||||
_1234.offset = _1169.Load(24);
|
||||
Alloc param;
|
||||
param.offset = _1314.offset;
|
||||
param.offset = _1234.offset;
|
||||
uint param_1 = this_tile_ix * 1024u;
|
||||
uint param_2 = 1024u;
|
||||
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
|
||||
CmdRef _1323 = { cmd_alloc.offset };
|
||||
CmdRef cmd_ref = _1323;
|
||||
CmdRef _1243 = { cmd_alloc.offset };
|
||||
CmdRef cmd_ref = _1243;
|
||||
uint cmd_limit = (cmd_ref.offset + 1024u) - 60u;
|
||||
uint clip_depth = 0u;
|
||||
uint clip_zero_depth = 0u;
|
||||
uint clip_one_mask = 0u;
|
||||
uint rd_ix = 0u;
|
||||
uint wr_ix = 0u;
|
||||
uint part_start_ix = 0u;
|
||||
uint ready_ix = 0u;
|
||||
bool mem_ok = _296.Load(4) == 0u;
|
||||
bool mem_ok = _283.Load(4) == 0u;
|
||||
Alloc param_3;
|
||||
Alloc param_5;
|
||||
uint _1529;
|
||||
uint _1448;
|
||||
uint element_ix;
|
||||
AnnotatedRef ref;
|
||||
Alloc param_14;
|
||||
Alloc param_16;
|
||||
uint tile_count;
|
||||
Alloc param_23;
|
||||
uint _1841;
|
||||
uint _1770;
|
||||
Alloc param_29;
|
||||
Tile tile_1;
|
||||
AnnoColor fill;
|
||||
|
@ -822,41 +783,40 @@ void comp_main()
|
|||
Alloc param_52;
|
||||
CmdLinGrad cmd_lin;
|
||||
Alloc param_69;
|
||||
Alloc param_86;
|
||||
while (true)
|
||||
{
|
||||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
sh_bitmaps[i][th_ix] = 0u;
|
||||
}
|
||||
bool _1581;
|
||||
bool _1500;
|
||||
for (;;)
|
||||
{
|
||||
if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
|
||||
{
|
||||
part_start_ix = ready_ix;
|
||||
uint count = 0u;
|
||||
bool _1379 = th_ix < 256u;
|
||||
bool _1387;
|
||||
if (_1379)
|
||||
bool _1298 = th_ix < 256u;
|
||||
bool _1306;
|
||||
if (_1298)
|
||||
{
|
||||
_1387 = (partition_ix + th_ix) < n_partitions;
|
||||
_1306 = (partition_ix + th_ix) < n_partitions;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1387 = _1379;
|
||||
_1306 = _1298;
|
||||
}
|
||||
if (_1387)
|
||||
if (_1306)
|
||||
{
|
||||
uint in_ix = (_1249.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
|
||||
Alloc _1404;
|
||||
_1404.offset = _1249.Load(20);
|
||||
param_3.offset = _1404.offset;
|
||||
uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
|
||||
Alloc _1323;
|
||||
_1323.offset = _1169.Load(20);
|
||||
param_3.offset = _1323.offset;
|
||||
uint param_4 = in_ix;
|
||||
count = read_mem(param_3, param_4);
|
||||
Alloc _1415;
|
||||
_1415.offset = _1249.Load(20);
|
||||
param_5.offset = _1415.offset;
|
||||
Alloc _1334;
|
||||
_1334.offset = _1169.Load(20);
|
||||
param_5.offset = _1334.offset;
|
||||
uint param_6 = in_ix + 1u;
|
||||
uint offset = read_mem(param_5, param_6);
|
||||
uint param_7 = offset;
|
||||
|
@ -902,16 +862,16 @@ void comp_main()
|
|||
}
|
||||
if (part_ix > 0u)
|
||||
{
|
||||
_1529 = sh_part_count[part_ix - 1u];
|
||||
_1448 = sh_part_count[part_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_1529 = part_start_ix;
|
||||
_1448 = part_start_ix;
|
||||
}
|
||||
ix -= _1529;
|
||||
ix -= _1448;
|
||||
Alloc bin_alloc = sh_part_elements[part_ix];
|
||||
BinInstanceRef _1548 = { bin_alloc.offset };
|
||||
BinInstanceRef inst_ref = _1548;
|
||||
BinInstanceRef _1467 = { bin_alloc.offset };
|
||||
BinInstanceRef inst_ref = _1467;
|
||||
BinInstanceRef param_10 = inst_ref;
|
||||
uint param_11 = ix;
|
||||
Alloc param_12 = bin_alloc;
|
||||
|
@ -921,16 +881,16 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
wr_ix = min((rd_ix + 256u), ready_ix);
|
||||
bool _1571 = (wr_ix - rd_ix) < 256u;
|
||||
if (_1571)
|
||||
bool _1490 = (wr_ix - rd_ix) < 256u;
|
||||
if (_1490)
|
||||
{
|
||||
_1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
|
||||
_1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
|
||||
}
|
||||
else
|
||||
{
|
||||
_1581 = _1571;
|
||||
_1500 = _1490;
|
||||
}
|
||||
if (_1581)
|
||||
if (_1500)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -943,11 +903,11 @@ void comp_main()
|
|||
if ((th_ix + rd_ix) < wr_ix)
|
||||
{
|
||||
element_ix = sh_elements[th_ix];
|
||||
AnnotatedRef _1602 = { _1249.Load(32) + (element_ix * 40u) };
|
||||
ref = _1602;
|
||||
Alloc _1605;
|
||||
_1605.offset = _1249.Load(32);
|
||||
param_14.offset = _1605.offset;
|
||||
AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) };
|
||||
ref = _1521;
|
||||
Alloc _1524;
|
||||
_1524.offset = _1169.Load(32);
|
||||
param_14.offset = _1524.offset;
|
||||
AnnotatedRef param_15 = ref;
|
||||
tag = Annotated_tag(param_14, param_15).tag;
|
||||
}
|
||||
|
@ -959,12 +919,13 @@ void comp_main()
|
|||
case 4u:
|
||||
case 5u:
|
||||
{
|
||||
uint path_ix = element_ix;
|
||||
PathRef _1624 = { _1249.Load(16) + (path_ix * 12u) };
|
||||
Alloc _1627;
|
||||
_1627.offset = _1249.Load(16);
|
||||
param_16.offset = _1627.offset;
|
||||
PathRef param_17 = _1624;
|
||||
uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix);
|
||||
uint path_ix = _283.Load(drawmonoid_base * 4 + 8);
|
||||
PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) };
|
||||
Alloc _1556;
|
||||
_1556.offset = _1169.Load(16);
|
||||
param_16.offset = _1556.offset;
|
||||
PathRef param_17 = _1553;
|
||||
Path path = Path_read(param_16, param_17);
|
||||
uint stride = path.bbox.z - path.bbox.x;
|
||||
sh_tile_stride[th_ix] = stride;
|
||||
|
@ -1019,59 +980,53 @@ void comp_main()
|
|||
el_ix = probe_1;
|
||||
}
|
||||
}
|
||||
AnnotatedRef _1826 = { _1249.Load(32) + (sh_elements[el_ix] * 40u) };
|
||||
AnnotatedRef ref_1 = _1826;
|
||||
Alloc _1830;
|
||||
_1830.offset = _1249.Load(32);
|
||||
param_23.offset = _1830.offset;
|
||||
AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) };
|
||||
AnnotatedRef ref_1 = _1755;
|
||||
Alloc _1759;
|
||||
_1759.offset = _1169.Load(32);
|
||||
param_23.offset = _1759.offset;
|
||||
AnnotatedRef param_24 = ref_1;
|
||||
uint tag_1 = Annotated_tag(param_23, param_24).tag;
|
||||
if (el_ix > 0u)
|
||||
{
|
||||
_1841 = sh_tile_count[el_ix - 1u];
|
||||
_1770 = sh_tile_count[el_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_1841 = 0u;
|
||||
_1770 = 0u;
|
||||
}
|
||||
uint seq_ix = ix_1 - _1841;
|
||||
uint seq_ix = ix_1 - _1770;
|
||||
uint width = sh_tile_width[el_ix];
|
||||
uint x = sh_tile_x0[el_ix] + (seq_ix % width);
|
||||
uint y = sh_tile_y0[el_ix] + (seq_ix / width);
|
||||
bool include_tile = false;
|
||||
if ((tag_1 == 4u) || (tag_1 == 5u))
|
||||
{
|
||||
include_tile = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mem_ok)
|
||||
{
|
||||
uint param_25 = el_ix;
|
||||
bool param_26 = mem_ok;
|
||||
TileRef _1901 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
|
||||
TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
|
||||
Alloc param_27 = read_tile_alloc(param_25, param_26);
|
||||
TileRef param_28 = _1901;
|
||||
TileRef param_28 = _1822;
|
||||
Tile tile = Tile_read(param_27, param_28);
|
||||
bool _1907 = tile.tile.offset != 0u;
|
||||
bool _1914;
|
||||
if (!_1907)
|
||||
bool is_clip = (tag_1 == 4u) || (tag_1 == 5u);
|
||||
bool _1834 = tile.tile.offset != 0u;
|
||||
bool _1843;
|
||||
if (!_1834)
|
||||
{
|
||||
_1914 = tile.backdrop != 0;
|
||||
_1843 = (tile.backdrop == 0) == is_clip;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1914 = _1907;
|
||||
}
|
||||
include_tile = _1914;
|
||||
_1843 = _1834;
|
||||
}
|
||||
include_tile = _1843;
|
||||
}
|
||||
if (include_tile)
|
||||
{
|
||||
uint el_slice = el_ix / 32u;
|
||||
uint el_mask = 1u << (el_ix & 31u);
|
||||
uint _1934;
|
||||
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1934);
|
||||
uint _1863;
|
||||
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863);
|
||||
}
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
@ -1095,11 +1050,11 @@ void comp_main()
|
|||
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
|
||||
uint element_ix_1 = sh_elements[element_ref_ix];
|
||||
bitmap &= (bitmap - 1u);
|
||||
AnnotatedRef _1988 = { _1249.Load(32) + (element_ix_1 * 40u) };
|
||||
ref = _1988;
|
||||
Alloc _1993;
|
||||
_1993.offset = _1249.Load(32);
|
||||
param_29.offset = _1993.offset;
|
||||
AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) };
|
||||
ref = _1917;
|
||||
Alloc _1922;
|
||||
_1922.offset = _1169.Load(32);
|
||||
param_29.offset = _1922.offset;
|
||||
AnnotatedRef param_30 = ref;
|
||||
AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);
|
||||
if (clip_zero_depth == 0u)
|
||||
|
@ -1110,23 +1065,23 @@ void comp_main()
|
|||
{
|
||||
uint param_31 = element_ref_ix;
|
||||
bool param_32 = mem_ok;
|
||||
TileRef _2029 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
Alloc param_33 = read_tile_alloc(param_31, param_32);
|
||||
TileRef param_34 = _2029;
|
||||
TileRef param_34 = _1958;
|
||||
tile_1 = Tile_read(param_33, param_34);
|
||||
Alloc _2036;
|
||||
_2036.offset = _1249.Load(32);
|
||||
param_35.offset = _2036.offset;
|
||||
Alloc _1965;
|
||||
_1965.offset = _1169.Load(32);
|
||||
param_35.offset = _1965.offset;
|
||||
AnnotatedRef param_36 = ref;
|
||||
fill = Annotated_Color_read(param_35, param_36);
|
||||
Alloc param_37 = cmd_alloc;
|
||||
CmdRef param_38 = cmd_ref;
|
||||
uint param_39 = cmd_limit;
|
||||
bool _2048 = alloc_cmd(param_37, param_38, param_39);
|
||||
bool _1977 = alloc_cmd(param_37, param_38, param_39);
|
||||
cmd_alloc = param_37;
|
||||
cmd_ref = param_38;
|
||||
cmd_limit = param_39;
|
||||
if (!_2048)
|
||||
if (!_1977)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1137,10 +1092,10 @@ void comp_main()
|
|||
float param_44 = fill.linewidth;
|
||||
write_fill(param_40, param_41, param_42, param_43, param_44);
|
||||
cmd_ref = param_41;
|
||||
CmdColor _2072 = { fill.rgba_color };
|
||||
CmdColor _2001 = { fill.rgba_color };
|
||||
Alloc param_45 = cmd_alloc;
|
||||
CmdRef param_46 = cmd_ref;
|
||||
CmdColor param_47 = _2072;
|
||||
CmdColor param_47 = _2001;
|
||||
Cmd_Color_write(param_45, param_46, param_47);
|
||||
cmd_ref.offset += 8u;
|
||||
break;
|
||||
|
@ -1149,23 +1104,23 @@ void comp_main()
|
|||
{
|
||||
uint param_48 = element_ref_ix;
|
||||
bool param_49 = mem_ok;
|
||||
TileRef _2101 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
Alloc param_50 = read_tile_alloc(param_48, param_49);
|
||||
TileRef param_51 = _2101;
|
||||
TileRef param_51 = _2030;
|
||||
tile_1 = Tile_read(param_50, param_51);
|
||||
Alloc _2108;
|
||||
_2108.offset = _1249.Load(32);
|
||||
param_52.offset = _2108.offset;
|
||||
Alloc _2037;
|
||||
_2037.offset = _1169.Load(32);
|
||||
param_52.offset = _2037.offset;
|
||||
AnnotatedRef param_53 = ref;
|
||||
AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53);
|
||||
Alloc param_54 = cmd_alloc;
|
||||
CmdRef param_55 = cmd_ref;
|
||||
uint param_56 = cmd_limit;
|
||||
bool _2120 = alloc_cmd(param_54, param_55, param_56);
|
||||
bool _2049 = alloc_cmd(param_54, param_55, param_56);
|
||||
cmd_alloc = param_54;
|
||||
cmd_ref = param_55;
|
||||
cmd_limit = param_56;
|
||||
if (!_2120)
|
||||
if (!_2049)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1191,23 +1146,23 @@ void comp_main()
|
|||
{
|
||||
uint param_65 = element_ref_ix;
|
||||
bool param_66 = mem_ok;
|
||||
TileRef _2185 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
Alloc param_67 = read_tile_alloc(param_65, param_66);
|
||||
TileRef param_68 = _2185;
|
||||
TileRef param_68 = _2114;
|
||||
tile_1 = Tile_read(param_67, param_68);
|
||||
Alloc _2192;
|
||||
_2192.offset = _1249.Load(32);
|
||||
param_69.offset = _2192.offset;
|
||||
Alloc _2121;
|
||||
_2121.offset = _1169.Load(32);
|
||||
param_69.offset = _2121.offset;
|
||||
AnnotatedRef param_70 = ref;
|
||||
AnnoImage fill_img = Annotated_Image_read(param_69, param_70);
|
||||
Alloc param_71 = cmd_alloc;
|
||||
CmdRef param_72 = cmd_ref;
|
||||
uint param_73 = cmd_limit;
|
||||
bool _2204 = alloc_cmd(param_71, param_72, param_73);
|
||||
bool _2133 = alloc_cmd(param_71, param_72, param_73);
|
||||
cmd_alloc = param_71;
|
||||
cmd_ref = param_72;
|
||||
cmd_limit = param_73;
|
||||
if (!_2204)
|
||||
if (!_2133)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1218,10 +1173,10 @@ void comp_main()
|
|||
float param_78 = fill_img.linewidth;
|
||||
write_fill(param_74, param_75, param_76, param_77, param_78);
|
||||
cmd_ref = param_75;
|
||||
CmdImage _2230 = { fill_img.index, fill_img.offset };
|
||||
CmdImage _2159 = { fill_img.index, fill_img.offset };
|
||||
Alloc param_79 = cmd_alloc;
|
||||
CmdRef param_80 = cmd_ref;
|
||||
CmdImage param_81 = _2230;
|
||||
CmdImage param_81 = _2159;
|
||||
Cmd_Image_write(param_79, param_80, param_81);
|
||||
cmd_ref.offset += 12u;
|
||||
break;
|
||||
|
@ -1230,103 +1185,76 @@ void comp_main()
|
|||
{
|
||||
uint param_82 = element_ref_ix;
|
||||
bool param_83 = mem_ok;
|
||||
TileRef _2259 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
Alloc param_84 = read_tile_alloc(param_82, param_83);
|
||||
TileRef param_85 = _2259;
|
||||
TileRef param_85 = _2188;
|
||||
tile_1 = Tile_read(param_84, param_85);
|
||||
bool _2265 = tile_1.tile.offset == 0u;
|
||||
bool _2271;
|
||||
if (_2265)
|
||||
bool _2194 = tile_1.tile.offset == 0u;
|
||||
bool _2200;
|
||||
if (_2194)
|
||||
{
|
||||
_2271 = tile_1.backdrop == 0;
|
||||
_2200 = tile_1.backdrop == 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
_2271 = _2265;
|
||||
_2200 = _2194;
|
||||
}
|
||||
if (_2271)
|
||||
if (_2200)
|
||||
{
|
||||
clip_zero_depth = clip_depth + 1u;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))
|
||||
{
|
||||
clip_one_mask |= (1u << clip_depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
Alloc _2293;
|
||||
_2293.offset = _1249.Load(32);
|
||||
param_86.offset = _2293.offset;
|
||||
AnnotatedRef param_87 = ref;
|
||||
AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87);
|
||||
Alloc param_88 = cmd_alloc;
|
||||
CmdRef param_89 = cmd_ref;
|
||||
uint param_90 = cmd_limit;
|
||||
bool _2305 = alloc_cmd(param_88, param_89, param_90);
|
||||
cmd_alloc = param_88;
|
||||
cmd_ref = param_89;
|
||||
cmd_limit = param_90;
|
||||
if (!_2305)
|
||||
Alloc param_86 = cmd_alloc;
|
||||
CmdRef param_87 = cmd_ref;
|
||||
uint param_88 = cmd_limit;
|
||||
bool _2212 = alloc_cmd(param_86, param_87, param_88);
|
||||
cmd_alloc = param_86;
|
||||
cmd_ref = param_87;
|
||||
cmd_limit = param_88;
|
||||
if (!_2212)
|
||||
{
|
||||
break;
|
||||
}
|
||||
Alloc param_91 = cmd_alloc;
|
||||
CmdRef param_92 = cmd_ref;
|
||||
uint param_93 = tag_2.flags;
|
||||
Tile param_94 = tile_1;
|
||||
float param_95 = begin_clip.linewidth;
|
||||
write_fill(param_91, param_92, param_93, param_94, param_95);
|
||||
cmd_ref = param_92;
|
||||
Alloc param_96 = cmd_alloc;
|
||||
CmdRef param_97 = cmd_ref;
|
||||
Cmd_BeginClip_write(param_96, param_97);
|
||||
Alloc param_89 = cmd_alloc;
|
||||
CmdRef param_90 = cmd_ref;
|
||||
Cmd_BeginClip_write(param_89, param_90);
|
||||
cmd_ref.offset += 4u;
|
||||
if (clip_depth < 32u)
|
||||
{
|
||||
clip_one_mask &= (~(1u << clip_depth));
|
||||
}
|
||||
}
|
||||
}
|
||||
clip_depth++;
|
||||
break;
|
||||
}
|
||||
case 5u:
|
||||
{
|
||||
uint param_91 = element_ref_ix;
|
||||
bool param_92 = mem_ok;
|
||||
TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||
Alloc param_93 = read_tile_alloc(param_91, param_92);
|
||||
TileRef param_94 = _2249;
|
||||
tile_1 = Tile_read(param_93, param_94);
|
||||
clip_depth--;
|
||||
bool _2351 = clip_depth >= 32u;
|
||||
bool _2360;
|
||||
if (!_2351)
|
||||
{
|
||||
_2360 = (clip_one_mask & (1u << clip_depth)) == 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_2360 = _2351;
|
||||
}
|
||||
if (_2360)
|
||||
{
|
||||
Alloc param_98 = cmd_alloc;
|
||||
CmdRef param_99 = cmd_ref;
|
||||
uint param_100 = cmd_limit;
|
||||
bool _2369 = alloc_cmd(param_98, param_99, param_100);
|
||||
cmd_alloc = param_98;
|
||||
cmd_ref = param_99;
|
||||
cmd_limit = param_100;
|
||||
if (!_2369)
|
||||
Alloc param_95 = cmd_alloc;
|
||||
CmdRef param_96 = cmd_ref;
|
||||
uint param_97 = cmd_limit;
|
||||
bool _2261 = alloc_cmd(param_95, param_96, param_97);
|
||||
cmd_alloc = param_95;
|
||||
cmd_ref = param_96;
|
||||
cmd_limit = param_97;
|
||||
if (!_2261)
|
||||
{
|
||||
break;
|
||||
}
|
||||
Alloc param_101 = cmd_alloc;
|
||||
CmdRef param_102 = cmd_ref;
|
||||
Cmd_Solid_write(param_101, param_102);
|
||||
cmd_ref.offset += 4u;
|
||||
Alloc param_98 = cmd_alloc;
|
||||
CmdRef param_99 = cmd_ref;
|
||||
uint param_100 = 0u;
|
||||
Tile param_101 = tile_1;
|
||||
float param_102 = 0.0f;
|
||||
write_fill(param_98, param_99, param_100, param_101, param_102);
|
||||
cmd_ref = param_99;
|
||||
Alloc param_103 = cmd_alloc;
|
||||
CmdRef param_104 = cmd_ref;
|
||||
Cmd_EndClip_write(param_103, param_104);
|
||||
cmd_ref.offset += 4u;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1359,17 +1287,17 @@ void comp_main()
|
|||
break;
|
||||
}
|
||||
}
|
||||
bool _2432 = (bin_tile_x + tile_x) < _1249.Load(8);
|
||||
bool _2441;
|
||||
if (_2432)
|
||||
bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8);
|
||||
bool _2335;
|
||||
if (_2326)
|
||||
{
|
||||
_2441 = (bin_tile_y + tile_y) < _1249.Load(12);
|
||||
_2335 = (bin_tile_y + tile_y) < _1169.Load(12);
|
||||
}
|
||||
else
|
||||
{
|
||||
_2441 = _2432;
|
||||
_2335 = _2326;
|
||||
}
|
||||
if (_2441)
|
||||
if (_2335)
|
||||
{
|
||||
Alloc param_105 = cmd_alloc;
|
||||
CmdRef param_106 = cmd_ref;
|
||||
|
|
548
piet-gpu/shader/gen/coarse.msl
generated
548
piet-gpu/shader/gen/coarse.msl
generated
File diff suppressed because it is too large
Load diff
BIN
piet-gpu/shader/gen/coarse.spv
generated
BIN
piet-gpu/shader/gen/coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
Binary file not shown.
253
piet-gpu/shader/gen/draw_leaf.hlsl
generated
253
piet-gpu/shader/gen/draw_leaf.hlsl
generated
|
@ -41,16 +41,6 @@ struct FillImage
|
|||
int2 offset;
|
||||
};
|
||||
|
||||
struct ClipRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Clip
|
||||
{
|
||||
float4 bbox;
|
||||
};
|
||||
|
||||
struct ElementTag
|
||||
{
|
||||
uint tag;
|
||||
|
@ -143,8 +133,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -153,14 +148,14 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
static const DrawMonoid _418 = { 0u, 0u };
|
||||
static const DrawMonoid _442 = { 1u, 0u };
|
||||
static const DrawMonoid _444 = { 1u, 1u };
|
||||
static const DrawMonoid _348 = { 0u, 0u };
|
||||
static const DrawMonoid _372 = { 1u, 0u };
|
||||
static const DrawMonoid _374 = { 1u, 1u };
|
||||
|
||||
RWByteAddressBuffer _201 : register(u0, space0);
|
||||
ByteAddressBuffer _225 : register(t2, space0);
|
||||
ByteAddressBuffer _1004 : register(t3, space0);
|
||||
ByteAddressBuffer _1038 : register(t1, space0);
|
||||
RWByteAddressBuffer _187 : register(u0, space0);
|
||||
ByteAddressBuffer _211 : register(t2, space0);
|
||||
ByteAddressBuffer _934 : register(t3, space0);
|
||||
ByteAddressBuffer _968 : register(t1, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -176,9 +171,9 @@ groupshared DrawMonoid sh_scratch[256];
|
|||
|
||||
ElementTag Element_tag(ElementRef ref)
|
||||
{
|
||||
uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0);
|
||||
ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _375;
|
||||
uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0);
|
||||
ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _321;
|
||||
}
|
||||
|
||||
DrawMonoid map_tag(uint tag_word)
|
||||
|
@ -189,24 +184,24 @@ DrawMonoid map_tag(uint tag_word)
|
|||
case 5u:
|
||||
case 6u:
|
||||
{
|
||||
return _442;
|
||||
return _372;
|
||||
}
|
||||
case 9u:
|
||||
case 10u:
|
||||
{
|
||||
return _444;
|
||||
return _374;
|
||||
}
|
||||
default:
|
||||
{
|
||||
return _418;
|
||||
return _348;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ElementRef Element_index(ElementRef ref, uint index)
|
||||
{
|
||||
ElementRef _214 = { ref.offset + (index * 36u) };
|
||||
return _214;
|
||||
ElementRef _200 = { ref.offset + (index * 36u) };
|
||||
return _200;
|
||||
}
|
||||
|
||||
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
||||
|
@ -219,13 +214,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
|||
|
||||
DrawMonoid tag_monoid_identity()
|
||||
{
|
||||
return _418;
|
||||
return _348;
|
||||
}
|
||||
|
||||
FillColor FillColor_read(FillColorRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
||||
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||
FillColor s;
|
||||
s.rgba_color = raw0;
|
||||
return s;
|
||||
|
@ -233,8 +228,8 @@ FillColor FillColor_read(FillColorRef ref)
|
|||
|
||||
FillColor Element_FillColor_read(ElementRef ref)
|
||||
{
|
||||
FillColorRef _381 = { ref.offset + 4u };
|
||||
FillColorRef param = _381;
|
||||
FillColorRef _327 = { ref.offset + 4u };
|
||||
FillColorRef param = _327;
|
||||
return FillColor_read(param);
|
||||
}
|
||||
|
||||
|
@ -251,7 +246,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_201.Store(offset * 4 + 8, val);
|
||||
_187.Store(offset * 4 + 8, val);
|
||||
}
|
||||
|
||||
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
|
||||
|
@ -289,9 +284,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 1u;
|
||||
write_mem(param, param_1, param_2);
|
||||
AnnoColorRef _805 = { ref.offset + 4u };
|
||||
AnnoColorRef _735 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
AnnoColorRef param_4 = _805;
|
||||
AnnoColorRef param_4 = _735;
|
||||
AnnoColor param_5 = s;
|
||||
AnnoColor_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -299,11 +294,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
|
|||
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _225.Load((ix + 4u) * 4 + 0);
|
||||
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _211.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _211.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _211.Load((ix + 4u) * 4 + 0);
|
||||
FillLinGradient s;
|
||||
s.index = raw0;
|
||||
s.p0 = float2(asfloat(raw1), asfloat(raw2));
|
||||
|
@ -313,8 +308,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
|
|||
|
||||
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
|
||||
{
|
||||
FillLinGradientRef _389 = { ref.offset + 4u };
|
||||
FillLinGradientRef param = _389;
|
||||
FillLinGradientRef _335 = { ref.offset + 4u };
|
||||
FillLinGradientRef param = _335;
|
||||
return FillLinGradient_read(param);
|
||||
}
|
||||
|
||||
|
@ -365,9 +360,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 2u;
|
||||
write_mem(param, param_1, param_2);
|
||||
AnnoLinGradientRef _826 = { ref.offset + 4u };
|
||||
AnnoLinGradientRef _756 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
AnnoLinGradientRef param_4 = _826;
|
||||
AnnoLinGradientRef param_4 = _756;
|
||||
AnnoLinGradient param_5 = s;
|
||||
AnnoLinGradient_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -375,8 +370,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
|
|||
FillImage FillImage_read(FillImageRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
||||
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
|
||||
FillImage s;
|
||||
s.index = raw0;
|
||||
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
||||
|
@ -385,8 +380,8 @@ FillImage FillImage_read(FillImageRef ref)
|
|||
|
||||
FillImage Element_FillImage_read(ElementRef ref)
|
||||
{
|
||||
FillImageRef _397 = { ref.offset + 4u };
|
||||
FillImageRef param = _397;
|
||||
FillImageRef _343 = { ref.offset + 4u };
|
||||
FillImageRef param = _343;
|
||||
return FillImage_read(param);
|
||||
}
|
||||
|
||||
|
@ -429,32 +424,13 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 3u;
|
||||
write_mem(param, param_1, param_2);
|
||||
AnnoImageRef _847 = { ref.offset + 4u };
|
||||
AnnoImageRef _777 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
AnnoImageRef param_4 = _847;
|
||||
AnnoImageRef param_4 = _777;
|
||||
AnnoImage param_5 = s;
|
||||
AnnoImage_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
||||
Clip Clip_read(ClipRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
|
||||
Clip s;
|
||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
Clip Element_BeginClip_read(ElementRef ref)
|
||||
{
|
||||
ClipRef _405 = { ref.offset + 4u };
|
||||
ClipRef param = _405;
|
||||
return Clip_read(param);
|
||||
}
|
||||
|
||||
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
|
@ -486,20 +462,13 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 4u;
|
||||
write_mem(param, param_1, param_2);
|
||||
AnnoBeginClipRef _868 = { ref.offset + 4u };
|
||||
AnnoBeginClipRef _798 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
AnnoBeginClipRef param_4 = _868;
|
||||
AnnoBeginClipRef param_4 = _798;
|
||||
AnnoBeginClip param_5 = s;
|
||||
AnnoBeginClip_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
||||
Clip Element_EndClip_read(ElementRef ref)
|
||||
{
|
||||
ClipRef _413 = { ref.offset + 4u };
|
||||
ClipRef param = _413;
|
||||
return Clip_read(param);
|
||||
}
|
||||
|
||||
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
|
@ -527,9 +496,9 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
|
|||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 5u;
|
||||
write_mem(param, param_1, param_2);
|
||||
AnnoEndClipRef _886 = { ref.offset + 4u };
|
||||
AnnoEndClipRef _816 = { ref.offset + 4u };
|
||||
Alloc param_3 = a;
|
||||
AnnoEndClipRef param_4 = _886;
|
||||
AnnoEndClipRef param_4 = _816;
|
||||
AnnoEndClip param_5 = s;
|
||||
AnnoEndClip_write(param_3, param_4, param_5);
|
||||
}
|
||||
|
@ -537,8 +506,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
ElementRef _904 = { ix * 36u };
|
||||
ElementRef ref = _904;
|
||||
ElementRef _834 = { ix * 36u };
|
||||
ElementRef ref = _834;
|
||||
ElementRef param = ref;
|
||||
uint tag_word = Element_tag(param).tag;
|
||||
uint param_1 = tag_word;
|
||||
|
@ -575,11 +544,11 @@ void comp_main()
|
|||
DrawMonoid row = tag_monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
DrawMonoid _1010;
|
||||
_1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
||||
_1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
||||
row.path_ix = _1010.path_ix;
|
||||
row.clip_ix = _1010.clip_ix;
|
||||
DrawMonoid _940;
|
||||
_940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
||||
_940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
||||
row.path_ix = _940.path_ix;
|
||||
row.clip_ix = _940.clip_ix;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -588,9 +557,10 @@ void comp_main()
|
|||
row = combine_tag_monoid(param_10, param_11);
|
||||
}
|
||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||
uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u);
|
||||
AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) };
|
||||
AnnotatedRef out_ref = _1054;
|
||||
uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u);
|
||||
uint clip_out_base = _968.Load(48) >> uint(2);
|
||||
AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) };
|
||||
AnnotatedRef out_ref = _989;
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
AnnoColor anno_fill;
|
||||
|
@ -600,39 +570,43 @@ void comp_main()
|
|||
AnnoImage anno_img;
|
||||
Alloc param_28;
|
||||
AnnoBeginClip anno_begin_clip;
|
||||
Alloc param_33;
|
||||
Alloc param_32;
|
||||
AnnoEndClip anno_end_clip;
|
||||
Alloc param_38;
|
||||
Alloc param_36;
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
DrawMonoid param_12 = row;
|
||||
DrawMonoid param_13 = local[i_2];
|
||||
DrawMonoid m = combine_tag_monoid(param_12, param_13);
|
||||
_201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
|
||||
_201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
|
||||
DrawMonoid m = row;
|
||||
if (i_2 > 0u)
|
||||
{
|
||||
DrawMonoid param_12 = m;
|
||||
DrawMonoid param_13 = local[i_2 - 1u];
|
||||
m = combine_tag_monoid(param_12, param_13);
|
||||
}
|
||||
_187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
|
||||
_187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
|
||||
ElementRef param_14 = ref;
|
||||
uint param_15 = i_2;
|
||||
ElementRef this_ref = Element_index(param_14, param_15);
|
||||
ElementRef param_16 = this_ref;
|
||||
tag_word = Element_tag(param_16).tag;
|
||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
||||
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
|
||||
{
|
||||
uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u));
|
||||
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
|
||||
uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix);
|
||||
float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8));
|
||||
float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8));
|
||||
uint fill_mode = uint(linewidth >= 0.0f);
|
||||
if ((linewidth >= 0.0f) || (tag_word == 5u))
|
||||
{
|
||||
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8);
|
||||
uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix);
|
||||
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8)));
|
||||
uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8);
|
||||
uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix);
|
||||
mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8)));
|
||||
if (tag_word == 5u)
|
||||
{
|
||||
translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8)));
|
||||
translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8)));
|
||||
}
|
||||
}
|
||||
if (linewidth >= 0.0f)
|
||||
|
@ -649,9 +623,9 @@ void comp_main()
|
|||
anno_fill.bbox = bbox;
|
||||
anno_fill.linewidth = linewidth;
|
||||
anno_fill.rgba_color = fill.rgba_color;
|
||||
Alloc _1257;
|
||||
_1257.offset = _1038.Load(32);
|
||||
param_18.offset = _1257.offset;
|
||||
Alloc _1203;
|
||||
_1203.offset = _968.Load(32);
|
||||
param_18.offset = _1203.offset;
|
||||
AnnotatedRef param_19 = out_ref;
|
||||
uint param_20 = fill_mode;
|
||||
AnnoColor param_21 = anno_fill;
|
||||
|
@ -674,9 +648,9 @@ void comp_main()
|
|||
anno_lin.line_x = line_x;
|
||||
anno_lin.line_y = line_y;
|
||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||
Alloc _1353;
|
||||
_1353.offset = _1038.Load(32);
|
||||
param_23.offset = _1353.offset;
|
||||
Alloc _1299;
|
||||
_1299.offset = _968.Load(32);
|
||||
param_23.offset = _1299.offset;
|
||||
AnnotatedRef param_24 = out_ref;
|
||||
uint param_25 = fill_mode;
|
||||
AnnoLinGradient param_26 = anno_lin;
|
||||
|
@ -691,48 +665,51 @@ void comp_main()
|
|||
anno_img.linewidth = linewidth;
|
||||
anno_img.index = fill_img.index;
|
||||
anno_img.offset = fill_img.offset;
|
||||
Alloc _1381;
|
||||
_1381.offset = _1038.Load(32);
|
||||
param_28.offset = _1381.offset;
|
||||
Alloc _1327;
|
||||
_1327.offset = _968.Load(32);
|
||||
param_28.offset = _1327.offset;
|
||||
AnnotatedRef param_29 = out_ref;
|
||||
uint param_30 = fill_mode;
|
||||
AnnoImage param_31 = anno_img;
|
||||
Annotated_Image_write(param_28, param_29, param_30, param_31);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
case 9u:
|
||||
{
|
||||
if (tag_word == 9u)
|
||||
{
|
||||
ElementRef param_32 = this_ref;
|
||||
Clip begin_clip = Element_BeginClip_read(param_32);
|
||||
anno_begin_clip.bbox = begin_clip.bbox;
|
||||
anno_begin_clip.bbox = bbox;
|
||||
anno_begin_clip.linewidth = 0.0f;
|
||||
Alloc _1410;
|
||||
_1410.offset = _1038.Load(32);
|
||||
param_33.offset = _1410.offset;
|
||||
AnnotatedRef param_34 = out_ref;
|
||||
uint param_35 = 0u;
|
||||
AnnoBeginClip param_36 = anno_begin_clip;
|
||||
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
|
||||
Alloc _1344;
|
||||
_1344.offset = _968.Load(32);
|
||||
param_32.offset = _1344.offset;
|
||||
AnnotatedRef param_33 = out_ref;
|
||||
uint param_34 = 0u;
|
||||
AnnoBeginClip param_35 = anno_begin_clip;
|
||||
Annotated_BeginClip_write(param_32, param_33, param_34, param_35);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (tag_word == 10u)
|
||||
{
|
||||
ElementRef param_37 = this_ref;
|
||||
Clip end_clip = Element_EndClip_read(param_37);
|
||||
anno_end_clip.bbox = end_clip.bbox;
|
||||
Alloc _1435;
|
||||
_1435.offset = _1038.Load(32);
|
||||
param_38.offset = _1435.offset;
|
||||
AnnotatedRef param_39 = out_ref;
|
||||
AnnoEndClip param_40 = anno_end_clip;
|
||||
Annotated_EndClip_write(param_38, param_39, param_40);
|
||||
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||
Alloc _1368;
|
||||
_1368.offset = _968.Load(32);
|
||||
param_36.offset = _1368.offset;
|
||||
AnnotatedRef param_37 = out_ref;
|
||||
AnnoEndClip param_38 = anno_end_clip;
|
||||
Annotated_EndClip_write(param_36, param_37, param_38);
|
||||
}
|
||||
}
|
||||
if ((tag_word == 9u) || (tag_word == 10u))
|
||||
{
|
||||
uint path_ix = ~(out_ix + i_2);
|
||||
if (tag_word == 9u)
|
||||
{
|
||||
path_ix = m.path_ix;
|
||||
}
|
||||
_187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
|
||||
}
|
||||
out_ref.offset += 40u;
|
||||
}
|
||||
|
|
308
piet-gpu/shader/gen/draw_leaf.msl
generated
308
piet-gpu/shader/gen/draw_leaf.msl
generated
|
@ -87,16 +87,6 @@ struct FillImage
|
|||
int2 offset;
|
||||
};
|
||||
|
||||
struct ClipRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Clip
|
||||
{
|
||||
float4 bbox;
|
||||
};
|
||||
|
||||
struct ElementTag
|
||||
{
|
||||
uint tag;
|
||||
|
@ -217,8 +207,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -233,9 +228,9 @@ struct ConfigBuf
|
|||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
uint tag_and_flags = v_225.scene[ref.offset >> uint(2)];
|
||||
uint tag_and_flags = v_211.scene[ref.offset >> uint(2)];
|
||||
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
}
|
||||
|
||||
|
@ -284,20 +279,20 @@ DrawMonoid tag_monoid_identity()
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225)
|
||||
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_225.scene[ix + 0u];
|
||||
uint raw0 = v_211.scene[ix + 0u];
|
||||
FillColor s;
|
||||
s.rgba_color = raw0;
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
FillColorRef param = FillColorRef{ ref.offset + 4u };
|
||||
return FillColor_read(param, v_225);
|
||||
return FillColor_read(param, v_211);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -307,7 +302,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -315,61 +310,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_201.memory[offset] = val;
|
||||
v_187.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201)
|
||||
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.bbox.x);
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.bbox.y);
|
||||
write_mem(param_3, param_4, param_5, v_201);
|
||||
write_mem(param_3, param_4, param_5, v_187);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.bbox.z);
|
||||
write_mem(param_6, param_7, param_8, v_201);
|
||||
write_mem(param_6, param_7, param_8, v_187);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.bbox.w);
|
||||
write_mem(param_9, param_10, param_11, v_201);
|
||||
write_mem(param_9, param_10, param_11, v_187);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.linewidth);
|
||||
write_mem(param_12, param_13, param_14, v_201);
|
||||
write_mem(param_12, param_13, param_14, v_187);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = s.rgba_color;
|
||||
write_mem(param_15, param_16, param_17, v_201);
|
||||
write_mem(param_15, param_16, param_17, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201)
|
||||
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 1u;
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
|
||||
AnnoColor param_5 = s;
|
||||
AnnoColor_write(param_3, param_4, param_5, v_201);
|
||||
AnnoColor_write(param_3, param_4, param_5, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225)
|
||||
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_225.scene[ix + 0u];
|
||||
uint raw1 = v_225.scene[ix + 1u];
|
||||
uint raw2 = v_225.scene[ix + 2u];
|
||||
uint raw3 = v_225.scene[ix + 3u];
|
||||
uint raw4 = v_225.scene[ix + 4u];
|
||||
uint raw0 = v_211.scene[ix + 0u];
|
||||
uint raw1 = v_211.scene[ix + 1u];
|
||||
uint raw2 = v_211.scene[ix + 2u];
|
||||
uint raw3 = v_211.scene[ix + 3u];
|
||||
uint raw4 = v_211.scene[ix + 4u];
|
||||
FillLinGradient s;
|
||||
s.index = raw0;
|
||||
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
|
||||
|
@ -378,73 +373,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
|
||||
return FillLinGradient_read(param, v_225);
|
||||
return FillLinGradient_read(param, v_211);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201)
|
||||
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.bbox.x);
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.bbox.y);
|
||||
write_mem(param_3, param_4, param_5, v_201);
|
||||
write_mem(param_3, param_4, param_5, v_187);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.bbox.z);
|
||||
write_mem(param_6, param_7, param_8, v_201);
|
||||
write_mem(param_6, param_7, param_8, v_187);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.bbox.w);
|
||||
write_mem(param_9, param_10, param_11, v_201);
|
||||
write_mem(param_9, param_10, param_11, v_187);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.linewidth);
|
||||
write_mem(param_12, param_13, param_14, v_201);
|
||||
write_mem(param_12, param_13, param_14, v_187);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = s.index;
|
||||
write_mem(param_15, param_16, param_17, v_201);
|
||||
write_mem(param_15, param_16, param_17, v_187);
|
||||
Alloc param_18 = a;
|
||||
uint param_19 = ix + 6u;
|
||||
uint param_20 = as_type<uint>(s.line_x);
|
||||
write_mem(param_18, param_19, param_20, v_201);
|
||||
write_mem(param_18, param_19, param_20, v_187);
|
||||
Alloc param_21 = a;
|
||||
uint param_22 = ix + 7u;
|
||||
uint param_23 = as_type<uint>(s.line_y);
|
||||
write_mem(param_21, param_22, param_23, v_201);
|
||||
write_mem(param_21, param_22, param_23, v_187);
|
||||
Alloc param_24 = a;
|
||||
uint param_25 = ix + 8u;
|
||||
uint param_26 = as_type<uint>(s.line_c);
|
||||
write_mem(param_24, param_25, param_26, v_201);
|
||||
write_mem(param_24, param_25, param_26, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201)
|
||||
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 2u;
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
|
||||
AnnoLinGradient param_5 = s;
|
||||
AnnoLinGradient_write(param_3, param_4, param_5, v_201);
|
||||
AnnoLinGradient_write(param_3, param_4, param_5, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225)
|
||||
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_225.scene[ix + 0u];
|
||||
uint raw1 = v_225.scene[ix + 1u];
|
||||
uint raw0 = v_211.scene[ix + 0u];
|
||||
uint raw1 = v_211.scene[ix + 1u];
|
||||
FillImage s;
|
||||
s.index = raw0;
|
||||
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
||||
|
@ -452,167 +447,140 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf&
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||
{
|
||||
FillImageRef param = FillImageRef{ ref.offset + 4u };
|
||||
return FillImage_read(param, v_225);
|
||||
return FillImage_read(param, v_211);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201)
|
||||
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.bbox.x);
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.bbox.y);
|
||||
write_mem(param_3, param_4, param_5, v_201);
|
||||
write_mem(param_3, param_4, param_5, v_187);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.bbox.z);
|
||||
write_mem(param_6, param_7, param_8, v_201);
|
||||
write_mem(param_6, param_7, param_8, v_187);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.bbox.w);
|
||||
write_mem(param_9, param_10, param_11, v_201);
|
||||
write_mem(param_9, param_10, param_11, v_187);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.linewidth);
|
||||
write_mem(param_12, param_13, param_14, v_201);
|
||||
write_mem(param_12, param_13, param_14, v_187);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = s.index;
|
||||
write_mem(param_15, param_16, param_17, v_201);
|
||||
write_mem(param_15, param_16, param_17, v_187);
|
||||
Alloc param_18 = a;
|
||||
uint param_19 = ix + 6u;
|
||||
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
|
||||
write_mem(param_18, param_19, param_20, v_201);
|
||||
write_mem(param_18, param_19, param_20, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201)
|
||||
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 3u;
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
|
||||
AnnoImage param_5 = s;
|
||||
AnnoImage_write(param_3, param_4, param_5, v_201);
|
||||
AnnoImage_write(param_3, param_4, param_5, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_225.scene[ix + 0u];
|
||||
uint raw1 = v_225.scene[ix + 1u];
|
||||
uint raw2 = v_225.scene[ix + 2u];
|
||||
uint raw3 = v_225.scene[ix + 3u];
|
||||
Clip s;
|
||||
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
{
|
||||
ClipRef param = ClipRef{ ref.offset + 4u };
|
||||
return Clip_read(param, v_225);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201)
|
||||
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.bbox.x);
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.bbox.y);
|
||||
write_mem(param_3, param_4, param_5, v_201);
|
||||
write_mem(param_3, param_4, param_5, v_187);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.bbox.z);
|
||||
write_mem(param_6, param_7, param_8, v_201);
|
||||
write_mem(param_6, param_7, param_8, v_187);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.bbox.w);
|
||||
write_mem(param_9, param_10, param_11, v_201);
|
||||
write_mem(param_9, param_10, param_11, v_187);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.linewidth);
|
||||
write_mem(param_12, param_13, param_14, v_201);
|
||||
write_mem(param_12, param_13, param_14, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201)
|
||||
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = (flags << uint(16)) | 4u;
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
|
||||
AnnoBeginClip param_5 = s;
|
||||
AnnoBeginClip_write(param_3, param_4, param_5, v_201);
|
||||
AnnoBeginClip_write(param_3, param_4, param_5, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||
{
|
||||
ClipRef param = ClipRef{ ref.offset + 4u };
|
||||
return Clip_read(param, v_225);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
|
||||
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.bbox.x);
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.bbox.y);
|
||||
write_mem(param_3, param_4, param_5, v_201);
|
||||
write_mem(param_3, param_4, param_5, v_187);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.bbox.z);
|
||||
write_mem(param_6, param_7, param_8, v_201);
|
||||
write_mem(param_6, param_7, param_8, v_187);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.bbox.w);
|
||||
write_mem(param_9, param_10, param_11, v_201);
|
||||
write_mem(param_9, param_10, param_11, v_187);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
|
||||
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint param_2 = 5u;
|
||||
write_mem(param, param_1, param_2, v_201);
|
||||
write_mem(param, param_1, param_2, v_187);
|
||||
Alloc param_3 = a;
|
||||
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
|
||||
AnnoEndClip param_5 = s;
|
||||
AnnoEndClip_write(param_3, param_4, param_5, v_201);
|
||||
AnnoEndClip_write(param_3, param_4, param_5, v_187);
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup DrawMonoid sh_scratch[256];
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
ElementRef ref = ElementRef{ ix * 36u };
|
||||
ElementRef param = ref;
|
||||
uint tag_word = Element_tag(param, v_225).tag;
|
||||
uint tag_word = Element_tag(param, v_211).tag;
|
||||
uint param_1 = tag_word;
|
||||
DrawMonoid agg = map_tag(param_1);
|
||||
spvUnsafeArray<DrawMonoid, 8> local;
|
||||
|
@ -622,7 +590,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
ElementRef param_2 = ref;
|
||||
uint param_3 = i;
|
||||
ElementRef param_4 = Element_index(param_2, param_3);
|
||||
tag_word = Element_tag(param_4, v_225).tag;
|
||||
tag_word = Element_tag(param_4, v_211).tag;
|
||||
uint param_5 = tag_word;
|
||||
DrawMonoid param_6 = agg;
|
||||
DrawMonoid param_7 = map_tag(param_5);
|
||||
|
@ -647,9 +615,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
DrawMonoid row = tag_monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
uint _1007 = gl_WorkGroupID.x - 1u;
|
||||
row.path_ix = _1004.parent[_1007].path_ix;
|
||||
row.clip_ix = _1004.parent[_1007].clip_ix;
|
||||
uint _937 = gl_WorkGroupID.x - 1u;
|
||||
row.path_ix = _934.parent[_937].path_ix;
|
||||
row.clip_ix = _934.parent[_937].clip_ix;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -658,8 +626,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
row = combine_tag_monoid(param_10, param_11);
|
||||
}
|
||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||
uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
||||
AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) };
|
||||
uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
||||
uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2);
|
||||
AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) };
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
AnnoColor anno_fill;
|
||||
|
@ -669,39 +638,43 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
AnnoImage anno_img;
|
||||
Alloc param_28;
|
||||
AnnoBeginClip anno_begin_clip;
|
||||
Alloc param_33;
|
||||
Alloc param_32;
|
||||
AnnoEndClip anno_end_clip;
|
||||
Alloc param_38;
|
||||
Alloc param_36;
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
DrawMonoid param_12 = row;
|
||||
DrawMonoid param_13 = local[i_2];
|
||||
DrawMonoid m = combine_tag_monoid(param_12, param_13);
|
||||
v_201.memory[out_base + (i_2 * 2u)] = m.path_ix;
|
||||
v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
|
||||
DrawMonoid m = row;
|
||||
if (i_2 > 0u)
|
||||
{
|
||||
DrawMonoid param_12 = m;
|
||||
DrawMonoid param_13 = local[i_2 - 1u];
|
||||
m = combine_tag_monoid(param_12, param_13);
|
||||
}
|
||||
v_187.memory[out_base + (i_2 * 2u)] = m.path_ix;
|
||||
v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
|
||||
ElementRef param_14 = ref;
|
||||
uint param_15 = i_2;
|
||||
ElementRef this_ref = Element_index(param_14, param_15);
|
||||
ElementRef param_16 = this_ref;
|
||||
tag_word = Element_tag(param_16, v_225).tag;
|
||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
||||
tag_word = Element_tag(param_16, v_211).tag;
|
||||
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
|
||||
{
|
||||
uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u));
|
||||
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0;
|
||||
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0;
|
||||
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0;
|
||||
float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0;
|
||||
uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
|
||||
float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0;
|
||||
float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0;
|
||||
float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0;
|
||||
float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
float linewidth = as_type<float>(v_201.memory[bbox_offset + 4u]);
|
||||
float linewidth = as_type<float>(v_187.memory[bbox_offset + 4u]);
|
||||
uint fill_mode = uint(linewidth >= 0.0);
|
||||
if ((linewidth >= 0.0) || (tag_word == 5u))
|
||||
{
|
||||
uint trans_ix = v_201.memory[bbox_offset + 5u];
|
||||
uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
||||
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u]));
|
||||
uint trans_ix = v_187.memory[bbox_offset + 5u];
|
||||
uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
||||
mat = as_type<float4>(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u]));
|
||||
if (tag_word == 5u)
|
||||
{
|
||||
translate = as_type<float2>(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u]));
|
||||
translate = as_type<float2>(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u]));
|
||||
}
|
||||
}
|
||||
if (linewidth >= 0.0)
|
||||
|
@ -714,21 +687,21 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
case 4u:
|
||||
{
|
||||
ElementRef param_17 = this_ref;
|
||||
FillColor fill = Element_FillColor_read(param_17, v_225);
|
||||
FillColor fill = Element_FillColor_read(param_17, v_211);
|
||||
anno_fill.bbox = bbox;
|
||||
anno_fill.linewidth = linewidth;
|
||||
anno_fill.rgba_color = fill.rgba_color;
|
||||
param_18.offset = _1038.conf.anno_alloc.offset;
|
||||
param_18.offset = _968.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_19 = out_ref;
|
||||
uint param_20 = fill_mode;
|
||||
AnnoColor param_21 = anno_fill;
|
||||
Annotated_Color_write(param_18, param_19, param_20, param_21, v_201);
|
||||
Annotated_Color_write(param_18, param_19, param_20, param_21, v_187);
|
||||
break;
|
||||
}
|
||||
case 5u:
|
||||
{
|
||||
ElementRef param_22 = this_ref;
|
||||
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225);
|
||||
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211);
|
||||
anno_lin.bbox = bbox;
|
||||
anno_lin.linewidth = linewidth;
|
||||
anno_lin.index = lin.index;
|
||||
|
@ -741,57 +714,60 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
|||
anno_lin.line_x = line_x;
|
||||
anno_lin.line_y = line_y;
|
||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||
param_23.offset = _1038.conf.anno_alloc.offset;
|
||||
param_23.offset = _968.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_24 = out_ref;
|
||||
uint param_25 = fill_mode;
|
||||
AnnoLinGradient param_26 = anno_lin;
|
||||
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201);
|
||||
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187);
|
||||
break;
|
||||
}
|
||||
case 6u:
|
||||
{
|
||||
ElementRef param_27 = this_ref;
|
||||
FillImage fill_img = Element_FillImage_read(param_27, v_225);
|
||||
FillImage fill_img = Element_FillImage_read(param_27, v_211);
|
||||
anno_img.bbox = bbox;
|
||||
anno_img.linewidth = linewidth;
|
||||
anno_img.index = fill_img.index;
|
||||
anno_img.offset = fill_img.offset;
|
||||
param_28.offset = _1038.conf.anno_alloc.offset;
|
||||
param_28.offset = _968.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_29 = out_ref;
|
||||
uint param_30 = fill_mode;
|
||||
AnnoImage param_31 = anno_img;
|
||||
Annotated_Image_write(param_28, param_29, param_30, param_31, v_201);
|
||||
Annotated_Image_write(param_28, param_29, param_30, param_31, v_187);
|
||||
break;
|
||||
}
|
||||
case 9u:
|
||||
{
|
||||
anno_begin_clip.bbox = bbox;
|
||||
anno_begin_clip.linewidth = 0.0;
|
||||
param_32.offset = _968.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_33 = out_ref;
|
||||
uint param_34 = 0u;
|
||||
AnnoBeginClip param_35 = anno_begin_clip;
|
||||
Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (tag_word == 9u)
|
||||
{
|
||||
ElementRef param_32 = this_ref;
|
||||
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
|
||||
anno_begin_clip.bbox = begin_clip.bbox;
|
||||
anno_begin_clip.linewidth = 0.0;
|
||||
param_33.offset = _1038.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_34 = out_ref;
|
||||
uint param_35 = 0u;
|
||||
AnnoBeginClip param_36 = anno_begin_clip;
|
||||
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (tag_word == 10u)
|
||||
{
|
||||
ElementRef param_37 = this_ref;
|
||||
Clip end_clip = Element_EndClip_read(param_37, v_225);
|
||||
anno_end_clip.bbox = end_clip.bbox;
|
||||
param_38.offset = _1038.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_39 = out_ref;
|
||||
AnnoEndClip param_40 = anno_end_clip;
|
||||
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
|
||||
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||
param_36.offset = _968.conf.anno_alloc.offset;
|
||||
AnnotatedRef param_37 = out_ref;
|
||||
AnnoEndClip param_38 = anno_end_clip;
|
||||
Annotated_EndClip_write(param_36, param_37, param_38, v_187);
|
||||
}
|
||||
}
|
||||
if ((tag_word == 9u) || (tag_word == 10u))
|
||||
{
|
||||
uint path_ix = ~(out_ix + i_2);
|
||||
if (tag_word == 9u)
|
||||
{
|
||||
path_ix = m.path_ix;
|
||||
}
|
||||
v_187.memory[clip_out_base + m.clip_ix] = path_ix;
|
||||
}
|
||||
out_ref.offset += 40u;
|
||||
}
|
||||
|
|
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/draw_reduce.hlsl
generated
5
piet-gpu/shader/gen/draw_reduce.hlsl
generated
|
@ -36,8 +36,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
5
piet-gpu/shader/gen/draw_reduce.msl
generated
5
piet-gpu/shader/gen/draw_reduce.msl
generated
|
@ -66,8 +66,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
Binary file not shown.
13
piet-gpu/shader/gen/kernel4.hlsl
generated
13
piet-gpu/shader/gen/kernel4.hlsl
generated
|
@ -117,8 +117,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -457,7 +462,6 @@ void comp_main()
|
|||
TileSegRef tile_seg_ref;
|
||||
float area[8];
|
||||
uint blend_stack[128][8];
|
||||
float blend_alpha_stack[128][8];
|
||||
while (mem_ok)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
|
@ -640,7 +644,6 @@ void comp_main()
|
|||
float4 param_34 = float4(rgba[k_11]);
|
||||
uint _1390 = packsRGB(param_34);
|
||||
blend_stack[d_2][k_11] = _1390;
|
||||
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
|
||||
rgba[k_11] = 0.0f.xxxx;
|
||||
}
|
||||
clip_depth++;
|
||||
|
@ -655,7 +658,7 @@ void comp_main()
|
|||
uint d_3 = min(clip_depth, 127u);
|
||||
uint param_35 = blend_stack[d_3][k_12];
|
||||
float4 bg = unpacksRGB(param_35);
|
||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
||||
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
||||
}
|
||||
cmd_ref.offset += 4u;
|
||||
|
@ -665,8 +668,8 @@ void comp_main()
|
|||
{
|
||||
Alloc param_36 = cmd_alloc;
|
||||
CmdRef param_37 = cmd_ref;
|
||||
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||
cmd_ref = _1469;
|
||||
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||
cmd_ref = _1453;
|
||||
cmd_alloc.offset = cmd_ref.offset;
|
||||
break;
|
||||
}
|
||||
|
|
9
piet-gpu/shader/gen/kernel4.msl
generated
9
piet-gpu/shader/gen/kernel4.msl
generated
|
@ -175,8 +175,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
TileSegRef tile_seg_ref;
|
||||
spvUnsafeArray<float, 8> area;
|
||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
||||
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
|
||||
while (mem_ok)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
|
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
float4 param_34 = float4(rgba[k_11]);
|
||||
uint _1390 = packsRGB(param_34);
|
||||
blend_stack[d_2][k_11] = _1390;
|
||||
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
|
||||
rgba[k_11] = float4(0.0);
|
||||
}
|
||||
clip_depth++;
|
||||
|
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
uint d_3 = min(clip_depth, 127u);
|
||||
uint param_35 = blend_stack[d_3][k_12];
|
||||
float4 bg = unpacksRGB(param_35);
|
||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
||||
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
||||
}
|
||||
cmd_ref.offset += 4u;
|
||||
|
|
BIN
piet-gpu/shader/gen/kernel4.spv
generated
BIN
piet-gpu/shader/gen/kernel4.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
Binary file not shown.
13
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
13
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
|
@ -117,8 +117,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -457,7 +462,6 @@ void comp_main()
|
|||
TileSegRef tile_seg_ref;
|
||||
float area[8];
|
||||
uint blend_stack[128][8];
|
||||
float blend_alpha_stack[128][8];
|
||||
while (mem_ok)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
|
@ -640,7 +644,6 @@ void comp_main()
|
|||
float4 param_34 = float4(rgba[k_11]);
|
||||
uint _1390 = packsRGB(param_34);
|
||||
blend_stack[d_2][k_11] = _1390;
|
||||
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
|
||||
rgba[k_11] = 0.0f.xxxx;
|
||||
}
|
||||
clip_depth++;
|
||||
|
@ -655,7 +658,7 @@ void comp_main()
|
|||
uint d_3 = min(clip_depth, 127u);
|
||||
uint param_35 = blend_stack[d_3][k_12];
|
||||
float4 bg = unpacksRGB(param_35);
|
||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
||||
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
||||
}
|
||||
cmd_ref.offset += 4u;
|
||||
|
@ -665,8 +668,8 @@ void comp_main()
|
|||
{
|
||||
Alloc param_36 = cmd_alloc;
|
||||
CmdRef param_37 = cmd_ref;
|
||||
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||
cmd_ref = _1469;
|
||||
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||
cmd_ref = _1453;
|
||||
cmd_alloc.offset = cmd_ref.offset;
|
||||
break;
|
||||
}
|
||||
|
|
9
piet-gpu/shader/gen/kernel4_gray.msl
generated
9
piet-gpu/shader/gen/kernel4_gray.msl
generated
|
@ -175,8 +175,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
TileSegRef tile_seg_ref;
|
||||
spvUnsafeArray<float, 8> area;
|
||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
||||
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
|
||||
while (mem_ok)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
|
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
float4 param_34 = float4(rgba[k_11]);
|
||||
uint _1390 = packsRGB(param_34);
|
||||
blend_stack[d_2][k_11] = _1390;
|
||||
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
|
||||
rgba[k_11] = float4(0.0);
|
||||
}
|
||||
clip_depth++;
|
||||
|
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
|||
uint d_3 = min(clip_depth, 127u);
|
||||
uint param_35 = blend_stack[d_3][k_12];
|
||||
float4 bg = unpacksRGB(param_35);
|
||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
||||
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
||||
}
|
||||
cmd_ref.offset += 4u;
|
||||
|
|
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/path_coarse.hlsl
generated
5
piet-gpu/shader/gen/path_coarse.hlsl
generated
|
@ -86,8 +86,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
5
piet-gpu/shader/gen/path_coarse.msl
generated
5
piet-gpu/shader/gen/path_coarse.msl
generated
|
@ -146,8 +146,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
Binary file not shown.
127
piet-gpu/shader/gen/pathseg.hlsl
generated
127
piet-gpu/shader/gen/pathseg.hlsl
generated
|
@ -64,8 +64,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -80,7 +85,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u };
|
|||
RWByteAddressBuffer _111 : register(u0, space0);
|
||||
ByteAddressBuffer _574 : register(t2, space0);
|
||||
ByteAddressBuffer _639 : register(t1, space0);
|
||||
ByteAddressBuffer _709 : register(t3, space0);
|
||||
ByteAddressBuffer _710 : register(t3, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -356,7 +361,7 @@ uint round_up(float x)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
||||
uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
||||
uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
||||
uint param = tag_word;
|
||||
TagMonoid local_tm = reduce_tag(param);
|
||||
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
||||
|
@ -377,17 +382,17 @@ void comp_main()
|
|||
TagMonoid tm = tag_monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
TagMonoid _715;
|
||||
_715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
|
||||
_715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
|
||||
_715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
|
||||
_715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
|
||||
_715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
|
||||
tm.trans_ix = _715.trans_ix;
|
||||
tm.linewidth_ix = _715.linewidth_ix;
|
||||
tm.pathseg_ix = _715.pathseg_ix;
|
||||
tm.path_ix = _715.path_ix;
|
||||
tm.pathseg_offset = _715.pathseg_offset;
|
||||
TagMonoid _716;
|
||||
_716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
|
||||
_716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
|
||||
_716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
|
||||
_716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
|
||||
_716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
|
||||
tm.trans_ix = _716.trans_ix;
|
||||
tm.linewidth_ix = _716.linewidth_ix;
|
||||
tm.pathseg_ix = _716.pathseg_ix;
|
||||
tm.path_ix = _716.path_ix;
|
||||
tm.pathseg_offset = _716.pathseg_offset;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -395,14 +400,14 @@ void comp_main()
|
|||
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
|
||||
tm = combine_tag_monoid(param_3, param_4);
|
||||
}
|
||||
uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset;
|
||||
uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix;
|
||||
uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset;
|
||||
uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix;
|
||||
uint save_path_ix = tm.path_ix;
|
||||
uint trans_ix = tm.trans_ix;
|
||||
TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) };
|
||||
TransformSegRef trans_ref = _770;
|
||||
PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
|
||||
PathSegRef ps_ref = _780;
|
||||
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
|
||||
TransformSegRef trans_ref = _771;
|
||||
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
|
||||
PathSegRef ps_ref = _781;
|
||||
float linewidth[4];
|
||||
uint save_trans_ix[4];
|
||||
float2 p0;
|
||||
|
@ -455,9 +460,9 @@ void comp_main()
|
|||
}
|
||||
}
|
||||
}
|
||||
Alloc _876;
|
||||
_876.offset = _639.Load(36);
|
||||
param_13.offset = _876.offset;
|
||||
Alloc _877;
|
||||
_877.offset = _639.Load(36);
|
||||
param_13.offset = _877.offset;
|
||||
TransformSegRef param_14 = trans_ref;
|
||||
TransformSeg transform = TransformSeg_read(param_13, param_14);
|
||||
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
|
||||
|
@ -466,25 +471,25 @@ void comp_main()
|
|||
if (seg_type >= 2u)
|
||||
{
|
||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||
float4 _946 = bbox;
|
||||
float2 _949 = min(_946.xy, p2);
|
||||
bbox.x = _949.x;
|
||||
bbox.y = _949.y;
|
||||
float4 _954 = bbox;
|
||||
float2 _957 = max(_954.zw, p2);
|
||||
bbox.z = _957.x;
|
||||
bbox.w = _957.y;
|
||||
float4 _947 = bbox;
|
||||
float2 _950 = min(_947.xy, p2);
|
||||
bbox.x = _950.x;
|
||||
bbox.y = _950.y;
|
||||
float4 _955 = bbox;
|
||||
float2 _958 = max(_955.zw, p2);
|
||||
bbox.z = _958.x;
|
||||
bbox.w = _958.y;
|
||||
if (seg_type == 3u)
|
||||
{
|
||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||
float4 _982 = bbox;
|
||||
float2 _985 = min(_982.xy, p3);
|
||||
bbox.x = _985.x;
|
||||
bbox.y = _985.y;
|
||||
float4 _990 = bbox;
|
||||
float2 _993 = max(_990.zw, p3);
|
||||
bbox.z = _993.x;
|
||||
bbox.w = _993.y;
|
||||
float4 _983 = bbox;
|
||||
float2 _986 = min(_983.xy, p3);
|
||||
bbox.x = _986.x;
|
||||
bbox.y = _986.y;
|
||||
float4 _991 = bbox;
|
||||
float2 _994 = max(_991.zw, p3);
|
||||
bbox.z = _994.x;
|
||||
bbox.w = _994.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -515,9 +520,9 @@ void comp_main()
|
|||
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
|
||||
cubic.stroke = stroke;
|
||||
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
|
||||
Alloc _1088;
|
||||
_1088.offset = _639.Load(28);
|
||||
param_15.offset = _1088.offset;
|
||||
Alloc _1089;
|
||||
_1089.offset = _639.Load(28);
|
||||
param_15.offset = _1089.offset;
|
||||
PathSegRef param_16 = ps_ref;
|
||||
uint param_17 = fill_mode;
|
||||
PathCubic param_18 = cubic;
|
||||
|
@ -574,17 +579,17 @@ void comp_main()
|
|||
Monoid param_24 = local[i_4];
|
||||
Monoid m = combine_monoid(param_23, param_24);
|
||||
bool do_atomic = false;
|
||||
bool _1263 = i_4 == 3u;
|
||||
bool _1269;
|
||||
if (_1263)
|
||||
bool _1264 = i_4 == 3u;
|
||||
bool _1270;
|
||||
if (_1264)
|
||||
{
|
||||
_1269 = gl_LocalInvocationID.x == 255u;
|
||||
_1270 = gl_LocalInvocationID.x == 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1269 = _1263;
|
||||
_1270 = _1264;
|
||||
}
|
||||
if (_1269)
|
||||
if (_1270)
|
||||
{
|
||||
do_atomic = true;
|
||||
}
|
||||
|
@ -612,30 +617,30 @@ void comp_main()
|
|||
}
|
||||
if (do_atomic)
|
||||
{
|
||||
bool _1334 = m.bbox.z > m.bbox.x;
|
||||
bool _1343;
|
||||
if (!_1334)
|
||||
bool _1335 = m.bbox.z > m.bbox.x;
|
||||
bool _1344;
|
||||
if (!_1335)
|
||||
{
|
||||
_1343 = m.bbox.w > m.bbox.y;
|
||||
_1344 = m.bbox.w > m.bbox.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1343 = _1334;
|
||||
_1344 = _1335;
|
||||
}
|
||||
if (_1343)
|
||||
if (_1344)
|
||||
{
|
||||
float param_29 = m.bbox.x;
|
||||
uint _1352;
|
||||
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352);
|
||||
uint _1353;
|
||||
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
|
||||
float param_30 = m.bbox.y;
|
||||
uint _1360;
|
||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360);
|
||||
uint _1361;
|
||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
|
||||
float param_31 = m.bbox.z;
|
||||
uint _1368;
|
||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368);
|
||||
uint _1369;
|
||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
|
||||
float param_32 = m.bbox.w;
|
||||
uint _1376;
|
||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376);
|
||||
uint _1377;
|
||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
|
||||
}
|
||||
bbox_out_ix += 6u;
|
||||
}
|
||||
|
|
83
piet-gpu/shader/gen/pathseg.msl
generated
83
piet-gpu/shader/gen/pathseg.msl
generated
|
@ -129,8 +129,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -430,7 +435,7 @@ uint round_up(thread const float& x)
|
|||
return uint(fast::min(65535.0, ceil(x) + 32768.0));
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup TagMonoid sh_tag[256];
|
||||
threadgroup Monoid sh_scratch[256];
|
||||
|
@ -456,12 +461,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
TagMonoid tm = tag_monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
uint _712 = gl_WorkGroupID.x - 1u;
|
||||
tm.trans_ix = _709.parent[_712].trans_ix;
|
||||
tm.linewidth_ix = _709.parent[_712].linewidth_ix;
|
||||
tm.pathseg_ix = _709.parent[_712].pathseg_ix;
|
||||
tm.path_ix = _709.parent[_712].path_ix;
|
||||
tm.pathseg_offset = _709.parent[_712].pathseg_offset;
|
||||
uint _713 = gl_WorkGroupID.x - 1u;
|
||||
tm.trans_ix = _710.parent[_713].trans_ix;
|
||||
tm.linewidth_ix = _710.parent[_713].linewidth_ix;
|
||||
tm.pathseg_ix = _710.parent[_713].pathseg_ix;
|
||||
tm.path_ix = _710.parent[_713].path_ix;
|
||||
tm.pathseg_offset = _710.parent[_713].pathseg_offset;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -536,25 +541,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
if (seg_type >= 2u)
|
||||
{
|
||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||
float4 _946 = bbox;
|
||||
float2 _949 = fast::min(_946.xy, p2);
|
||||
bbox.x = _949.x;
|
||||
bbox.y = _949.y;
|
||||
float4 _954 = bbox;
|
||||
float2 _957 = fast::max(_954.zw, p2);
|
||||
bbox.z = _957.x;
|
||||
bbox.w = _957.y;
|
||||
float4 _947 = bbox;
|
||||
float2 _950 = fast::min(_947.xy, p2);
|
||||
bbox.x = _950.x;
|
||||
bbox.y = _950.y;
|
||||
float4 _955 = bbox;
|
||||
float2 _958 = fast::max(_955.zw, p2);
|
||||
bbox.z = _958.x;
|
||||
bbox.w = _958.y;
|
||||
if (seg_type == 3u)
|
||||
{
|
||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||
float4 _982 = bbox;
|
||||
float2 _985 = fast::min(_982.xy, p3);
|
||||
bbox.x = _985.x;
|
||||
bbox.y = _985.y;
|
||||
float4 _990 = bbox;
|
||||
float2 _993 = fast::max(_990.zw, p3);
|
||||
bbox.z = _993.x;
|
||||
bbox.w = _993.y;
|
||||
float4 _983 = bbox;
|
||||
float2 _986 = fast::min(_983.xy, p3);
|
||||
bbox.x = _986.x;
|
||||
bbox.y = _986.y;
|
||||
float4 _991 = bbox;
|
||||
float2 _994 = fast::max(_991.zw, p3);
|
||||
bbox.z = _994.x;
|
||||
bbox.w = _994.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -642,17 +647,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
Monoid param_24 = local[i_4];
|
||||
Monoid m = combine_monoid(param_23, param_24);
|
||||
bool do_atomic = false;
|
||||
bool _1263 = i_4 == 3u;
|
||||
bool _1269;
|
||||
if (_1263)
|
||||
bool _1264 = i_4 == 3u;
|
||||
bool _1270;
|
||||
if (_1264)
|
||||
{
|
||||
_1269 = gl_LocalInvocationID.x == 255u;
|
||||
_1270 = gl_LocalInvocationID.x == 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1269 = _1263;
|
||||
_1270 = _1264;
|
||||
}
|
||||
if (_1269)
|
||||
if (_1270)
|
||||
{
|
||||
do_atomic = true;
|
||||
}
|
||||
|
@ -680,26 +685,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
}
|
||||
if (do_atomic)
|
||||
{
|
||||
bool _1334 = m.bbox.z > m.bbox.x;
|
||||
bool _1343;
|
||||
if (!_1334)
|
||||
bool _1335 = m.bbox.z > m.bbox.x;
|
||||
bool _1344;
|
||||
if (!_1335)
|
||||
{
|
||||
_1343 = m.bbox.w > m.bbox.y;
|
||||
_1344 = m.bbox.w > m.bbox.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1343 = _1334;
|
||||
_1344 = _1335;
|
||||
}
|
||||
if (_1343)
|
||||
if (_1344)
|
||||
{
|
||||
float param_29 = m.bbox.x;
|
||||
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||
float param_30 = m.bbox.y;
|
||||
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||
float param_31 = m.bbox.z;
|
||||
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||
float param_32 = m.bbox.w;
|
||||
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||
}
|
||||
bbox_out_ix += 6u;
|
||||
}
|
||||
|
|
BIN
piet-gpu/shader/gen/pathseg.spv
generated
BIN
piet-gpu/shader/gen/pathseg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
Binary file not shown.
27
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
27
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
|
@ -26,8 +26,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -37,9 +42,9 @@ struct Config
|
|||
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
|
||||
|
||||
ByteAddressBuffer _139 : register(t1, space0);
|
||||
ByteAddressBuffer _150 : register(t2, space0);
|
||||
RWByteAddressBuffer _237 : register(u3, space0);
|
||||
RWByteAddressBuffer _257 : register(u0, space0);
|
||||
ByteAddressBuffer _151 : register(t2, space0);
|
||||
RWByteAddressBuffer _238 : register(u3, space0);
|
||||
RWByteAddressBuffer _258 : register(u0, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -83,13 +88,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||
uint scene_ix = (_139.Load(64) >> uint(2)) + ix;
|
||||
uint tag_word = _150.Load(scene_ix * 4 + 0);
|
||||
uint scene_ix = (_139.Load(84) >> uint(2)) + ix;
|
||||
uint tag_word = _151.Load(scene_ix * 4 + 0);
|
||||
uint param = tag_word;
|
||||
TagMonoid agg = reduce_tag(param);
|
||||
for (uint i = 1u; i < 2u; i++)
|
||||
{
|
||||
tag_word = _150.Load((scene_ix + i) * 4 + 0);
|
||||
tag_word = _151.Load((scene_ix + i) * 4 + 0);
|
||||
uint param_1 = tag_word;
|
||||
TagMonoid param_2 = agg;
|
||||
TagMonoid param_3 = reduce_tag(param_1);
|
||||
|
@ -111,11 +116,11 @@ void comp_main()
|
|||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
_237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
||||
_237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
||||
_237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
||||
_237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
||||
_237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
||||
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
||||
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
||||
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
||||
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
||||
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
21
piet-gpu/shader/gen/pathtag_reduce.msl
generated
21
piet-gpu/shader/gen/pathtag_reduce.msl
generated
|
@ -33,8 +33,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -103,17 +108,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid&
|
|||
return c;
|
||||
}
|
||||
|
||||
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup TagMonoid sh_scratch[128];
|
||||
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
|
||||
uint tag_word = _150.scene[scene_ix];
|
||||
uint tag_word = _151.scene[scene_ix];
|
||||
uint param = tag_word;
|
||||
TagMonoid agg = reduce_tag(param);
|
||||
for (uint i = 1u; i < 2u; i++)
|
||||
{
|
||||
tag_word = _150.scene[scene_ix + i];
|
||||
tag_word = _151.scene[scene_ix + i];
|
||||
uint param_1 = tag_word;
|
||||
TagMonoid param_2 = agg;
|
||||
TagMonoid param_3 = reduce_tag(param_1);
|
||||
|
@ -135,11 +140,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene
|
|||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
||||
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
||||
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
||||
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
||||
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
||||
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
||||
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
||||
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
||||
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
||||
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/tile_alloc.hlsl
generated
5
piet-gpu/shader/gen/tile_alloc.hlsl
generated
|
@ -60,8 +60,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
5
piet-gpu/shader/gen/tile_alloc.msl
generated
5
piet-gpu/shader/gen/tile_alloc.msl
generated
|
@ -81,8 +81,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/transform_leaf.hlsl
generated
7
piet-gpu/shader/gen/transform_leaf.hlsl
generated
|
@ -39,8 +39,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -150,7 +155,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _285 = { _278.Load(56) + (ix * 24u) };
|
||||
TransformRef _285 = { _278.Load(76) + (ix * 24u) };
|
||||
TransformRef ref = _285;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
|
|
5
piet-gpu/shader/gen/transform_leaf.msl
generated
5
piet-gpu/shader/gen/transform_leaf.msl
generated
|
@ -102,8 +102,13 @@ struct Config
|
|||
Alloc_1 trans_alloc;
|
||||
Alloc_1 bbox_alloc;
|
||||
Alloc_1 drawmonoid_alloc;
|
||||
Alloc_1 clip_alloc;
|
||||
Alloc_1 clip_bic_alloc;
|
||||
Alloc_1 clip_stack_alloc;
|
||||
Alloc_1 clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/transform_reduce.hlsl
generated
7
piet-gpu/shader/gen/transform_reduce.hlsl
generated
|
@ -28,8 +28,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
@ -87,7 +92,7 @@ Transform combine_monoid(Transform a, Transform b)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _168 = { _161.Load(56) + (ix * 24u) };
|
||||
TransformRef _168 = { _161.Load(76) + (ix * 24u) };
|
||||
TransformRef ref = _168;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
|
|
5
piet-gpu/shader/gen/transform_reduce.msl
generated
5
piet-gpu/shader/gen/transform_reduce.msl
generated
|
@ -40,8 +40,13 @@ struct Config
|
|||
Alloc trans_alloc;
|
||||
Alloc bbox_alloc;
|
||||
Alloc drawmonoid_alloc;
|
||||
Alloc clip_alloc;
|
||||
Alloc clip_bic_alloc;
|
||||
Alloc clip_stack_alloc;
|
||||
Alloc clip_bbox_alloc;
|
||||
uint n_trans;
|
||||
uint n_path;
|
||||
uint n_clip;
|
||||
uint trans_offset;
|
||||
uint linewidth_offset;
|
||||
uint pathtag_offset;
|
||||
|
|
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
Binary file not shown.
|
@ -91,7 +91,6 @@ void main() {
|
|||
vec2 xy = vec2(xy_uint);
|
||||
mediump vec4 rgba[CHUNK];
|
||||
uint blend_stack[MAX_BLEND_STACK][CHUNK];
|
||||
mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK];
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
rgba[i] = vec4(0.0);
|
||||
}
|
||||
|
@ -211,7 +210,6 @@ void main() {
|
|||
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
|
||||
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
||||
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
|
||||
blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0);
|
||||
rgba[k] = vec4(0.0);
|
||||
}
|
||||
clip_depth++;
|
||||
|
@ -222,7 +220,7 @@ void main() {
|
|||
for (uint k = 0; k < CHUNK; k++) {
|
||||
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
||||
mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
|
||||
mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k];
|
||||
mediump vec4 fg = rgba[k] * area[k];
|
||||
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||
}
|
||||
cmd_ref.offset += 4;
|
||||
|
|
|
@ -46,11 +46,23 @@ struct Config {
|
|||
// Monoid for draw objects
|
||||
Alloc drawmonoid_alloc;
|
||||
|
||||
// BeginClip(path_ix) / EndClip
|
||||
Alloc clip_alloc;
|
||||
// Intermediate bicyclic semigroup
|
||||
Alloc clip_bic_alloc;
|
||||
// Intermediate stack
|
||||
Alloc clip_stack_alloc;
|
||||
// Clip processing results (path_ix + bbox)
|
||||
Alloc clip_bbox_alloc;
|
||||
|
||||
// Number of transforms in scene
|
||||
// This is probably not needed.
|
||||
uint n_trans;
|
||||
// This only counts actual paths, not EndClip.
|
||||
// This *should* count only actual paths, but in the current
|
||||
// implementation is redundant with n_elements.
|
||||
uint n_path;
|
||||
// Total number of BeginClip and EndClip draw objects.
|
||||
uint n_clip;
|
||||
// Offset (in bytes) of transform stream in scene buffer
|
||||
uint trans_offset;
|
||||
// Offset (in bytes) of linewidth stream in scene
|
||||
|
|
|
@ -20,7 +20,8 @@ use bytemuck::{Pod, Zeroable};
|
|||
use piet_gpu_hal::BufWrite;
|
||||
|
||||
use crate::stages::{
|
||||
self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
|
||||
self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
|
||||
TRANSFORM_PART_SIZE,
|
||||
};
|
||||
|
||||
pub struct Encoder {
|
||||
|
@ -31,6 +32,7 @@ pub struct Encoder {
|
|||
drawobj_stream: Vec<u8>,
|
||||
n_path: u32,
|
||||
n_pathseg: u32,
|
||||
n_clip: u32,
|
||||
}
|
||||
|
||||
/// A scene fragment encoding a glyph.
|
||||
|
@ -98,6 +100,7 @@ impl Encoder {
|
|||
drawobj_stream: Vec::new(),
|
||||
n_path: 0,
|
||||
n_pathseg: 0,
|
||||
n_clip: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -155,6 +158,7 @@ impl Encoder {
|
|||
..Default::default()
|
||||
};
|
||||
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
|
||||
self.n_clip += 1;
|
||||
saved
|
||||
}
|
||||
|
||||
|
@ -170,6 +174,7 @@ impl Encoder {
|
|||
// This is a dummy path, and will go away with the new clip impl.
|
||||
self.tag_stream.push(0x10);
|
||||
self.n_path += 1;
|
||||
self.n_clip += 1;
|
||||
}
|
||||
|
||||
/// Return a config for the element processing pipeline.
|
||||
|
@ -203,6 +208,20 @@ impl Encoder {
|
|||
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
||||
let anno_alloc = alloc;
|
||||
alloc += n_drawobj * ANNOTATED_SIZE;
|
||||
let clip_alloc = alloc;
|
||||
let n_clip = self.n_clip as usize;
|
||||
const CLIP_SIZE: usize = 4;
|
||||
alloc += n_clip * CLIP_SIZE;
|
||||
let clip_bic_alloc = alloc;
|
||||
const CLIP_BIC_SIZE: usize = 8;
|
||||
// This can round down, as we only reduce the prefix
|
||||
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
|
||||
let clip_stack_alloc = alloc;
|
||||
const CLIP_EL_SIZE: usize = 20;
|
||||
alloc += n_clip * CLIP_EL_SIZE;
|
||||
let clip_bbox_alloc = alloc;
|
||||
const CLIP_BBOX_SIZE: usize = 16;
|
||||
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
|
||||
|
||||
let config = Config {
|
||||
n_elements: n_drawobj as u32,
|
||||
|
@ -212,8 +231,13 @@ impl Encoder {
|
|||
trans_alloc: trans_alloc as u32,
|
||||
bbox_alloc: bbox_alloc as u32,
|
||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||
clip_alloc: clip_alloc as u32,
|
||||
clip_bic_alloc: clip_bic_alloc as u32,
|
||||
clip_stack_alloc: clip_stack_alloc as u32,
|
||||
clip_bbox_alloc: clip_bbox_alloc as u32,
|
||||
n_trans: n_trans as u32,
|
||||
n_path: self.n_path,
|
||||
n_clip: self.n_clip,
|
||||
trans_offset: trans_offset as u32,
|
||||
linewidth_offset: linewidth_offset as u32,
|
||||
pathtag_offset: pathtag_offset as u32,
|
||||
|
@ -261,6 +285,10 @@ impl Encoder {
|
|||
self.tag_stream.len()
|
||||
}
|
||||
|
||||
pub(crate) fn n_clip(&self) -> u32 {
|
||||
self.n_clip
|
||||
}
|
||||
|
||||
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
|
||||
self.tag_stream.extend(&glyph.tag_stream);
|
||||
self.pathseg_stream.extend(&glyph.pathseg_stream);
|
||||
|
|
|
@ -20,9 +20,9 @@ use piet_gpu_hal::{
|
|||
};
|
||||
|
||||
use pico_svg::PicoSvg;
|
||||
use stages::{ElementBinding, ElementCode};
|
||||
use stages::{ClipBinding, ElementBinding, ElementCode};
|
||||
|
||||
use crate::stages::{Config, ElementStage};
|
||||
use crate::stages::{ClipCode, Config, ElementStage};
|
||||
|
||||
const TILE_W: usize = 16;
|
||||
const TILE_H: usize = 16;
|
||||
|
@ -86,6 +86,9 @@ pub struct Renderer {
|
|||
element_stage: ElementStage,
|
||||
element_bindings: Vec<ElementBinding>,
|
||||
|
||||
clip_code: ClipCode,
|
||||
clip_binding: ClipBinding,
|
||||
|
||||
tile_pipeline: Pipeline,
|
||||
tile_ds: DescriptorSet,
|
||||
|
||||
|
@ -110,6 +113,7 @@ pub struct Renderer {
|
|||
n_paths: usize,
|
||||
n_pathseg: usize,
|
||||
n_pathtag: usize,
|
||||
n_clip: u32,
|
||||
|
||||
// Keep a reference to the image so that it is not destroyed.
|
||||
_bg_image: Image,
|
||||
|
@ -191,18 +195,20 @@ impl Renderer {
|
|||
let element_stage = ElementStage::new(session, &element_code);
|
||||
let element_bindings = scene_bufs
|
||||
.iter()
|
||||
.zip(&config_bufs)
|
||||
.map(|(scene_buf, config_buf)| {
|
||||
.map(|scene_buf| {
|
||||
element_stage.bind(
|
||||
session,
|
||||
&element_code,
|
||||
config_buf,
|
||||
&config_buf,
|
||||
scene_buf,
|
||||
&memory_buf_dev,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let clip_code = ClipCode::new(session);
|
||||
let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
|
||||
|
||||
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
|
||||
let tile_pipeline = session
|
||||
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
|
||||
|
@ -286,6 +292,8 @@ impl Renderer {
|
|||
element_code,
|
||||
element_stage,
|
||||
element_bindings,
|
||||
clip_code,
|
||||
clip_binding,
|
||||
tile_pipeline,
|
||||
tile_ds,
|
||||
path_pipeline,
|
||||
|
@ -304,6 +312,7 @@ impl Renderer {
|
|||
n_paths: 0,
|
||||
n_pathseg: 0,
|
||||
n_pathtag: 0,
|
||||
n_clip: 0,
|
||||
_bg_image: bg_image,
|
||||
gradient_bufs,
|
||||
gradients,
|
||||
|
@ -329,6 +338,7 @@ impl Renderer {
|
|||
self.n_drawobj = render_ctx.n_drawobj();
|
||||
self.n_pathseg = render_ctx.n_pathseg() as usize;
|
||||
self.n_pathtag = render_ctx.n_pathtag();
|
||||
self.n_clip = render_ctx.n_clip();
|
||||
|
||||
// These constants depend on encoding and may need to be updated.
|
||||
// Perhaps we can plumb these from piet-gpu-derive?
|
||||
|
@ -342,6 +352,7 @@ impl Renderer {
|
|||
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
||||
let ptcl_base = alloc;
|
||||
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
||||
|
||||
config.width_in_tiles = width_in_tiles as u32;
|
||||
config.height_in_tiles = height_in_tiles as u32;
|
||||
config.tile_alloc = tile_base as u32;
|
||||
|
@ -401,6 +412,19 @@ impl Renderer {
|
|||
cmd_buf.end_debug_label();
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.begin_debug_label("Clip bounding box calculation");
|
||||
self.clip_binding
|
||||
.record(cmd_buf, &self.clip_code, self.n_clip as u32);
|
||||
cmd_buf.end_debug_label();
|
||||
cmd_buf.begin_debug_label("Element binning");
|
||||
cmd_buf.dispatch(
|
||||
&self.bin_pipeline,
|
||||
&self.bin_ds,
|
||||
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||
(256, 1, 1),
|
||||
);
|
||||
cmd_buf.end_debug_label();
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.begin_debug_label("Tile allocation");
|
||||
cmd_buf.dispatch(
|
||||
&self.tile_pipeline,
|
||||
|
@ -430,18 +454,7 @@ impl Renderer {
|
|||
);
|
||||
cmd_buf.end_debug_label();
|
||||
cmd_buf.write_timestamp(&query_pool, 4);
|
||||
// Note: this barrier is not needed as an actual dependency between
|
||||
// pipeline stages, but I am keeping it in so that timer queries are
|
||||
// easier to interpret.
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.begin_debug_label("Element binning");
|
||||
cmd_buf.dispatch(
|
||||
&self.bin_pipeline,
|
||||
&self.bin_ds,
|
||||
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||
(256, 1, 1),
|
||||
);
|
||||
cmd_buf.end_debug_label();
|
||||
// TODO: redo query accounting
|
||||
cmd_buf.write_timestamp(&query_pool, 5);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.begin_debug_label("Coarse raster");
|
||||
|
|
|
@ -123,6 +123,10 @@ impl PietGpuRenderContext {
|
|||
self.new_encoder.n_transform()
|
||||
}
|
||||
|
||||
pub fn n_clip(&self) -> u32 {
|
||||
self.new_encoder.n_clip()
|
||||
}
|
||||
|
||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||
self.new_encoder.write_scene(buf);
|
||||
}
|
||||
|
|
|
@ -16,12 +16,14 @@
|
|||
|
||||
//! Stages for new element pipeline, exposed for testing.
|
||||
|
||||
mod clip;
|
||||
mod draw;
|
||||
mod path;
|
||||
mod transform;
|
||||
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
|
||||
pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE};
|
||||
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
|
||||
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
|
||||
use piet_gpu_hal::{Buffer, CmdBuf, Session};
|
||||
|
@ -47,8 +49,13 @@ pub struct Config {
|
|||
pub trans_alloc: u32,
|
||||
pub bbox_alloc: u32,
|
||||
pub drawmonoid_alloc: u32,
|
||||
pub clip_alloc: u32,
|
||||
pub clip_bic_alloc: u32,
|
||||
pub clip_stack_alloc: u32,
|
||||
pub clip_bbox_alloc: u32,
|
||||
pub n_trans: u32,
|
||||
pub n_path: u32,
|
||||
pub n_clip: u32,
|
||||
pub trans_offset: u32,
|
||||
pub linewidth_offset: u32,
|
||||
pub pathtag_offset: u32,
|
||||
|
|
94
piet-gpu/src/stages/clip.rs
Normal file
94
piet-gpu/src/stages/clip.rs
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright 2022 The piet-gpu authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
//! The clip processing stage (includes substages).
|
||||
|
||||
use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session};
|
||||
|
||||
// Note that this isn't the code/stage/binding pattern of most of the other stages
|
||||
// in the new element processing pipeline. We want to move those temporary buffers
|
||||
// into common memory and converge on this pattern.
|
||||
pub struct ClipCode {
|
||||
reduce_pipeline: Pipeline,
|
||||
leaf_pipeline: Pipeline,
|
||||
}
|
||||
|
||||
pub struct ClipBinding {
|
||||
reduce_ds: DescriptorSet,
|
||||
leaf_ds: DescriptorSet,
|
||||
}
|
||||
|
||||
/// Number of clip elements handled per workgroup.
///
/// Used both as the workgroup size of the clip dispatches and as the divisor
/// that turns a clip count into a workgroup count.
pub const CLIP_PART_SIZE: u32 = 256;
|
||||
|
||||
impl ClipCode {
|
||||
pub unsafe fn new(session: &Session) -> ClipCode {
|
||||
let reduce_code = include_shader!(session, "../../shader/gen/clip_reduce");
|
||||
let reduce_pipeline = session
|
||||
.create_compute_pipeline(reduce_code, &[BindType::Buffer, BindType::BufReadOnly])
|
||||
.unwrap();
|
||||
let leaf_code = include_shader!(session, "../../shader/gen/clip_leaf");
|
||||
let leaf_pipeline = session
|
||||
.create_compute_pipeline(leaf_code, &[BindType::Buffer, BindType::BufReadOnly])
|
||||
.unwrap();
|
||||
ClipCode {
|
||||
reduce_pipeline,
|
||||
leaf_pipeline,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ClipBinding {
|
||||
pub unsafe fn new(
|
||||
session: &Session,
|
||||
code: &ClipCode,
|
||||
config: &Buffer,
|
||||
memory: &Buffer,
|
||||
) -> ClipBinding {
|
||||
let reduce_ds = session
|
||||
.create_simple_descriptor_set(&code.reduce_pipeline, &[memory, config])
|
||||
.unwrap();
|
||||
let leaf_ds = session
|
||||
.create_simple_descriptor_set(&code.leaf_pipeline, &[memory, config])
|
||||
.unwrap();
|
||||
ClipBinding { reduce_ds, leaf_ds }
|
||||
}
|
||||
|
||||
/// Record the clip dispatches.
|
||||
///
|
||||
/// Assumes memory barrier on entry. Provides memory barrier on exit.
|
||||
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) {
|
||||
let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE;
|
||||
if n_wg_reduce > 0 {
|
||||
cmd_buf.dispatch(
|
||||
&code.reduce_pipeline,
|
||||
&self.reduce_ds,
|
||||
(n_wg_reduce, 1, 1),
|
||||
(CLIP_PART_SIZE, 1, 1),
|
||||
);
|
||||
cmd_buf.memory_barrier();
|
||||
}
|
||||
let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE;
|
||||
if n_wg > 0 {
|
||||
cmd_buf.dispatch(
|
||||
&code.leaf_pipeline,
|
||||
&self.leaf_ds,
|
||||
(n_wg, 1, 1),
|
||||
(CLIP_PART_SIZE, 1, 1),
|
||||
);
|
||||
cmd_buf.memory_barrier();
|
||||
}
|
||||
}
|
||||
}
|
237
tests/src/clip.rs
Normal file
237
tests/src/clip.rs
Normal file
|
@ -0,0 +1,237 @@
|
|||
// Copyright 2022 The piet-gpu authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
//! Tests for the piet-gpu clip processing stage.
|
||||
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
use rand::Rng;
|
||||
|
||||
use piet_gpu::stages::{self, ClipBinding, ClipCode, DrawMonoid};
|
||||
use piet_gpu_hal::{BufWrite, BufferUsage};
|
||||
|
||||
use crate::{Config, Runner, TestResult};
|
||||
|
||||
struct ClipData {
|
||||
clip_stream: Vec<u32>,
|
||||
// In the atomic-int friendly encoding
|
||||
path_bbox_stream: Vec<PathBbox>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Pod, Zeroable, Default)]
|
||||
#[repr(C)]
|
||||
struct PathBbox {
|
||||
bbox: [u32; 4],
|
||||
linewidth: f32,
|
||||
trans_ix: u32,
|
||||
}
|
||||
|
||||
pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||
let mut result = TestResult::new("clip");
|
||||
let n_clip: u64 = config.size.choose(1 << 8, 1 << 12, 1 << 16);
|
||||
let data = ClipData::new(n_clip);
|
||||
let stage_config = data.get_config();
|
||||
let config_buf = runner
|
||||
.session
|
||||
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
// Need to actually get data uploaded
|
||||
let mut memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
|
||||
{
|
||||
let mut buf_write = memory.map_write(..);
|
||||
data.fill_memory(&mut buf_write);
|
||||
}
|
||||
|
||||
let code = ClipCode::new(&runner.session);
|
||||
let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf);
|
||||
|
||||
let mut commands = runner.commands();
|
||||
commands.write_timestamp(0);
|
||||
commands.upload(&memory);
|
||||
binding.record(&mut commands.cmd_buf, &code, n_clip as u32);
|
||||
commands.download(&memory);
|
||||
commands.write_timestamp(1);
|
||||
runner.submit(commands);
|
||||
let dst = memory.map_read(..);
|
||||
if let Some(failure) = data.verify(&dst) {
|
||||
result.fail(failure);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn rand_bbox() -> [u32; 4] {
|
||||
let mut rng = rand::thread_rng();
|
||||
const Y_MIN: u32 = 32768;
|
||||
const Y_MAX: u32 = Y_MIN + 1000;
|
||||
let mut x0 = rng.gen_range(Y_MIN, Y_MAX);
|
||||
let mut y0 = rng.gen_range(Y_MIN, Y_MAX);
|
||||
let mut x1 = rng.gen_range(Y_MIN, Y_MAX);
|
||||
let mut y1 = rng.gen_range(Y_MIN, Y_MAX);
|
||||
if x0 > x1 {
|
||||
std::mem::swap(&mut x0, &mut x1);
|
||||
}
|
||||
if y0 > y1 {
|
||||
std::mem::swap(&mut y0, &mut y1);
|
||||
}
|
||||
[x0, y0, x1, y1]
|
||||
}
|
||||
|
||||
/// Convert from atomic-friendly to normal float bbox.
///
/// Each encoded coordinate is converted to `f32` and then has 32768
/// subtracted, so an encoded value of 32768 decodes to `0.0`.
fn decode_bbox(raw: [u32; 4]) -> [f32; 4] {
    /// Decode a single encoded coordinate.
    fn decode(x: u32) -> f32 {
        x as f32 - 32768.0
    }
    [
        decode(raw[0]),
        decode(raw[1]),
        decode(raw[2]),
        decode(raw[3]),
    ]
}
|
||||
|
||||
/// Intersect two bounding boxes given as `[x0, y0, x1, y1]`.
///
/// The result takes the larger of the two minimum corners and the smaller of
/// the two maximum corners. No clamping is applied, so disjoint inputs yield
/// a box whose minimum exceeds its maximum.
fn intersect_bbox(b0: [f32; 4], b1: [f32; 4]) -> [f32; 4] {
    [
        b0[0].max(b1[0]),
        b0[1].max(b1[1]),
        b0[2].min(b1[2]),
        b0[3].min(b1[3]),
    ]
}
|
||||
|
||||
/// Bounding box that `ClipData::verify` expects while the clip stack is empty.
const INFTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9];
|
||||
|
||||
impl ClipData {
|
||||
/// Generate a random clip sequence
|
||||
fn new(n: u64) -> ClipData {
|
||||
// Simple LCG random generator, for deterministic results
|
||||
let mut z = 20170705u64;
|
||||
let mut depth = 0;
|
||||
let mut path_bbox_stream = Vec::new();
|
||||
let clip_stream = (0..n)
|
||||
.map(|i| {
|
||||
let is_push = if depth == 0 {
|
||||
true
|
||||
} else if depth >= 255 {
|
||||
false
|
||||
} else {
|
||||
z = z.wrapping_mul(742938285) % ((1 << 31) - 1);
|
||||
(z % 2) != 0
|
||||
};
|
||||
if is_push {
|
||||
depth += 1;
|
||||
let path_ix = path_bbox_stream.len() as u32;
|
||||
let bbox = rand_bbox();
|
||||
let path_bbox = PathBbox {
|
||||
bbox,
|
||||
..Default::default()
|
||||
};
|
||||
path_bbox_stream.push(path_bbox);
|
||||
path_ix
|
||||
} else {
|
||||
depth -= 1;
|
||||
!(i as u32)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
ClipData {
|
||||
clip_stream,
|
||||
path_bbox_stream,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_config(&self) -> stages::Config {
|
||||
let n_clip = self.clip_stream.len();
|
||||
let n_path = self.path_bbox_stream.len();
|
||||
let clip_alloc = 0;
|
||||
let path_bbox_alloc = clip_alloc + 4 * n_clip;
|
||||
let drawmonoid_alloc = path_bbox_alloc + 24 * n_path;
|
||||
let clip_bic_alloc = drawmonoid_alloc + 8 * n_clip;
|
||||
// TODO: this is over-allocated, we only need one bic per wg
|
||||
let clip_stack_alloc = clip_bic_alloc + 8 * n_clip;
|
||||
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
|
||||
stages::Config {
|
||||
clip_alloc: clip_alloc as u32,
|
||||
// TODO: this wants to be renamed to path_bbox_alloc
|
||||
bbox_alloc: path_bbox_alloc as u32,
|
||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||
clip_bic_alloc: clip_bic_alloc as u32,
|
||||
clip_stack_alloc: clip_stack_alloc as u32,
|
||||
clip_bbox_alloc: clip_bbox_alloc as u32,
|
||||
n_clip: n_clip as u32,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> u64 {
|
||||
(8 + self.clip_stream.len() * (4 + 8 + 8 + 20 + 16) + self.path_bbox_stream.len() * 24)
|
||||
as u64
|
||||
}
|
||||
|
||||
fn fill_memory(&self, buf: &mut BufWrite) {
|
||||
// offset / header; no dynamic allocation
|
||||
buf.fill_zero(8);
|
||||
buf.extend_slice(&self.clip_stream);
|
||||
buf.extend_slice(&self.path_bbox_stream);
|
||||
// drawmonoid is left uninitialized
|
||||
}
|
||||
|
||||
fn verify(&self, buf: &[u8]) -> Option<String> {
|
||||
let n_clip = self.clip_stream.len();
|
||||
let n_path = self.path_bbox_stream.len();
|
||||
let clip_bbox_start = 8 + n_clip * (4 + 8 + 8 + 20) + n_path * 24;
|
||||
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
|
||||
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
|
||||
let draw_start = 8 + n_clip * 4 + n_path * 24;
|
||||
let draw_range = draw_start..(draw_start + n_clip * 8);
|
||||
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
|
||||
let mut bbox_stack = Vec::new();
|
||||
let mut parent_stack = Vec::new();
|
||||
for (i, path_ix) in self.clip_stream.iter().enumerate() {
|
||||
let mut expected_path = None;
|
||||
if *path_ix >= 0x8000_0000 {
|
||||
let parent = parent_stack.pop().unwrap();
|
||||
expected_path = Some(self.clip_stream[parent as usize]);
|
||||
bbox_stack.pop().unwrap();
|
||||
} else {
|
||||
parent_stack.push(i);
|
||||
let path_bbox_stream = self.path_bbox_stream[*path_ix as usize];
|
||||
let bbox = decode_bbox(path_bbox_stream.bbox);
|
||||
let new = match bbox_stack.last() {
|
||||
None => bbox,
|
||||
Some(old) => intersect_bbox(*old, bbox),
|
||||
};
|
||||
bbox_stack.push(new);
|
||||
};
|
||||
let expected = bbox_stack.last().copied().unwrap_or(INFTY_BBOX);
|
||||
let clip_bbox = clip_result[i];
|
||||
if clip_bbox != expected {
|
||||
return Some(format!(
|
||||
"{}: path_ix={}, expected bbox={:?}, clip_bbox={:?}",
|
||||
i, path_ix, expected, clip_bbox
|
||||
));
|
||||
}
|
||||
if let Some(expected_path) = expected_path {
|
||||
let actual_path = draw_result[i].path_ix;
|
||||
if expected_path != actual_path {
|
||||
return Some(format!(
|
||||
"{}: expected path {}, actual {}",
|
||||
i, expected_path, actual_path
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
|
@ -102,17 +102,21 @@ impl DrawTestData {
|
|||
// Layout of memory
|
||||
let drawmonoid_alloc = 0;
|
||||
let anno_alloc = drawmonoid_alloc + 8 * n_tags;
|
||||
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
|
||||
let stage_config = stages::Config {
|
||||
n_elements: n_tags as u32,
|
||||
anno_alloc: anno_alloc as u32,
|
||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||
clip_alloc: clip_alloc as u32,
|
||||
..Default::default()
|
||||
};
|
||||
stage_config
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> u64 {
|
||||
(8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64
|
||||
// Note: this overallocates the clip buf a bit - only needed for the
|
||||
// total number of begin_clip and end_clip tags.
|
||||
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64
|
||||
}
|
||||
|
||||
fn fill_scene(&self, buf: &mut BufWrite) {
|
||||
|
@ -128,14 +132,13 @@ impl DrawTestData {
|
|||
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
|
||||
let mut expected = DrawMonoid::default();
|
||||
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
|
||||
// We compute an inclusive prefix sum, but for this application
|
||||
// exclusive would be slightly better. We can adapt though.
|
||||
// Verify exclusive prefix sum.
|
||||
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
|
||||
expected.path_ix += path_ix;
|
||||
expected.clip_ix += clip_ix;
|
||||
if *actual != expected {
|
||||
return Some(format!("draw mismatch at {}", i));
|
||||
}
|
||||
expected.path_ix += path_ix;
|
||||
expected.clip_ix += clip_ix;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
//! Tests for piet-gpu shaders and GPU capabilities.
|
||||
|
||||
mod clear;
|
||||
mod clip;
|
||||
mod config;
|
||||
mod draw;
|
||||
mod linkedlist;
|
||||
|
@ -139,6 +140,7 @@ fn main() {
|
|||
report(&transform::transform_test(&mut runner, &config));
|
||||
report(&path::path_test(&mut runner, &config));
|
||||
report(&draw::draw_test(&mut runner, &config));
|
||||
report(&clip::clip_test(&mut runner, &config));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,8 +20,8 @@ use std::ops::RangeBounds;
|
|||
|
||||
use bytemuck::Pod;
|
||||
use piet_gpu_hal::{
|
||||
BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool,
|
||||
Session,
|
||||
BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags,
|
||||
QueryPool, Session,
|
||||
};
|
||||
|
||||
pub struct Runner {
|
||||
|
@ -37,15 +37,8 @@ pub struct Commands {
|
|||
query_pool: QueryPool,
|
||||
}
|
||||
|
||||
/// Buffer for uploading data to GPU.
|
||||
#[allow(unused)]
|
||||
pub struct BufUp {
|
||||
pub stage_buf: Buffer,
|
||||
pub dev_buf: Buffer,
|
||||
}
|
||||
|
||||
/// Buffer for downloading data from GPU.
|
||||
pub struct BufDown {
|
||||
/// Buffer for both uploading and downloading
|
||||
pub struct BufStage {
|
||||
pub stage_buf: Buffer,
|
||||
pub dev_buf: Buffer,
|
||||
}
|
||||
|
@ -92,7 +85,7 @@ impl Runner {
|
|||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn buf_up(&self, size: u64) -> BufUp {
|
||||
pub fn buf_up(&self, size: u64) -> BufStage {
|
||||
let stage_buf = self
|
||||
.session
|
||||
.create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC)
|
||||
|
@ -101,13 +94,13 @@ impl Runner {
|
|||
.session
|
||||
.create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
BufUp { stage_buf, dev_buf }
|
||||
BufStage { stage_buf, dev_buf }
|
||||
}
|
||||
|
||||
/// Create a buffer for download (readback).
|
||||
///
|
||||
/// The `usage` parameter need not include COPY_SRC and STORAGE.
|
||||
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown {
|
||||
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufStage {
|
||||
let stage_buf = self
|
||||
.session
|
||||
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
|
||||
|
@ -116,7 +109,7 @@ impl Runner {
|
|||
.session
|
||||
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
BufDown { stage_buf, dev_buf }
|
||||
BufStage { stage_buf, dev_buf }
|
||||
}
|
||||
|
||||
pub fn backend_type(&self) -> BackendType {
|
||||
|
@ -129,17 +122,16 @@ impl Commands {
|
|||
self.cmd_buf.write_timestamp(&self.query_pool, query);
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub unsafe fn upload(&mut self, buf: &BufUp) {
|
||||
pub unsafe fn upload(&mut self, buf: &BufStage) {
|
||||
self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf);
|
||||
}
|
||||
|
||||
pub unsafe fn download(&mut self, buf: &BufDown) {
|
||||
pub unsafe fn download(&mut self, buf: &BufStage) {
|
||||
self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf);
|
||||
}
|
||||
}
|
||||
|
||||
impl BufDown {
|
||||
impl BufStage {
|
||||
/// Read the staging buffer into `dst`.
///
/// # Panics
///
/// Panics if the underlying buffer read returns an error.
pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) {
    self.stage_buf.read(dst).unwrap()
}
|
||||
|
@ -147,4 +139,8 @@ impl BufDown {
|
|||
/// Map `range` of the staging buffer for reading.
///
/// # Panics
///
/// Panics if mapping the buffer fails.
pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> {
    self.stage_buf.map_read(range).unwrap()
}
|
||||
|
||||
/// Map `range` of the staging buffer for writing.
///
/// # Panics
///
/// Panics if mapping the buffer fails.
pub unsafe fn map_write<'a>(&'a mut self, range: impl RangeBounds<usize>) -> BufWriteGuard {
    self.stage_buf.map_write(range).unwrap()
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue