mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
commit
d81e5cb4ee
|
@ -20,6 +20,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
#include "bins.h"
|
#include "bins.h"
|
||||||
|
#include "drawtag.h"
|
||||||
|
|
||||||
// scale factors useful for converting coordinates to bins
|
// scale factors useful for converting coordinates to bins
|
||||||
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||||
|
@ -35,6 +36,47 @@ shared uint count[N_SLICE][N_TILE];
|
||||||
shared Alloc sh_chunk_alloc[N_TILE];
|
shared Alloc sh_chunk_alloc[N_TILE];
|
||||||
shared bool sh_alloc_failed;
|
shared bool sh_alloc_failed;
|
||||||
|
|
||||||
|
DrawMonoid load_draw_monoid(uint element_ix) {
|
||||||
|
uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
|
||||||
|
uint path_ix = memory[base];
|
||||||
|
uint clip_ix = memory[base + 1];
|
||||||
|
return DrawMonoid(path_ix, clip_ix);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load bounding box computed by clip processing
|
||||||
|
vec4 load_clip_bbox(uint clip_ix) {
|
||||||
|
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * clip_ix;
|
||||||
|
float x0 = uintBitsToFloat(memory[base]);
|
||||||
|
float y0 = uintBitsToFloat(memory[base + 1]);
|
||||||
|
float x1 = uintBitsToFloat(memory[base + 2]);
|
||||||
|
float y1 = uintBitsToFloat(memory[base + 3]);
|
||||||
|
vec4 bbox = vec4(x0, y0, x1, y1);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec4 bbox_intersect(vec4 a, vec4 b) {
|
||||||
|
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load path's bbox from bbox (as written by pathseg).
|
||||||
|
vec4 load_path_bbox(uint path_ix) {
|
||||||
|
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
|
||||||
|
float bbox_l = float(memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(memory[base + 1]) - 32768.0;
|
||||||
|
float bbox_r = float(memory[base + 2]) - 32768.0;
|
||||||
|
float bbox_b = float(memory[base + 3]) - 32768.0;
|
||||||
|
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_path_bbox(AnnotatedRef ref, vec4 bbox) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
memory[ix + 1] = floatBitsToUint(bbox.x);
|
||||||
|
memory[ix + 2] = floatBitsToUint(bbox.y);
|
||||||
|
memory[ix + 3] = floatBitsToUint(bbox.z);
|
||||||
|
memory[ix + 4] = floatBitsToUint(bbox.w);
|
||||||
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint my_n_elements = conf.n_elements;
|
uint my_n_elements = conf.n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
@ -61,13 +103,27 @@ void main() {
|
||||||
case Annotated_Image:
|
case Annotated_Image:
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
// Note: we take advantage of the fact that these drawing elements
|
DrawMonoid draw_monoid = load_draw_monoid(element_ix);
|
||||||
// have the bbox at the same place in their layout.
|
uint path_ix = draw_monoid.path_ix;
|
||||||
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
|
vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||||
x0 = int(floor(clip.bbox.x * SX));
|
uint clip_ix = draw_monoid.clip_ix;
|
||||||
y0 = int(floor(clip.bbox.y * SY));
|
if (clip_ix > 0) {
|
||||||
x1 = int(ceil(clip.bbox.z * SX));
|
clip_bbox = load_clip_bbox(clip_ix - 1);
|
||||||
y1 = int(ceil(clip.bbox.w * SY));
|
}
|
||||||
|
// For clip elements, clip_bbox is the bbox of the clip path, intersected
|
||||||
|
// with enclosing clips.
|
||||||
|
// For other elements, it is the bbox of the enclosing clips.
|
||||||
|
|
||||||
|
vec4 path_bbox = load_path_bbox(path_ix);
|
||||||
|
vec4 bbox = bbox_intersect(path_bbox, clip_bbox);
|
||||||
|
// Avoid negative-size bbox (is this necessary)?
|
||||||
|
bbox.zw = max(bbox.xy, bbox.zw);
|
||||||
|
// Store clip-intersected bbox for tile_alloc.
|
||||||
|
store_path_bbox(ref, bbox);
|
||||||
|
x0 = int(floor(bbox.x * SX));
|
||||||
|
y0 = int(floor(bbox.y * SY));
|
||||||
|
x1 = int(ceil(bbox.z * SX));
|
||||||
|
y1 = int(ceil(bbox.w * SY));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ rule dxil
|
||||||
rule msl
|
rule msl
|
||||||
command = $spirv_cross --msl $in --output $out $msl_flags
|
command = $spirv_cross --msl $in --output $out $msl_flags
|
||||||
|
|
||||||
build gen/binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h
|
build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h
|
||||||
build gen/binning.hlsl: hlsl gen/binning.spv
|
build gen/binning.hlsl: hlsl gen/binning.spv
|
||||||
build gen/binning.dxil: dxil gen/binning.hlsl
|
build gen/binning.dxil: dxil gen/binning.hlsl
|
||||||
build gen/binning.msl: msl gen/binning.spv
|
build gen/binning.msl: msl gen/binning.spv
|
||||||
|
@ -119,6 +119,16 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
|
||||||
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
|
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
|
||||||
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
|
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
|
||||||
|
|
||||||
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
|
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h
|
||||||
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
|
build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
|
||||||
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
|
build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
|
||||||
|
build gen/clip_reduce.msl: msl gen/clip_reduce.spv
|
||||||
|
|
||||||
|
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h
|
||||||
|
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
|
||||||
|
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
|
||||||
|
build gen/clip_leaf.msl: msl gen/clip_leaf.spv
|
||||||
|
|
||||||
|
build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/clip_leaf.spv gen/clip_reduce.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
|
||||||
|
build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/clip_leaf.hlsl gen/clip_reduce.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
|
||||||
|
build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/clip_leaf.msl gen/clip_reduce.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
|
||||||
|
|
287
piet-gpu/shader/clip_leaf.comp
Normal file
287
piet-gpu/shader/clip_leaf.comp
Normal file
|
@ -0,0 +1,287 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// The second dispatch of clip stack processing.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
|
#define PARTITION_SIZE WG_SIZE
|
||||||
|
|
||||||
|
layout(local_size_x = WG_SIZE) in;
|
||||||
|
|
||||||
|
layout(binding = 1) readonly buffer ConfigBuf {
|
||||||
|
Config conf;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "annotated.h"
|
||||||
|
|
||||||
|
// Some of this is cut'n'paste duplication with the reduce pass, and
|
||||||
|
// arguably should be moved to a common .h file.
|
||||||
|
// The bicyclic monoid
|
||||||
|
|
||||||
|
struct ClipEl {
|
||||||
|
// index of parent node
|
||||||
|
uint parent_ix;
|
||||||
|
// bounding box
|
||||||
|
vec4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Bic {
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
Bic bic_combine(Bic x, Bic y) {
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
return Bic(x.a + y.a - m, x.b + y.b - m);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load path's bbox from bbox (as written by pathseg).
|
||||||
|
vec4 load_path_bbox(uint path_ix) {
|
||||||
|
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
|
||||||
|
float bbox_l = float(memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(memory[base + 1]) - 32768.0;
|
||||||
|
float bbox_r = float(memory[base + 2]) - 32768.0;
|
||||||
|
float bbox_b = float(memory[base + 3]) - 32768.0;
|
||||||
|
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec4 bbox_intersect(vec4 a, vec4 b) {
|
||||||
|
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
shared Bic sh_bic[WG_SIZE * 2 - 2];
|
||||||
|
shared uint sh_stack[PARTITION_SIZE];
|
||||||
|
shared vec4 sh_stack_bbox[PARTITION_SIZE];
|
||||||
|
shared uint sh_link[PARTITION_SIZE];
|
||||||
|
shared vec4 sh_bbox[PARTITION_SIZE];
|
||||||
|
|
||||||
|
// This is adapted directly from the stack monoid impl.
|
||||||
|
// Return value is reference within partition if >= 0,
|
||||||
|
// otherwise reference to stack.
|
||||||
|
uint search_link(inout Bic bic) {
|
||||||
|
uint ix = gl_LocalInvocationID.x;
|
||||||
|
uint j = 0;
|
||||||
|
while (j < LG_WG_SIZE) {
|
||||||
|
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
|
||||||
|
if (((ix >> j) & 1) != 0) {
|
||||||
|
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
|
||||||
|
if (test.b > 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bic = test;
|
||||||
|
ix -= 1u << j;
|
||||||
|
}
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
if (ix > 0) {
|
||||||
|
while (j > 0) {
|
||||||
|
j--;
|
||||||
|
uint base = 2 * WG_SIZE - (2u << (LG_WG_SIZE - j));
|
||||||
|
Bic test = bic_combine(sh_bic[base + (ix >> j) - 1], bic);
|
||||||
|
if (test.b == 0) {
|
||||||
|
bic = test;
|
||||||
|
ix -= 1u << j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ix is the smallest value such that reduce(ix..th).b == 0
|
||||||
|
if (ix > 0) {
|
||||||
|
return ix - 1;
|
||||||
|
} else {
|
||||||
|
return ~0u - bic.a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Bic load_bic(uint ix) {
|
||||||
|
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
|
||||||
|
return Bic(memory[base], memory[base + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ClipEl load_clip_el(uint ix) {
|
||||||
|
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
|
||||||
|
uint parent_ix = memory[base];
|
||||||
|
float x0 = uintBitsToFloat(memory[base + 1]);
|
||||||
|
float y0 = uintBitsToFloat(memory[base + 2]);
|
||||||
|
float x1 = uintBitsToFloat(memory[base + 3]);
|
||||||
|
float y1 = uintBitsToFloat(memory[base + 4]);
|
||||||
|
vec4 bbox = vec4(x0, y0, x1, y1);
|
||||||
|
return ClipEl(parent_ix, bbox);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint load_path_ix(uint ix) {
|
||||||
|
// This is one approach to a partial final block. Another would be
|
||||||
|
// to do a memset to the padding in the command queue.
|
||||||
|
if (ix < conf.n_clip) {
|
||||||
|
return memory[(conf.clip_alloc.offset >> 2) + ix];
|
||||||
|
} else {
|
||||||
|
// EndClip tags don't implicate further loads.
|
||||||
|
return 0x80000000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_clip_bbox(uint ix, vec4 bbox) {
|
||||||
|
uint base = (conf.clip_bbox_alloc.offset >> 2) + 4 * ix;
|
||||||
|
memory[base] = floatBitsToUint(bbox.x);
|
||||||
|
memory[base + 1] = floatBitsToUint(bbox.y);
|
||||||
|
memory[base + 2] = floatBitsToUint(bbox.z);
|
||||||
|
memory[base + 3] = floatBitsToUint(bbox.w);
|
||||||
|
}
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
// materialize stack up to the start of this partition. This
|
||||||
|
// is based on the pure stack monoid, but with two additions.
|
||||||
|
|
||||||
|
// First, (this only matters if the stack goes deeper than the
|
||||||
|
// partition size, which might be unlikely in practice), the
|
||||||
|
// topmost stack element from each partition is picked, then an
|
||||||
|
// exclusive scan of those. Also note that if this is skipped,
|
||||||
|
// a scan is not needed in the reduce stage.
|
||||||
|
|
||||||
|
// Second, after the stream compaction, do a scan of the retrieved
|
||||||
|
// bbox values.
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
Bic bic = Bic(0, 0);
|
||||||
|
if (th < gl_WorkGroupID.x) {
|
||||||
|
bic = load_bic(th);
|
||||||
|
}
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
barrier();
|
||||||
|
if (th + (1u << i) < WG_SIZE) {
|
||||||
|
Bic other = sh_bic[th + (1u << i)];
|
||||||
|
bic = bic_combine(bic, other);
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
uint stack_size = sh_bic[0].b;
|
||||||
|
|
||||||
|
// TODO: do bbox scan here (to unlock greater stack depth)
|
||||||
|
|
||||||
|
// binary search in stack
|
||||||
|
uint sp = PARTITION_SIZE - 1 - th;
|
||||||
|
uint ix = 0;
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
uint probe = ix + (uint(PARTITION_SIZE / 2) >> i);
|
||||||
|
if (sp < sh_bic[probe].b) {
|
||||||
|
ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ix is largest value such that sp < sh_bic[ix].b (if any)
|
||||||
|
uint b = sh_bic[ix].b;
|
||||||
|
vec4 bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
if (sp < b) {
|
||||||
|
// maybe store the index here for future use?
|
||||||
|
ClipEl el = load_clip_el(ix * PARTITION_SIZE + b - sp - 1);
|
||||||
|
sh_stack[th] = el.parent_ix;
|
||||||
|
bbox = el.bbox;
|
||||||
|
// other element values here?
|
||||||
|
}
|
||||||
|
|
||||||
|
// forward scan of bbox values of prefix stack
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
barrier();
|
||||||
|
if (th >= (1u << i)) {
|
||||||
|
bbox = bbox_intersect(sh_stack_bbox[th - (1u << i)], bbox);
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
}
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
|
||||||
|
// Read input and compute bicyclic semigroup binary tree
|
||||||
|
uint inp = load_path_ix(gl_GlobalInvocationID.x);
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
bic = Bic(1 - uint(is_push), uint(is_push));
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
if (is_push) {
|
||||||
|
bbox = load_path_bbox(inp);
|
||||||
|
} else {
|
||||||
|
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
}
|
||||||
|
uint inbase = 0;
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE - 1; i++) {
|
||||||
|
uint outbase = 2 * WG_SIZE - (1u << (LG_WG_SIZE - i));
|
||||||
|
barrier();
|
||||||
|
if (th < (1u << (LG_WG_SIZE - 1 - i))) {
|
||||||
|
sh_bic[outbase + th] = bic_combine(sh_bic[inbase + th * 2], sh_bic[inbase + th * 2 + 1]);
|
||||||
|
}
|
||||||
|
inbase = outbase;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
// Search for predecessor node
|
||||||
|
bic = Bic(0, 0);
|
||||||
|
uint link = search_link(bic);
|
||||||
|
// we use N_SEQ > 1 convention here:
|
||||||
|
// link >= 0 is index within partition
|
||||||
|
// link < 0 is reference to stack
|
||||||
|
|
||||||
|
// We want grandparent bbox for pop nodes, so follow those links.
|
||||||
|
sh_link[th] = link;
|
||||||
|
barrier();
|
||||||
|
uint grandparent;
|
||||||
|
if (int(link) >= 0) {
|
||||||
|
grandparent = sh_link[link];
|
||||||
|
} else {
|
||||||
|
grandparent = link - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve parent
|
||||||
|
uint parent;
|
||||||
|
if (int(link) >= 0) {
|
||||||
|
parent = gl_WorkGroupID.x * PARTITION_SIZE + link;
|
||||||
|
} else if (int(link + stack_size) >= 0) {
|
||||||
|
parent = sh_stack[PARTITION_SIZE + link];
|
||||||
|
} else {
|
||||||
|
parent = ~0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
// bbox scan along parent links
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
// sh_link was already stored for first iteration
|
||||||
|
if (i != 0) {
|
||||||
|
sh_link[th] = link;
|
||||||
|
}
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
barrier();
|
||||||
|
if (int(link) >= 0) {
|
||||||
|
bbox = bbox_intersect(sh_bbox[link], bbox);
|
||||||
|
link = sh_link[link];
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
}
|
||||||
|
if (int(link + stack_size) >= 0) {
|
||||||
|
bbox = bbox_intersect(sh_stack_bbox[PARTITION_SIZE + link], bbox);
|
||||||
|
}
|
||||||
|
// At this point, bbox is the reduction of bounding boxes along the tree.
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
barrier();
|
||||||
|
|
||||||
|
uint path_ix = inp;
|
||||||
|
if (!is_push && gl_GlobalInvocationID.x < conf.n_clip) {
|
||||||
|
// Is this load expensive? If so, it's loaded earlier for in-partition
|
||||||
|
// and is in the ClipEl for cross-partition.
|
||||||
|
// If not, can probably get rid of it in the stack intermediate buf.
|
||||||
|
path_ix = load_path_ix(parent);
|
||||||
|
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp;
|
||||||
|
// Fix up drawmonoid so path_ix at EndClip matches BeginClip
|
||||||
|
memory[drawmonoid_out_base] = path_ix;
|
||||||
|
|
||||||
|
if (int(grandparent) >= 0) {
|
||||||
|
bbox = sh_bbox[grandparent];
|
||||||
|
} else if (int(grandparent + stack_size) >= 0) {
|
||||||
|
bbox = sh_stack_bbox[PARTITION_SIZE + grandparent];
|
||||||
|
} else {
|
||||||
|
bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
store_clip_bbox(gl_GlobalInvocationID.x, bbox);
|
||||||
|
}
|
148
piet-gpu/shader/clip_reduce.comp
Normal file
148
piet-gpu/shader/clip_reduce.comp
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// The reduce pass for clip stack processing.
|
||||||
|
|
||||||
|
// The primary input is a sequence of path ids representing paths to
|
||||||
|
// push, with a special value of ~0 to represent pop.
|
||||||
|
|
||||||
|
// For each path, the bounding box is found in the anno stream
|
||||||
|
// (anno_alloc), though this may change.
|
||||||
|
|
||||||
|
// Output is a stack monoid reduction for the partition. The Bic
|
||||||
|
// is stored in the BicBuf, and the stack slice in StackBuf.
|
||||||
|
|
||||||
|
// Note: for this shader, only pushes are represented in the stack
|
||||||
|
// monoid reduction output, so we don't have to worry about the
|
||||||
|
// interpretation of pops.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "mem.h"
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
|
#define PARTITION_SIZE WG_SIZE
|
||||||
|
|
||||||
|
layout(local_size_x = WG_SIZE) in;
|
||||||
|
|
||||||
|
layout(binding = 1) readonly buffer ConfigBuf {
|
||||||
|
Config conf;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "annotated.h"
|
||||||
|
|
||||||
|
// The intermediate state for clip processing.
|
||||||
|
struct ClipEl {
|
||||||
|
// index of parent node
|
||||||
|
uint parent_ix;
|
||||||
|
// bounding box
|
||||||
|
vec4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
// The bicyclic monoid
|
||||||
|
struct Bic {
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
Bic bic_combine(Bic x, Bic y) {
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
return Bic(x.a + y.a - m, x.b + y.b - m);
|
||||||
|
}
|
||||||
|
|
||||||
|
shared Bic sh_bic[WG_SIZE];
|
||||||
|
shared uint sh_parent[WG_SIZE];
|
||||||
|
shared uint sh_path_ix[WG_SIZE];
|
||||||
|
shared vec4 sh_bbox[WG_SIZE];
|
||||||
|
|
||||||
|
// Load path's bbox from bbox (as written by pathseg).
|
||||||
|
vec4 load_path_bbox(uint path_ix) {
|
||||||
|
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
|
||||||
|
float bbox_l = float(memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(memory[base + 1]) - 32768.0;
|
||||||
|
float bbox_r = float(memory[base + 2]) - 32768.0;
|
||||||
|
float bbox_b = float(memory[base + 3]) - 32768.0;
|
||||||
|
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec4 bbox_intersect(vec4 a, vec4 b) {
|
||||||
|
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_bic(uint ix, Bic bic) {
|
||||||
|
uint base = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
|
||||||
|
memory[base] = bic.a;
|
||||||
|
memory[base + 1] = bic.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_clip_el(uint ix, ClipEl el) {
|
||||||
|
uint base = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
|
||||||
|
memory[base] = el.parent_ix;
|
||||||
|
memory[base + 1] = floatBitsToUint(el.bbox.x);
|
||||||
|
memory[base + 2] = floatBitsToUint(el.bbox.y);
|
||||||
|
memory[base + 3] = floatBitsToUint(el.bbox.z);
|
||||||
|
memory[base + 4] = floatBitsToUint(el.bbox.w);
|
||||||
|
}
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
uint inp = memory[(conf.clip_alloc.offset >> 2) + gl_GlobalInvocationID.x];
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
// reverse scan of bicyclic semigroup
|
||||||
|
Bic bic = Bic(1 - uint(is_push), uint(is_push));
|
||||||
|
sh_bic[gl_LocalInvocationID.x] = bic;
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
barrier();
|
||||||
|
if (th + (1u << i) < WG_SIZE) {
|
||||||
|
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
|
||||||
|
bic = bic_combine(bic, other);
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
if (th == 0) {
|
||||||
|
store_bic(gl_WorkGroupID.x, bic);
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
uint size = sh_bic[0].b;
|
||||||
|
bic = Bic(0, 0);
|
||||||
|
if (th + 1 < WG_SIZE) {
|
||||||
|
bic = sh_bic[th + 1];
|
||||||
|
}
|
||||||
|
if (is_push && bic.a == 0) {
|
||||||
|
uint local_ix = size - bic.b - 1;
|
||||||
|
sh_parent[local_ix] = th;
|
||||||
|
sh_path_ix[local_ix] = inp;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
// Do forward scan of bounding box intersection
|
||||||
|
vec4 bbox;
|
||||||
|
uint path_ix;
|
||||||
|
if (th < size) {
|
||||||
|
path_ix = sh_path_ix[th];
|
||||||
|
bbox = load_path_bbox(path_ix);
|
||||||
|
}
|
||||||
|
// Not necessary if depth is bounded by wg size
|
||||||
|
#if 0
|
||||||
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
|
// We gate so we never access uninit data, but it might
|
||||||
|
// be more efficient to avoid the conditionals.
|
||||||
|
if (th < size) {
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
if (th < size && th >= (1u << i)) {
|
||||||
|
bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (th < size) {
|
||||||
|
uint parent_ix = sh_parent[th] + gl_WorkGroupID.x * PARTITION_SIZE;
|
||||||
|
ClipEl el = ClipEl(parent_ix, bbox);
|
||||||
|
store_clip_el(gl_GlobalInvocationID.x, el);
|
||||||
|
}
|
||||||
|
}
|
|
@ -136,9 +136,6 @@ void main() {
|
||||||
// currently in a clip for which the entire tile has an alpha of zero, and
|
// currently in a clip for which the entire tile has an alpha of zero, and
|
||||||
// the value is the depth after the "begin clip" of that element.
|
// the value is the depth after the "begin clip" of that element.
|
||||||
uint clip_zero_depth = 0;
|
uint clip_zero_depth = 0;
|
||||||
// State for the "clip one" optimization. If bit `i` is set, then that means
|
|
||||||
// that the clip pushed at depth `i` has an alpha of all one.
|
|
||||||
uint clip_one_mask = 0;
|
|
||||||
|
|
||||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||||
// Items up to rd_ix have been read from sh_elements
|
// Items up to rd_ix have been read from sh_elements
|
||||||
|
@ -227,9 +224,8 @@ void main() {
|
||||||
case Annotated_LinGradient:
|
case Annotated_LinGradient:
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
// We have one "path" for each element, even if the element isn't
|
uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
|
||||||
// actually a path (currently EndClip, but images etc in the future).
|
uint path_ix = memory[drawmonoid_base];
|
||||||
uint path_ix = element_ix;
|
|
||||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
uint stride = path.bbox.z - path.bbox.x;
|
uint stride = path.bbox.z - path.bbox.x;
|
||||||
sh_tile_stride[th_ix] = stride;
|
sh_tile_stride[th_ix] = stride;
|
||||||
|
@ -283,15 +279,15 @@ void main() {
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
bool include_tile = false;
|
bool include_tile = false;
|
||||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
if (mem_ok) {
|
||||||
include_tile = true;
|
|
||||||
} else if (mem_ok) {
|
|
||||||
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
|
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
|
||||||
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
// Include the path in the tile if
|
bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip;
|
||||||
// - the tile contains at least a segment (tile offset non-zero)
|
// Always include the tile if it contains a path segment.
|
||||||
// - the tile is completely covered (backdrop non-zero)
|
// For draws, include the tile if it is solid.
|
||||||
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
|
// For clips, include the tile if it is empty - this way, logic
|
||||||
|
// below will suppress the drawing of inner elements.
|
||||||
|
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip;
|
||||||
}
|
}
|
||||||
if (include_tile) {
|
if (include_tile) {
|
||||||
uint el_slice = el_ix / 32;
|
uint el_slice = el_ix / 32;
|
||||||
|
@ -378,33 +374,26 @@ void main() {
|
||||||
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||||
clip_zero_depth = clip_depth + 1;
|
clip_zero_depth = clip_depth + 1;
|
||||||
} else if (tile.tile.offset == 0 && clip_depth < 32) {
|
|
||||||
clip_one_mask |= (1u << clip_depth);
|
|
||||||
} else {
|
} else {
|
||||||
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
|
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
|
|
||||||
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||||
cmd_ref.offset += 4;
|
cmd_ref.offset += 4;
|
||||||
if (clip_depth < 32) {
|
|
||||||
clip_one_mask &= ~(1u << clip_depth);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
break;
|
break;
|
||||||
case Annotated_EndClip:
|
case Annotated_EndClip:
|
||||||
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
|
||||||
|
TileRef(sh_tile_base[element_ref_ix] +
|
||||||
|
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
clip_depth--;
|
clip_depth--;
|
||||||
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
break;
|
||||||
break;
|
|
||||||
}
|
|
||||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
|
||||||
cmd_ref.offset += 4;
|
|
||||||
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
|
||||||
cmd_ref.offset += 4;
|
|
||||||
}
|
}
|
||||||
|
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0);
|
||||||
|
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
||||||
|
cmd_ref.offset += 4;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -72,9 +72,14 @@ void main() {
|
||||||
}
|
}
|
||||||
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
|
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
|
||||||
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
|
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
|
||||||
|
uint clip_out_base = conf.clip_alloc.offset >> 2;
|
||||||
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
|
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
|
||||||
for (uint i = 0; i < N_ROWS; i++) {
|
for (uint i = 0; i < N_ROWS; i++) {
|
||||||
Monoid m = combine_tag_monoid(row, local[i]);
|
Monoid m = row;
|
||||||
|
if (i > 0) {
|
||||||
|
m = combine_tag_monoid(m, local[i - 1]);
|
||||||
|
}
|
||||||
|
// m now holds exclusive scan of draw monoid
|
||||||
memory[out_base + i * 2] = m.path_ix;
|
memory[out_base + i * 2] = m.path_ix;
|
||||||
memory[out_base + i * 2 + 1] = m.clip_ix;
|
memory[out_base + i * 2 + 1] = m.clip_ix;
|
||||||
|
|
||||||
|
@ -83,8 +88,9 @@ void main() {
|
||||||
// later stages read scene + bbox etc.
|
// later stages read scene + bbox etc.
|
||||||
ElementRef this_ref = Element_index(ref, i);
|
ElementRef this_ref = Element_index(ref, i);
|
||||||
tag_word = Element_tag(this_ref).tag;
|
tag_word = Element_tag(this_ref).tag;
|
||||||
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) {
|
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage ||
|
||||||
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1);
|
tag_word == Element_BeginClip) {
|
||||||
|
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
|
||||||
float bbox_l = float(memory[bbox_offset]) - 32768.0;
|
float bbox_l = float(memory[bbox_offset]) - 32768.0;
|
||||||
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
|
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
|
||||||
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
|
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
|
||||||
|
@ -142,21 +148,27 @@ void main() {
|
||||||
anno_img.offset = fill_img.offset;
|
anno_img.offset = fill_img.offset;
|
||||||
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
|
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
|
||||||
break;
|
break;
|
||||||
|
case Element_BeginClip:
|
||||||
|
AnnoBeginClip anno_begin_clip;
|
||||||
|
anno_begin_clip.bbox = bbox;
|
||||||
|
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
|
||||||
|
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} else if (tag_word == Element_BeginClip) {
|
|
||||||
Clip begin_clip = Element_BeginClip_read(this_ref);
|
|
||||||
AnnoBeginClip anno_begin_clip;
|
|
||||||
// This is the absolute bbox, it's been transformed during encoding.
|
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
|
||||||
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
|
|
||||||
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
|
|
||||||
} else if (tag_word == Element_EndClip) {
|
} else if (tag_word == Element_EndClip) {
|
||||||
Clip end_clip = Element_EndClip_read(this_ref);
|
|
||||||
AnnoEndClip anno_end_clip;
|
AnnoEndClip anno_end_clip;
|
||||||
// This bbox is expected to be the same as the begin one.
|
// The actual bbox will be reconstructed from clip stream output.
|
||||||
anno_end_clip.bbox = end_clip.bbox;
|
anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
||||||
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
|
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
|
||||||
}
|
}
|
||||||
|
// Generate clip stream.
|
||||||
|
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) {
|
||||||
|
uint path_ix = ~(out_ix + i);
|
||||||
|
if (tag_word == Element_BeginClip) {
|
||||||
|
path_ix = m.path_ix;
|
||||||
|
}
|
||||||
|
memory[clip_out_base + m.clip_ix] = path_ix;
|
||||||
|
}
|
||||||
out_ref.offset += Annotated_size;
|
out_ref.offset += Annotated_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
5
piet-gpu/shader/gen/backdrop.hlsl
generated
5
piet-gpu/shader/gen/backdrop.hlsl
generated
|
@ -44,8 +44,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
5
piet-gpu/shader/gen/backdrop.msl
generated
5
piet-gpu/shader/gen/backdrop.msl
generated
|
@ -63,8 +63,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/backdrop.spv
generated
BIN
piet-gpu/shader/gen/backdrop.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
5
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
|
@ -44,8 +44,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
5
piet-gpu/shader/gen/backdrop_lg.msl
generated
5
piet-gpu/shader/gen/backdrop_lg.msl
generated
|
@ -63,8 +63,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/bbox_clear.hlsl
generated
7
piet-gpu/shader/gen/bbox_clear.hlsl
generated
|
@ -17,8 +17,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -39,7 +44,7 @@ struct SPIRV_Cross_Input
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x;
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
if (ix < _21.Load(52))
|
if (ix < _21.Load(68))
|
||||||
{
|
{
|
||||||
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
|
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
|
||||||
_45.Store(out_ix * 4 + 8, 65535u);
|
_45.Store(out_ix * 4 + 8, 65535u);
|
||||||
|
|
5
piet-gpu/shader/gen/bbox_clear.msl
generated
5
piet-gpu/shader/gen/bbox_clear.msl
generated
|
@ -22,8 +22,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/binning.dxil
generated
BIN
piet-gpu/shader/gen/binning.dxil
generated
Binary file not shown.
237
piet-gpu/shader/gen/binning.hlsl
generated
237
piet-gpu/shader/gen/binning.hlsl
generated
|
@ -9,16 +9,6 @@ struct MallocResult
|
||||||
bool failed;
|
bool failed;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoEndClipRef
|
|
||||||
{
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnoEndClip
|
|
||||||
{
|
|
||||||
float4 bbox;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnotatedRef
|
struct AnnotatedRef
|
||||||
{
|
{
|
||||||
uint offset;
|
uint offset;
|
||||||
|
@ -40,6 +30,12 @@ struct BinInstance
|
||||||
uint element_ix;
|
uint element_ix;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct DrawMonoid
|
||||||
|
{
|
||||||
|
uint path_ix;
|
||||||
|
uint clip_ix;
|
||||||
|
};
|
||||||
|
|
||||||
struct Config
|
struct Config
|
||||||
{
|
{
|
||||||
uint n_elements;
|
uint n_elements;
|
||||||
|
@ -54,8 +50,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -64,8 +65,8 @@ struct Config
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
RWByteAddressBuffer _84 : register(u0, space0);
|
RWByteAddressBuffer _94 : register(u0, space0);
|
||||||
ByteAddressBuffer _253 : register(t1, space0);
|
ByteAddressBuffer _202 : register(t1, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -93,7 +94,7 @@ uint read_mem(Alloc alloc, uint offset)
|
||||||
{
|
{
|
||||||
return 0u;
|
return 0u;
|
||||||
}
|
}
|
||||||
uint v = _84.Load(offset * 4 + 8);
|
uint v = _94.Load(offset * 4 + 8);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,36 +103,53 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint tag_and_flags = read_mem(param, param_1);
|
uint tag_and_flags = read_mem(param, param_1);
|
||||||
AnnotatedTag _221 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||||
return _221;
|
return _181;
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref)
|
DrawMonoid load_draw_monoid(uint element_ix)
|
||||||
|
{
|
||||||
|
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix);
|
||||||
|
uint path_ix = _94.Load(base * 4 + 8);
|
||||||
|
uint clip_ix = _94.Load((base + 1u) * 4 + 8);
|
||||||
|
DrawMonoid _222 = { path_ix, clip_ix };
|
||||||
|
return _222;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 load_clip_bbox(uint clip_ix)
|
||||||
|
{
|
||||||
|
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix);
|
||||||
|
float x0 = asfloat(_94.Load(base * 4 + 8));
|
||||||
|
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8));
|
||||||
|
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8));
|
||||||
|
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8));
|
||||||
|
float4 bbox = float4(x0, y0, x1, y1);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 load_path_bbox(uint path_ix)
|
||||||
|
{
|
||||||
|
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 bbox_intersect(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_path_bbox(AnnotatedRef ref, float4 bbox)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
_94.Store((ix + 1u) * 4 + 8, asuint(bbox.x));
|
||||||
uint param_1 = ix + 0u;
|
_94.Store((ix + 2u) * 4 + 8, asuint(bbox.y));
|
||||||
uint raw0 = read_mem(param, param_1);
|
_94.Store((ix + 3u) * 4 + 8, asuint(bbox.z));
|
||||||
Alloc param_2 = a;
|
_94.Store((ix + 4u) * 4 + 8, asuint(bbox.w));
|
||||||
uint param_3 = ix + 1u;
|
|
||||||
uint raw1 = read_mem(param_2, param_3);
|
|
||||||
Alloc param_4 = a;
|
|
||||||
uint param_5 = ix + 2u;
|
|
||||||
uint raw2 = read_mem(param_4, param_5);
|
|
||||||
Alloc param_6 = a;
|
|
||||||
uint param_7 = ix + 3u;
|
|
||||||
uint raw3 = read_mem(param_6, param_7);
|
|
||||||
AnnoEndClip s;
|
|
||||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
|
|
||||||
{
|
|
||||||
AnnoEndClipRef _228 = { ref.offset + 4u };
|
|
||||||
Alloc param = a;
|
|
||||||
AnnoEndClipRef param_1 = _228;
|
|
||||||
return AnnoEndClip_read(param, param_1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||||
|
@ -143,22 +161,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||||
|
|
||||||
MallocResult malloc(uint size)
|
MallocResult malloc(uint size)
|
||||||
{
|
{
|
||||||
uint _90;
|
uint _100;
|
||||||
_84.InterlockedAdd(0, size, _90);
|
_94.InterlockedAdd(0, size, _100);
|
||||||
uint offset = _90;
|
uint offset = _100;
|
||||||
uint _97;
|
uint _107;
|
||||||
_84.GetDimensions(_97);
|
_94.GetDimensions(_107);
|
||||||
_97 = (_97 - 8) / 4;
|
_107 = (_107 - 8) / 4;
|
||||||
MallocResult r;
|
MallocResult r;
|
||||||
r.failed = (offset + size) > uint(int(_97) * 4);
|
r.failed = (offset + size) > uint(int(_107) * 4);
|
||||||
uint param = offset;
|
uint param = offset;
|
||||||
uint param_1 = size;
|
uint param_1 = size;
|
||||||
bool param_2 = !r.failed;
|
bool param_2 = !r.failed;
|
||||||
r.alloc = new_alloc(param, param_1, param_2);
|
r.alloc = new_alloc(param, param_1, param_2);
|
||||||
if (r.failed)
|
if (r.failed)
|
||||||
{
|
{
|
||||||
uint _119;
|
uint _129;
|
||||||
_84.InterlockedMax(4, 1u, _119);
|
_94.InterlockedMax(4, 1u, _129);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
@ -172,7 +190,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_84.Store(offset * 4 + 8, val);
|
_94.Store(offset * 4 + 8, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
||||||
|
@ -186,7 +204,7 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
||||||
|
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint my_n_elements = _253.Load(0);
|
uint my_n_elements = _202.Load(0);
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
for (uint i = 0u; i < 8u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
|
@ -198,15 +216,15 @@ void comp_main()
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||||
AnnotatedRef _308 = { _253.Load(32) + (element_ix * 40u) };
|
AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
|
||||||
AnnotatedRef ref = _308;
|
AnnotatedRef ref = _415;
|
||||||
uint tag = 0u;
|
uint tag = 0u;
|
||||||
if (element_ix < my_n_elements)
|
if (element_ix < my_n_elements)
|
||||||
{
|
{
|
||||||
Alloc _318;
|
Alloc _425;
|
||||||
_318.offset = _253.Load(32);
|
_425.offset = _202.Load(32);
|
||||||
Alloc param;
|
Alloc param;
|
||||||
param.offset = _318.offset;
|
param.offset = _425.offset;
|
||||||
AnnotatedRef param_1 = ref;
|
AnnotatedRef param_1 = ref;
|
||||||
tag = Annotated_tag(param, param_1).tag;
|
tag = Annotated_tag(param, param_1).tag;
|
||||||
}
|
}
|
||||||
|
@ -222,21 +240,38 @@ void comp_main()
|
||||||
case 4u:
|
case 4u:
|
||||||
case 5u:
|
case 5u:
|
||||||
{
|
{
|
||||||
Alloc _336;
|
uint param_2 = element_ix;
|
||||||
_336.offset = _253.Load(32);
|
DrawMonoid draw_monoid = load_draw_monoid(param_2);
|
||||||
Alloc param_2;
|
uint path_ix = draw_monoid.path_ix;
|
||||||
param_2.offset = _336.offset;
|
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||||
AnnotatedRef param_3 = ref;
|
uint clip_ix = draw_monoid.clip_ix;
|
||||||
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3);
|
if (clip_ix > 0u)
|
||||||
x0 = int(floor(clip.bbox.x * 0.00390625f));
|
{
|
||||||
y0 = int(floor(clip.bbox.y * 0.00390625f));
|
uint param_3 = clip_ix - 1u;
|
||||||
x1 = int(ceil(clip.bbox.z * 0.00390625f));
|
clip_bbox = load_clip_bbox(param_3);
|
||||||
y1 = int(ceil(clip.bbox.w * 0.00390625f));
|
}
|
||||||
|
uint param_4 = path_ix;
|
||||||
|
float4 path_bbox = load_path_bbox(param_4);
|
||||||
|
float4 param_5 = path_bbox;
|
||||||
|
float4 param_6 = clip_bbox;
|
||||||
|
float4 bbox = bbox_intersect(param_5, param_6);
|
||||||
|
float4 _473 = bbox;
|
||||||
|
float4 _475 = bbox;
|
||||||
|
float2 _477 = max(_473.xy, _475.zw);
|
||||||
|
bbox.z = _477.x;
|
||||||
|
bbox.w = _477.y;
|
||||||
|
AnnotatedRef param_7 = ref;
|
||||||
|
float4 param_8 = bbox;
|
||||||
|
store_path_bbox(param_7, param_8);
|
||||||
|
x0 = int(floor(bbox.x * 0.00390625f));
|
||||||
|
y0 = int(floor(bbox.y * 0.00390625f));
|
||||||
|
x1 = int(ceil(bbox.z * 0.00390625f));
|
||||||
|
y1 = int(ceil(bbox.w * 0.00390625f));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint width_in_bins = ((_253.Load(8) + 16u) - 1u) / 16u;
|
uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u;
|
||||||
uint height_in_bins = ((_253.Load(12) + 16u) - 1u) / 16u;
|
uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
|
||||||
x0 = clamp(x0, 0, int(width_in_bins));
|
x0 = clamp(x0, 0, int(width_in_bins));
|
||||||
x1 = clamp(x1, x0, int(width_in_bins));
|
x1 = clamp(x1, x0, int(width_in_bins));
|
||||||
y0 = clamp(y0, 0, int(height_in_bins));
|
y0 = clamp(y0, 0, int(height_in_bins));
|
||||||
|
@ -251,8 +286,8 @@ void comp_main()
|
||||||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||||
while (y < y1)
|
while (y < y1)
|
||||||
{
|
{
|
||||||
uint _437;
|
uint _581;
|
||||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437);
|
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581);
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
{
|
{
|
||||||
|
@ -267,15 +302,15 @@ void comp_main()
|
||||||
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
|
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
uint param_4 = 0u;
|
uint param_9 = 0u;
|
||||||
uint param_5 = 0u;
|
uint param_10 = 0u;
|
||||||
bool param_6 = true;
|
bool param_11 = true;
|
||||||
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
|
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
|
||||||
if (element_count != 0u)
|
if (element_count != 0u)
|
||||||
{
|
{
|
||||||
uint param_7 = element_count * 4u;
|
uint param_12 = element_count * 4u;
|
||||||
MallocResult _487 = malloc(param_7);
|
MallocResult _631 = malloc(param_12);
|
||||||
MallocResult chunk = _487;
|
MallocResult chunk = _631;
|
||||||
chunk_alloc = chunk.alloc;
|
chunk_alloc = chunk.alloc;
|
||||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||||
if (chunk.failed)
|
if (chunk.failed)
|
||||||
|
@ -283,32 +318,32 @@ void comp_main()
|
||||||
sh_alloc_failed = true;
|
sh_alloc_failed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||||
Alloc _516;
|
Alloc _660;
|
||||||
_516.offset = _253.Load(20);
|
_660.offset = _202.Load(20);
|
||||||
Alloc param_8;
|
Alloc param_13;
|
||||||
param_8.offset = _516.offset;
|
param_13.offset = _660.offset;
|
||||||
uint param_9 = out_ix;
|
uint param_14 = out_ix;
|
||||||
uint param_10 = element_count;
|
uint param_15 = element_count;
|
||||||
write_mem(param_8, param_9, param_10);
|
write_mem(param_13, param_14, param_15);
|
||||||
Alloc _528;
|
Alloc _672;
|
||||||
_528.offset = _253.Load(20);
|
_672.offset = _202.Load(20);
|
||||||
Alloc param_11;
|
Alloc param_16;
|
||||||
param_11.offset = _528.offset;
|
param_16.offset = _672.offset;
|
||||||
uint param_12 = out_ix + 1u;
|
uint param_17 = out_ix + 1u;
|
||||||
uint param_13 = chunk_alloc.offset;
|
uint param_18 = chunk_alloc.offset;
|
||||||
write_mem(param_11, param_12, param_13);
|
write_mem(param_16, param_17, param_18);
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
bool _543;
|
bool _687;
|
||||||
if (!sh_alloc_failed)
|
if (!sh_alloc_failed)
|
||||||
{
|
{
|
||||||
_543 = _84.Load(4) != 0u;
|
_687 = _94.Load(4) != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_543 = sh_alloc_failed;
|
_687 = sh_alloc_failed;
|
||||||
}
|
}
|
||||||
if (_543)
|
if (_687)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -327,12 +362,12 @@ void comp_main()
|
||||||
}
|
}
|
||||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||||
BinInstanceRef _605 = { out_offset };
|
BinInstanceRef _749 = { out_offset };
|
||||||
BinInstance _607 = { element_ix };
|
BinInstance _751 = { element_ix };
|
||||||
Alloc param_14 = out_alloc;
|
Alloc param_19 = out_alloc;
|
||||||
BinInstanceRef param_15 = _605;
|
BinInstanceRef param_20 = _749;
|
||||||
BinInstance param_16 = _607;
|
BinInstance param_21 = _751;
|
||||||
BinInstance_write(param_14, param_15, param_16);
|
BinInstance_write(param_19, param_20, param_21);
|
||||||
}
|
}
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
|
|
214
piet-gpu/shader/gen/binning.msl
generated
214
piet-gpu/shader/gen/binning.msl
generated
|
@ -18,16 +18,6 @@ struct MallocResult
|
||||||
bool failed;
|
bool failed;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoEndClipRef
|
|
||||||
{
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnoEndClip
|
|
||||||
{
|
|
||||||
float4 bbox;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnotatedRef
|
struct AnnotatedRef
|
||||||
{
|
{
|
||||||
uint offset;
|
uint offset;
|
||||||
|
@ -49,6 +39,12 @@ struct BinInstance
|
||||||
uint element_ix;
|
uint element_ix;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct DrawMonoid
|
||||||
|
{
|
||||||
|
uint path_ix;
|
||||||
|
uint clip_ix;
|
||||||
|
};
|
||||||
|
|
||||||
struct Memory
|
struct Memory
|
||||||
{
|
{
|
||||||
uint mem_offset;
|
uint mem_offset;
|
||||||
|
@ -75,8 +71,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -97,7 +98,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_84, constant uint& v_84BufferSize)
|
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
{
|
{
|
||||||
Alloc param = alloc;
|
Alloc param = alloc;
|
||||||
uint param_1 = offset;
|
uint param_1 = offset;
|
||||||
|
@ -105,46 +106,66 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
|
||||||
{
|
{
|
||||||
return 0u;
|
return 0u;
|
||||||
}
|
}
|
||||||
uint v = v_84.memory[offset];
|
uint v = v_94.memory[offset];
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint tag_and_flags = read_mem(param, param_1, v_84, v_84BufferSize);
|
uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
|
||||||
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
|
||||||
Alloc param = a;
|
uint path_ix = v_94.memory[base];
|
||||||
uint param_1 = ix + 0u;
|
uint clip_ix = v_94.memory[base + 1u];
|
||||||
uint raw0 = read_mem(param, param_1, v_84, v_84BufferSize);
|
return DrawMonoid{ path_ix, clip_ix };
|
||||||
Alloc param_2 = a;
|
|
||||||
uint param_3 = ix + 1u;
|
|
||||||
uint raw1 = read_mem(param_2, param_3, v_84, v_84BufferSize);
|
|
||||||
Alloc param_4 = a;
|
|
||||||
uint param_5 = ix + 2u;
|
|
||||||
uint raw2 = read_mem(param_4, param_5, v_84, v_84BufferSize);
|
|
||||||
Alloc param_6 = a;
|
|
||||||
uint param_7 = ix + 3u;
|
|
||||||
uint raw3 = read_mem(param_6, param_7, v_84, v_84BufferSize);
|
|
||||||
AnnoEndClip s;
|
|
||||||
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_84, constant uint& v_84BufferSize)
|
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
|
||||||
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u };
|
float x0 = as_type<float>(v_94.memory[base]);
|
||||||
return AnnoEndClip_read(param, param_1, v_84, v_84BufferSize);
|
float y0 = as_type<float>(v_94.memory[base + 1u]);
|
||||||
|
float x1 = as_type<float>(v_94.memory[base + 2u]);
|
||||||
|
float y1 = as_type<float>(v_94.memory[base + 3u]);
|
||||||
|
float4 bbox = float4(x0, y0, x1, y1);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
|
||||||
|
{
|
||||||
|
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(v_94.memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0;
|
||||||
|
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0;
|
||||||
|
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
float4 bbox_intersect(thread const float4& a, thread const float4& b)
|
||||||
|
{
|
||||||
|
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
|
{
|
||||||
|
uint ix = ref.offset >> uint(2);
|
||||||
|
v_94.memory[ix + 1u] = as_type<uint>(bbox.x);
|
||||||
|
v_94.memory[ix + 2u] = as_type<uint>(bbox.y);
|
||||||
|
v_94.memory[ix + 3u] = as_type<uint>(bbox.z);
|
||||||
|
v_94.memory[ix + 4u] = as_type<uint>(bbox.w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
|
@ -156,26 +177,26 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
MallocResult malloc(thread const uint& size, device Memory& v_84, constant uint& v_84BufferSize)
|
MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
{
|
{
|
||||||
uint _90 = atomic_fetch_add_explicit((device atomic_uint*)&v_84.mem_offset, size, memory_order_relaxed);
|
uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed);
|
||||||
uint offset = _90;
|
uint offset = _100;
|
||||||
MallocResult r;
|
MallocResult r;
|
||||||
r.failed = (offset + size) > uint(int((v_84BufferSize - 8) / 4) * 4);
|
r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4);
|
||||||
uint param = offset;
|
uint param = offset;
|
||||||
uint param_1 = size;
|
uint param_1 = size;
|
||||||
bool param_2 = !r.failed;
|
bool param_2 = !r.failed;
|
||||||
r.alloc = new_alloc(param, param_1, param_2);
|
r.alloc = new_alloc(param, param_1, param_2);
|
||||||
if (r.failed)
|
if (r.failed)
|
||||||
{
|
{
|
||||||
uint _119 = atomic_fetch_max_explicit((device atomic_uint*)&v_84.mem_error, 1u, memory_order_relaxed);
|
uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_84, constant uint& v_84BufferSize)
|
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
{
|
{
|
||||||
Alloc param = alloc;
|
Alloc param = alloc;
|
||||||
uint param_1 = offset;
|
uint param_1 = offset;
|
||||||
|
@ -183,27 +204,27 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
v_84.memory[offset] = val;
|
v_94.memory[offset] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_84, constant uint& v_84BufferSize)
|
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = s.element_ix;
|
uint param_2 = s.element_ix;
|
||||||
write_mem(param, param_1, param_2, v_84, v_84BufferSize);
|
write_mem(param, param_1, param_2, v_94, v_94BufferSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_84 [[buffer(0)]], const device ConfigBuf& _253 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
threadgroup uint bitmaps[8][256];
|
threadgroup uint bitmaps[8][256];
|
||||||
threadgroup short sh_alloc_failed;
|
threadgroup short sh_alloc_failed;
|
||||||
threadgroup uint count[8][256];
|
threadgroup uint count[8][256];
|
||||||
threadgroup Alloc sh_chunk_alloc[256];
|
threadgroup Alloc sh_chunk_alloc[256];
|
||||||
constant uint& v_84BufferSize = spvBufferSizeConstants[0];
|
constant uint& v_94BufferSize = spvBufferSizeConstants[0];
|
||||||
uint my_n_elements = _253.conf.n_elements;
|
uint my_n_elements = v_202.conf.n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
for (uint i = 0u; i < 8u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
|
@ -215,14 +236,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
}
|
}
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef{ _253.conf.anno_alloc.offset + (element_ix * 40u) };
|
AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
|
||||||
uint tag = 0u;
|
uint tag = 0u;
|
||||||
if (element_ix < my_n_elements)
|
if (element_ix < my_n_elements)
|
||||||
{
|
{
|
||||||
Alloc param;
|
Alloc param;
|
||||||
param.offset = _253.conf.anno_alloc.offset;
|
param.offset = v_202.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_1 = ref;
|
AnnotatedRef param_1 = ref;
|
||||||
tag = Annotated_tag(param, param_1, v_84, v_84BufferSize).tag;
|
tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
|
||||||
}
|
}
|
||||||
int x0 = 0;
|
int x0 = 0;
|
||||||
int y0 = 0;
|
int y0 = 0;
|
||||||
|
@ -236,19 +257,38 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
case 4u:
|
case 4u:
|
||||||
case 5u:
|
case 5u:
|
||||||
{
|
{
|
||||||
Alloc param_2;
|
uint param_2 = element_ix;
|
||||||
param_2.offset = _253.conf.anno_alloc.offset;
|
DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
|
||||||
AnnotatedRef param_3 = ref;
|
uint path_ix = draw_monoid.path_ix;
|
||||||
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_84, v_84BufferSize);
|
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||||
x0 = int(floor(clip.bbox.x * 0.00390625));
|
uint clip_ix = draw_monoid.clip_ix;
|
||||||
y0 = int(floor(clip.bbox.y * 0.00390625));
|
if (clip_ix > 0u)
|
||||||
x1 = int(ceil(clip.bbox.z * 0.00390625));
|
{
|
||||||
y1 = int(ceil(clip.bbox.w * 0.00390625));
|
uint param_3 = clip_ix - 1u;
|
||||||
|
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202);
|
||||||
|
}
|
||||||
|
uint param_4 = path_ix;
|
||||||
|
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202);
|
||||||
|
float4 param_5 = path_bbox;
|
||||||
|
float4 param_6 = clip_bbox;
|
||||||
|
float4 bbox = bbox_intersect(param_5, param_6);
|
||||||
|
float4 _473 = bbox;
|
||||||
|
float4 _475 = bbox;
|
||||||
|
float2 _477 = fast::max(_473.xy, _475.zw);
|
||||||
|
bbox.z = _477.x;
|
||||||
|
bbox.w = _477.y;
|
||||||
|
AnnotatedRef param_7 = ref;
|
||||||
|
float4 param_8 = bbox;
|
||||||
|
store_path_bbox(param_7, param_8, v_94, v_94BufferSize);
|
||||||
|
x0 = int(floor(bbox.x * 0.00390625));
|
||||||
|
y0 = int(floor(bbox.y * 0.00390625));
|
||||||
|
x1 = int(ceil(bbox.z * 0.00390625));
|
||||||
|
y1 = int(ceil(bbox.w * 0.00390625));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint width_in_bins = ((_253.conf.width_in_tiles + 16u) - 1u) / 16u;
|
uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u;
|
||||||
uint height_in_bins = ((_253.conf.height_in_tiles + 16u) - 1u) / 16u;
|
uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
|
||||||
x0 = clamp(x0, 0, int(width_in_bins));
|
x0 = clamp(x0, 0, int(width_in_bins));
|
||||||
x1 = clamp(x1, x0, int(width_in_bins));
|
x1 = clamp(x1, x0, int(width_in_bins));
|
||||||
y0 = clamp(y0, 0, int(height_in_bins));
|
y0 = clamp(y0, 0, int(height_in_bins));
|
||||||
|
@ -263,7 +303,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||||
while (y < y1)
|
while (y < y1)
|
||||||
{
|
{
|
||||||
uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
{
|
{
|
||||||
|
@ -278,15 +318,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
|
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
uint param_4 = 0u;
|
uint param_9 = 0u;
|
||||||
uint param_5 = 0u;
|
uint param_10 = 0u;
|
||||||
bool param_6 = true;
|
bool param_11 = true;
|
||||||
Alloc chunk_alloc = new_alloc(param_4, param_5, param_6);
|
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
|
||||||
if (element_count != 0u)
|
if (element_count != 0u)
|
||||||
{
|
{
|
||||||
uint param_7 = element_count * 4u;
|
uint param_12 = element_count * 4u;
|
||||||
MallocResult _487 = malloc(param_7, v_84, v_84BufferSize);
|
MallocResult _631 = malloc(param_12, v_94, v_94BufferSize);
|
||||||
MallocResult chunk = _487;
|
MallocResult chunk = _631;
|
||||||
chunk_alloc = chunk.alloc;
|
chunk_alloc = chunk.alloc;
|
||||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||||
if (chunk.failed)
|
if (chunk.failed)
|
||||||
|
@ -294,28 +334,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
sh_alloc_failed = short(true);
|
sh_alloc_failed = short(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint out_ix = (_253.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||||
Alloc param_8;
|
Alloc param_13;
|
||||||
param_8.offset = _253.conf.bin_alloc.offset;
|
param_13.offset = v_202.conf.bin_alloc.offset;
|
||||||
uint param_9 = out_ix;
|
uint param_14 = out_ix;
|
||||||
uint param_10 = element_count;
|
uint param_15 = element_count;
|
||||||
write_mem(param_8, param_9, param_10, v_84, v_84BufferSize);
|
write_mem(param_13, param_14, param_15, v_94, v_94BufferSize);
|
||||||
Alloc param_11;
|
Alloc param_16;
|
||||||
param_11.offset = _253.conf.bin_alloc.offset;
|
param_16.offset = v_202.conf.bin_alloc.offset;
|
||||||
uint param_12 = out_ix + 1u;
|
uint param_17 = out_ix + 1u;
|
||||||
uint param_13 = chunk_alloc.offset;
|
uint param_18 = chunk_alloc.offset;
|
||||||
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize);
|
write_mem(param_16, param_17, param_18, v_94, v_94BufferSize);
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
bool _543;
|
bool _687;
|
||||||
if (!bool(sh_alloc_failed))
|
if (!bool(sh_alloc_failed))
|
||||||
{
|
{
|
||||||
_543 = v_84.mem_error != 0u;
|
_687 = v_94.mem_error != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_543 = bool(sh_alloc_failed);
|
_687 = bool(sh_alloc_failed);
|
||||||
}
|
}
|
||||||
if (_543)
|
if (_687)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -334,10 +374,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
}
|
}
|
||||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||||
Alloc param_14 = out_alloc;
|
Alloc param_19 = out_alloc;
|
||||||
BinInstanceRef param_15 = BinInstanceRef{ out_offset };
|
BinInstanceRef param_20 = BinInstanceRef{ out_offset };
|
||||||
BinInstance param_16 = BinInstance{ element_ix };
|
BinInstance param_21 = BinInstance{ element_ix };
|
||||||
BinInstance_write(param_14, param_15, param_16, v_84, v_84BufferSize);
|
BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize);
|
||||||
}
|
}
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
|
|
BIN
piet-gpu/shader/gen/binning.spv
generated
BIN
piet-gpu/shader/gen/binning.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
Normal file
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
Normal file
Binary file not shown.
367
piet-gpu/shader/gen/clip_leaf.hlsl
generated
Normal file
367
piet-gpu/shader/gen/clip_leaf.hlsl
generated
Normal file
|
@ -0,0 +1,367 @@
|
||||||
|
struct Bic
|
||||||
|
{
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClipEl
|
||||||
|
{
|
||||||
|
uint parent_ix;
|
||||||
|
float4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Alloc
|
||||||
|
{
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Config
|
||||||
|
{
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint width_in_tiles;
|
||||||
|
uint height_in_tiles;
|
||||||
|
Alloc tile_alloc;
|
||||||
|
Alloc bin_alloc;
|
||||||
|
Alloc ptcl_alloc;
|
||||||
|
Alloc pathseg_alloc;
|
||||||
|
Alloc anno_alloc;
|
||||||
|
Alloc trans_alloc;
|
||||||
|
Alloc bbox_alloc;
|
||||||
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
|
uint n_trans;
|
||||||
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
|
uint trans_offset;
|
||||||
|
uint linewidth_offset;
|
||||||
|
uint pathtag_offset;
|
||||||
|
uint pathseg_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
static const Bic _393 = { 0u, 0u };
|
||||||
|
|
||||||
|
ByteAddressBuffer _80 : register(t1, space0);
|
||||||
|
RWByteAddressBuffer _96 : register(u0, space0);
|
||||||
|
|
||||||
|
static uint3 gl_WorkGroupID;
|
||||||
|
static uint3 gl_LocalInvocationID;
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_WorkGroupID : SV_GroupID;
|
||||||
|
uint3 gl_LocalInvocationID : SV_GroupThreadID;
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
groupshared Bic sh_bic[510];
|
||||||
|
groupshared uint sh_stack[256];
|
||||||
|
groupshared float4 sh_stack_bbox[256];
|
||||||
|
groupshared uint sh_link[256];
|
||||||
|
groupshared float4 sh_bbox[256];
|
||||||
|
|
||||||
|
Bic load_bic(uint ix)
|
||||||
|
{
|
||||||
|
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
|
||||||
|
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
|
||||||
|
return _286;
|
||||||
|
}
|
||||||
|
|
||||||
|
Bic bic_combine(Bic x, Bic y)
|
||||||
|
{
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m };
|
||||||
|
return _72;
|
||||||
|
}
|
||||||
|
|
||||||
|
ClipEl load_clip_el(uint ix)
|
||||||
|
{
|
||||||
|
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
|
||||||
|
uint parent_ix = _96.Load(base * 4 + 8);
|
||||||
|
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
|
||||||
|
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
|
||||||
|
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
|
||||||
|
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
|
||||||
|
float4 bbox = float4(x0, y0, x1, y1);
|
||||||
|
ClipEl _335 = { parent_ix, bbox };
|
||||||
|
return _335;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 bbox_intersect(float4 a, float4 b)
|
||||||
|
{
|
||||||
|
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint load_path_ix(uint ix)
|
||||||
|
{
|
||||||
|
if (ix < _80.Load(72))
|
||||||
|
{
|
||||||
|
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 2147483648u;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 load_path_bbox(uint path_ix)
|
||||||
|
{
|
||||||
|
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint search_link(inout Bic bic)
|
||||||
|
{
|
||||||
|
uint ix = gl_LocalInvocationID.x;
|
||||||
|
uint j = 0u;
|
||||||
|
while (j < 8u)
|
||||||
|
{
|
||||||
|
uint base = 512u - (2u << (8u - j));
|
||||||
|
if (((ix >> j) & 1u) != 0u)
|
||||||
|
{
|
||||||
|
Bic param = sh_bic[(base + (ix >> j)) - 1u];
|
||||||
|
Bic param_1 = bic;
|
||||||
|
Bic test = bic_combine(param, param_1);
|
||||||
|
if (test.b > 0u)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bic = test;
|
||||||
|
ix -= (1u << j);
|
||||||
|
}
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
if (ix > 0u)
|
||||||
|
{
|
||||||
|
while (j > 0u)
|
||||||
|
{
|
||||||
|
j--;
|
||||||
|
uint base_1 = 512u - (2u << (8u - j));
|
||||||
|
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
|
||||||
|
Bic param_3 = bic;
|
||||||
|
Bic test_1 = bic_combine(param_2, param_3);
|
||||||
|
if (test_1.b == 0u)
|
||||||
|
{
|
||||||
|
bic = test_1;
|
||||||
|
ix -= (1u << j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ix > 0u)
|
||||||
|
{
|
||||||
|
return ix - 1u;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 4294967295u - bic.a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_clip_bbox(uint ix, float4 bbox)
|
||||||
|
{
|
||||||
|
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
|
||||||
|
_96.Store(base * 4 + 8, asuint(bbox.x));
|
||||||
|
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
|
||||||
|
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
|
||||||
|
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
|
||||||
|
}
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
Bic bic = _393;
|
||||||
|
if (th < gl_WorkGroupID.x)
|
||||||
|
{
|
||||||
|
uint param = th;
|
||||||
|
bic = load_bic(param);
|
||||||
|
}
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
for (uint i = 0u; i < 8u; i++)
|
||||||
|
{
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
if ((th + (1u << i)) < 256u)
|
||||||
|
{
|
||||||
|
Bic other = sh_bic[th + (1u << i)];
|
||||||
|
Bic param_1 = bic;
|
||||||
|
Bic param_2 = other;
|
||||||
|
bic = bic_combine(param_1, param_2);
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
uint stack_size = sh_bic[0].b;
|
||||||
|
uint sp = 255u - th;
|
||||||
|
uint ix = 0u;
|
||||||
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
|
{
|
||||||
|
uint probe = ix + (128u >> i_1);
|
||||||
|
if (sp < sh_bic[probe].b)
|
||||||
|
{
|
||||||
|
ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint b = sh_bic[ix].b;
|
||||||
|
float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||||
|
if (sp < b)
|
||||||
|
{
|
||||||
|
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
|
||||||
|
ClipEl el = load_clip_el(param_3);
|
||||||
|
sh_stack[th] = el.parent_ix;
|
||||||
|
bbox = el.bbox;
|
||||||
|
}
|
||||||
|
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||||
|
{
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
if (th >= (1u << i_2))
|
||||||
|
{
|
||||||
|
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
|
||||||
|
float4 param_5 = bbox;
|
||||||
|
bbox = bbox_intersect(param_4, param_5);
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
}
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
uint param_6 = gl_GlobalInvocationID.x;
|
||||||
|
uint inp = load_path_ix(param_6);
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
Bic _559 = { 1u - uint(is_push), uint(is_push) };
|
||||||
|
bic = _559;
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
if (is_push)
|
||||||
|
{
|
||||||
|
uint param_7 = inp;
|
||||||
|
bbox = load_path_bbox(param_7);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||||
|
}
|
||||||
|
uint inbase = 0u;
|
||||||
|
for (uint i_3 = 0u; i_3 < 7u; i_3++)
|
||||||
|
{
|
||||||
|
uint outbase = 512u - (1u << (8u - i_3));
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
if (th < (1u << (7u - i_3)))
|
||||||
|
{
|
||||||
|
Bic param_8 = sh_bic[inbase + (th * 2u)];
|
||||||
|
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
|
||||||
|
sh_bic[outbase + th] = bic_combine(param_8, param_9);
|
||||||
|
}
|
||||||
|
inbase = outbase;
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
bic = _393;
|
||||||
|
Bic param_10 = bic;
|
||||||
|
uint _618 = search_link(param_10);
|
||||||
|
bic = param_10;
|
||||||
|
uint link = _618;
|
||||||
|
sh_link[th] = link;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
uint grandparent;
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
grandparent = sh_link[link];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
grandparent = link - 1u;
|
||||||
|
}
|
||||||
|
uint parent;
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
parent = (gl_WorkGroupID.x * 256u) + link;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (int(link + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
parent = sh_stack[256u + link];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parent = 4294967295u;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (uint i_4 = 0u; i_4 < 8u; i_4++)
|
||||||
|
{
|
||||||
|
if (i_4 != 0u)
|
||||||
|
{
|
||||||
|
sh_link[th] = link;
|
||||||
|
}
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
float4 param_11 = sh_bbox[link];
|
||||||
|
float4 param_12 = bbox;
|
||||||
|
bbox = bbox_intersect(param_11, param_12);
|
||||||
|
link = sh_link[link];
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
}
|
||||||
|
if (int(link + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
float4 param_13 = sh_stack_bbox[256u + link];
|
||||||
|
float4 param_14 = bbox;
|
||||||
|
bbox = bbox_intersect(param_13, param_14);
|
||||||
|
}
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
uint path_ix = inp;
|
||||||
|
bool _717 = !is_push;
|
||||||
|
bool _725;
|
||||||
|
if (_717)
|
||||||
|
{
|
||||||
|
_725 = gl_GlobalInvocationID.x < _80.Load(72);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_725 = _717;
|
||||||
|
}
|
||||||
|
if (_725)
|
||||||
|
{
|
||||||
|
uint param_15 = parent;
|
||||||
|
path_ix = load_path_ix(param_15);
|
||||||
|
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp));
|
||||||
|
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
|
||||||
|
if (int(grandparent) >= 0)
|
||||||
|
{
|
||||||
|
bbox = sh_bbox[grandparent];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (int(grandparent + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
bbox = sh_stack_bbox[256u + grandparent];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint param_16 = gl_GlobalInvocationID.x;
|
||||||
|
float4 param_17 = bbox;
|
||||||
|
store_clip_bbox(param_16, param_17);
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
366
piet-gpu/shader/gen/clip_leaf.msl
generated
Normal file
366
piet-gpu/shader/gen/clip_leaf.msl
generated
Normal file
|
@ -0,0 +1,366 @@
|
||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct Bic
|
||||||
|
{
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClipEl
|
||||||
|
{
|
||||||
|
uint parent_ix;
|
||||||
|
float4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Alloc
|
||||||
|
{
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Config
|
||||||
|
{
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint width_in_tiles;
|
||||||
|
uint height_in_tiles;
|
||||||
|
Alloc tile_alloc;
|
||||||
|
Alloc bin_alloc;
|
||||||
|
Alloc ptcl_alloc;
|
||||||
|
Alloc pathseg_alloc;
|
||||||
|
Alloc anno_alloc;
|
||||||
|
Alloc trans_alloc;
|
||||||
|
Alloc bbox_alloc;
|
||||||
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
|
uint n_trans;
|
||||||
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
|
uint trans_offset;
|
||||||
|
uint linewidth_offset;
|
||||||
|
uint pathtag_offset;
|
||||||
|
uint pathseg_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ConfigBuf
|
||||||
|
{
|
||||||
|
Config conf;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Memory
|
||||||
|
{
|
||||||
|
uint mem_offset;
|
||||||
|
uint mem_error;
|
||||||
|
uint memory[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
|
||||||
|
{
|
||||||
|
uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
|
||||||
|
return Bic{ v_96.memory[base], v_96.memory[base + 1u] };
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
Bic bic_combine(thread const Bic& x, thread const Bic& y)
|
||||||
|
{
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
|
||||||
|
{
|
||||||
|
uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
|
||||||
|
uint parent_ix = v_96.memory[base];
|
||||||
|
float x0 = as_type<float>(v_96.memory[base + 1u]);
|
||||||
|
float y0 = as_type<float>(v_96.memory[base + 2u]);
|
||||||
|
float x1 = as_type<float>(v_96.memory[base + 3u]);
|
||||||
|
float y1 = as_type<float>(v_96.memory[base + 4u]);
|
||||||
|
float4 bbox = float4(x0, y0, x1, y1);
|
||||||
|
return ClipEl{ parent_ix, bbox };
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
float4 bbox_intersect(thread const float4& a, thread const float4& b)
|
||||||
|
{
|
||||||
|
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
|
||||||
|
{
|
||||||
|
if (ix < v_80.conf.n_clip)
|
||||||
|
{
|
||||||
|
return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 2147483648u;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
|
||||||
|
{
|
||||||
|
uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(v_96.memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
|
||||||
|
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
|
||||||
|
float bbox_b = float(v_96.memory[base + 3u]) - 32768.0;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
|
||||||
|
{
|
||||||
|
uint ix = gl_LocalInvocationID.x;
|
||||||
|
uint j = 0u;
|
||||||
|
while (j < 8u)
|
||||||
|
{
|
||||||
|
uint base = 512u - (2u << (8u - j));
|
||||||
|
if (((ix >> j) & 1u) != 0u)
|
||||||
|
{
|
||||||
|
Bic param = sh_bic[(base + (ix >> j)) - 1u];
|
||||||
|
Bic param_1 = bic;
|
||||||
|
Bic test = bic_combine(param, param_1);
|
||||||
|
if (test.b > 0u)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bic = test;
|
||||||
|
ix -= (1u << j);
|
||||||
|
}
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
if (ix > 0u)
|
||||||
|
{
|
||||||
|
while (j > 0u)
|
||||||
|
{
|
||||||
|
j--;
|
||||||
|
uint base_1 = 512u - (2u << (8u - j));
|
||||||
|
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
|
||||||
|
Bic param_3 = bic;
|
||||||
|
Bic test_1 = bic_combine(param_2, param_3);
|
||||||
|
if (test_1.b == 0u)
|
||||||
|
{
|
||||||
|
bic = test_1;
|
||||||
|
ix -= (1u << j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ix > 0u)
|
||||||
|
{
|
||||||
|
return ix - 1u;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 4294967295u - bic.a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
|
||||||
|
{
|
||||||
|
uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix);
|
||||||
|
v_96.memory[base] = as_type<uint>(bbox.x);
|
||||||
|
v_96.memory[base + 1u] = as_type<uint>(bbox.y);
|
||||||
|
v_96.memory[base + 2u] = as_type<uint>(bbox.z);
|
||||||
|
v_96.memory[base + 3u] = as_type<uint>(bbox.w);
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
threadgroup Bic sh_bic[510];
|
||||||
|
threadgroup uint sh_stack[256];
|
||||||
|
threadgroup float4 sh_stack_bbox[256];
|
||||||
|
threadgroup uint sh_link[256];
|
||||||
|
threadgroup float4 sh_bbox[256];
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
Bic bic = Bic{ 0u, 0u };
|
||||||
|
if (th < gl_WorkGroupID.x)
|
||||||
|
{
|
||||||
|
uint param = th;
|
||||||
|
bic = load_bic(param, v_80, v_96);
|
||||||
|
}
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
for (uint i = 0u; i < 8u; i++)
|
||||||
|
{
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if ((th + (1u << i)) < 256u)
|
||||||
|
{
|
||||||
|
Bic other = sh_bic[th + (1u << i)];
|
||||||
|
Bic param_1 = bic;
|
||||||
|
Bic param_2 = other;
|
||||||
|
bic = bic_combine(param_1, param_2);
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
uint stack_size = sh_bic[0].b;
|
||||||
|
uint sp = 255u - th;
|
||||||
|
uint ix = 0u;
|
||||||
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
|
{
|
||||||
|
uint probe = ix + (128u >> i_1);
|
||||||
|
if (sp < sh_bic[probe].b)
|
||||||
|
{
|
||||||
|
ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint b = sh_bic[ix].b;
|
||||||
|
float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||||
|
if (sp < b)
|
||||||
|
{
|
||||||
|
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
|
||||||
|
ClipEl el = load_clip_el(param_3, v_80, v_96);
|
||||||
|
sh_stack[th] = el.parent_ix;
|
||||||
|
bbox = el.bbox;
|
||||||
|
}
|
||||||
|
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||||
|
{
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (th >= (1u << i_2))
|
||||||
|
{
|
||||||
|
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
|
||||||
|
float4 param_5 = bbox;
|
||||||
|
bbox = bbox_intersect(param_4, param_5);
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
}
|
||||||
|
sh_stack_bbox[th] = bbox;
|
||||||
|
uint param_6 = gl_GlobalInvocationID.x;
|
||||||
|
uint inp = load_path_ix(param_6, v_80, v_96);
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
bic = Bic{ 1u - uint(is_push), uint(is_push) };
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
if (is_push)
|
||||||
|
{
|
||||||
|
uint param_7 = inp;
|
||||||
|
bbox = load_path_bbox(param_7, v_80, v_96);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||||
|
}
|
||||||
|
uint inbase = 0u;
|
||||||
|
for (uint i_3 = 0u; i_3 < 7u; i_3++)
|
||||||
|
{
|
||||||
|
uint outbase = 512u - (1u << (8u - i_3));
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (th < (1u << (7u - i_3)))
|
||||||
|
{
|
||||||
|
Bic param_8 = sh_bic[inbase + (th * 2u)];
|
||||||
|
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
|
||||||
|
sh_bic[outbase + th] = bic_combine(param_8, param_9);
|
||||||
|
}
|
||||||
|
inbase = outbase;
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
bic = Bic{ 0u, 0u };
|
||||||
|
Bic param_10 = bic;
|
||||||
|
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
|
||||||
|
bic = param_10;
|
||||||
|
uint link = _618;
|
||||||
|
sh_link[th] = link;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
uint grandparent;
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
grandparent = sh_link[link];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
grandparent = link - 1u;
|
||||||
|
}
|
||||||
|
uint parent;
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
parent = (gl_WorkGroupID.x * 256u) + link;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (int(link + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
parent = sh_stack[256u + link];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parent = 4294967295u;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (uint i_4 = 0u; i_4 < 8u; i_4++)
|
||||||
|
{
|
||||||
|
if (i_4 != 0u)
|
||||||
|
{
|
||||||
|
sh_link[th] = link;
|
||||||
|
}
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if (int(link) >= 0)
|
||||||
|
{
|
||||||
|
float4 param_11 = sh_bbox[link];
|
||||||
|
float4 param_12 = bbox;
|
||||||
|
bbox = bbox_intersect(param_11, param_12);
|
||||||
|
link = sh_link[link];
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
}
|
||||||
|
if (int(link + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
float4 param_13 = sh_stack_bbox[256u + link];
|
||||||
|
float4 param_14 = bbox;
|
||||||
|
bbox = bbox_intersect(param_13, param_14);
|
||||||
|
}
|
||||||
|
sh_bbox[th] = bbox;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
uint path_ix = inp;
|
||||||
|
bool _717 = !is_push;
|
||||||
|
bool _725;
|
||||||
|
if (_717)
|
||||||
|
{
|
||||||
|
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_725 = _717;
|
||||||
|
}
|
||||||
|
if (_725)
|
||||||
|
{
|
||||||
|
uint param_15 = parent;
|
||||||
|
path_ix = load_path_ix(param_15, v_80, v_96);
|
||||||
|
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp));
|
||||||
|
v_96.memory[drawmonoid_out_base] = path_ix;
|
||||||
|
if (int(grandparent) >= 0)
|
||||||
|
{
|
||||||
|
bbox = sh_bbox[grandparent];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (int(grandparent + stack_size) >= 0)
|
||||||
|
{
|
||||||
|
bbox = sh_stack_bbox[256u + grandparent];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint param_16 = gl_GlobalInvocationID.x;
|
||||||
|
float4 param_17 = bbox;
|
||||||
|
store_clip_bbox(param_16, param_17, v_80, v_96);
|
||||||
|
}
|
||||||
|
|
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
Normal file
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
Normal file
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
Normal file
Binary file not shown.
177
piet-gpu/shader/gen/clip_reduce.hlsl
generated
Normal file
177
piet-gpu/shader/gen/clip_reduce.hlsl
generated
Normal file
|
@ -0,0 +1,177 @@
|
||||||
|
struct Bic
|
||||||
|
{
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClipEl
|
||||||
|
{
|
||||||
|
uint parent_ix;
|
||||||
|
float4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Alloc
|
||||||
|
{
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Config
|
||||||
|
{
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint width_in_tiles;
|
||||||
|
uint height_in_tiles;
|
||||||
|
Alloc tile_alloc;
|
||||||
|
Alloc bin_alloc;
|
||||||
|
Alloc ptcl_alloc;
|
||||||
|
Alloc pathseg_alloc;
|
||||||
|
Alloc anno_alloc;
|
||||||
|
Alloc trans_alloc;
|
||||||
|
Alloc bbox_alloc;
|
||||||
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
|
uint n_trans;
|
||||||
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
|
uint trans_offset;
|
||||||
|
uint linewidth_offset;
|
||||||
|
uint pathtag_offset;
|
||||||
|
uint pathseg_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
static const Bic _267 = { 0u, 0u };
|
||||||
|
|
||||||
|
ByteAddressBuffer _64 : register(t1, space0);
|
||||||
|
RWByteAddressBuffer _80 : register(u0, space0);
|
||||||
|
|
||||||
|
static uint3 gl_WorkGroupID;
|
||||||
|
static uint3 gl_LocalInvocationID;
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_WorkGroupID : SV_GroupID;
|
||||||
|
uint3 gl_LocalInvocationID : SV_GroupThreadID;
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
groupshared Bic sh_bic[256];
|
||||||
|
groupshared uint sh_parent[256];
|
||||||
|
groupshared uint sh_path_ix[256];
|
||||||
|
groupshared float4 sh_bbox[256];
|
||||||
|
|
||||||
|
Bic bic_combine(Bic x, Bic y)
|
||||||
|
{
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m };
|
||||||
|
return _56;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_bic(uint ix, Bic bic)
|
||||||
|
{
|
||||||
|
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
|
||||||
|
_80.Store(base * 4 + 8, bic.a);
|
||||||
|
_80.Store((base + 1u) * 4 + 8, bic.b);
|
||||||
|
}
|
||||||
|
|
||||||
|
float4 load_path_bbox(uint path_ix)
|
||||||
|
{
|
||||||
|
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||||
|
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_clip_el(uint ix, ClipEl el)
|
||||||
|
{
|
||||||
|
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
|
||||||
|
_80.Store(base * 4 + 8, el.parent_ix);
|
||||||
|
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
|
||||||
|
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
|
||||||
|
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
|
||||||
|
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
|
||||||
|
}
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
Bic _207 = { 1u - uint(is_push), uint(is_push) };
|
||||||
|
Bic bic = _207;
|
||||||
|
sh_bic[gl_LocalInvocationID.x] = bic;
|
||||||
|
for (uint i = 0u; i < 8u; i++)
|
||||||
|
{
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
if ((th + (1u << i)) < 256u)
|
||||||
|
{
|
||||||
|
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
|
||||||
|
Bic param = bic;
|
||||||
|
Bic param_1 = other;
|
||||||
|
bic = bic_combine(param, param_1);
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
if (th == 0u)
|
||||||
|
{
|
||||||
|
uint param_2 = gl_WorkGroupID.x;
|
||||||
|
Bic param_3 = bic;
|
||||||
|
store_bic(param_2, param_3);
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
uint size = sh_bic[0].b;
|
||||||
|
bic = _267;
|
||||||
|
if ((th + 1u) < 256u)
|
||||||
|
{
|
||||||
|
bic = sh_bic[th + 1u];
|
||||||
|
}
|
||||||
|
bool _283;
|
||||||
|
if (is_push)
|
||||||
|
{
|
||||||
|
_283 = bic.a == 0u;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_283 = is_push;
|
||||||
|
}
|
||||||
|
if (_283)
|
||||||
|
{
|
||||||
|
uint local_ix = (size - bic.b) - 1u;
|
||||||
|
sh_parent[local_ix] = th;
|
||||||
|
sh_path_ix[local_ix] = inp;
|
||||||
|
}
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
float4 bbox;
|
||||||
|
if (th < size)
|
||||||
|
{
|
||||||
|
uint path_ix = sh_path_ix[th];
|
||||||
|
uint param_4 = path_ix;
|
||||||
|
bbox = load_path_bbox(param_4);
|
||||||
|
}
|
||||||
|
if (th < size)
|
||||||
|
{
|
||||||
|
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
|
||||||
|
ClipEl _331 = { parent_ix, bbox };
|
||||||
|
ClipEl el = _331;
|
||||||
|
uint param_5 = gl_GlobalInvocationID.x;
|
||||||
|
ClipEl param_6 = el;
|
||||||
|
store_clip_el(param_5, param_6);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
173
piet-gpu/shader/gen/clip_reduce.msl
generated
Normal file
173
piet-gpu/shader/gen/clip_reduce.msl
generated
Normal file
|
@ -0,0 +1,173 @@
|
||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct Bic
|
||||||
|
{
|
||||||
|
uint a;
|
||||||
|
uint b;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClipEl
|
||||||
|
{
|
||||||
|
uint parent_ix;
|
||||||
|
float4 bbox;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Alloc
|
||||||
|
{
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Config
|
||||||
|
{
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint width_in_tiles;
|
||||||
|
uint height_in_tiles;
|
||||||
|
Alloc tile_alloc;
|
||||||
|
Alloc bin_alloc;
|
||||||
|
Alloc ptcl_alloc;
|
||||||
|
Alloc pathseg_alloc;
|
||||||
|
Alloc anno_alloc;
|
||||||
|
Alloc trans_alloc;
|
||||||
|
Alloc bbox_alloc;
|
||||||
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
|
uint n_trans;
|
||||||
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
|
uint trans_offset;
|
||||||
|
uint linewidth_offset;
|
||||||
|
uint pathtag_offset;
|
||||||
|
uint pathseg_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ConfigBuf
|
||||||
|
{
|
||||||
|
Config conf;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Memory
|
||||||
|
{
|
||||||
|
uint mem_offset;
|
||||||
|
uint mem_error;
|
||||||
|
uint memory[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
Bic bic_combine(thread const Bic& x, thread const Bic& y)
|
||||||
|
{
|
||||||
|
uint m = min(x.b, y.a);
|
||||||
|
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
|
||||||
|
{
|
||||||
|
uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
|
||||||
|
v_80.memory[base] = bic.a;
|
||||||
|
v_80.memory[base + 1u] = bic.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
|
||||||
|
{
|
||||||
|
uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||||
|
float bbox_l = float(v_80.memory[base]) - 32768.0;
|
||||||
|
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
|
||||||
|
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;
|
||||||
|
float bbox_b = float(v_80.memory[base + 3u]) - 32768.0;
|
||||||
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
|
return bbox;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
|
||||||
|
{
|
||||||
|
uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
|
||||||
|
v_80.memory[base] = el.parent_ix;
|
||||||
|
v_80.memory[base + 1u] = as_type<uint>(el.bbox.x);
|
||||||
|
v_80.memory[base + 2u] = as_type<uint>(el.bbox.y);
|
||||||
|
v_80.memory[base + 3u] = as_type<uint>(el.bbox.z);
|
||||||
|
v_80.memory[base + 4u] = as_type<uint>(el.bbox.w);
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
|
{
|
||||||
|
threadgroup Bic sh_bic[256];
|
||||||
|
threadgroup uint sh_parent[256];
|
||||||
|
threadgroup uint sh_path_ix[256];
|
||||||
|
threadgroup float4 sh_bbox[256];
|
||||||
|
uint th = gl_LocalInvocationID.x;
|
||||||
|
uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x];
|
||||||
|
bool is_push = int(inp) >= 0;
|
||||||
|
Bic bic = Bic{ 1u - uint(is_push), uint(is_push) };
|
||||||
|
sh_bic[gl_LocalInvocationID.x] = bic;
|
||||||
|
for (uint i = 0u; i < 8u; i++)
|
||||||
|
{
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
if ((th + (1u << i)) < 256u)
|
||||||
|
{
|
||||||
|
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
|
||||||
|
Bic param = bic;
|
||||||
|
Bic param_1 = other;
|
||||||
|
bic = bic_combine(param, param_1);
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
sh_bic[th] = bic;
|
||||||
|
}
|
||||||
|
if (th == 0u)
|
||||||
|
{
|
||||||
|
uint param_2 = gl_WorkGroupID.x;
|
||||||
|
Bic param_3 = bic;
|
||||||
|
store_bic(param_2, param_3, v_64, v_80);
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
uint size = sh_bic[0].b;
|
||||||
|
bic = Bic{ 0u, 0u };
|
||||||
|
if ((th + 1u) < 256u)
|
||||||
|
{
|
||||||
|
bic = sh_bic[th + 1u];
|
||||||
|
}
|
||||||
|
bool _283;
|
||||||
|
if (is_push)
|
||||||
|
{
|
||||||
|
_283 = bic.a == 0u;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_283 = is_push;
|
||||||
|
}
|
||||||
|
if (_283)
|
||||||
|
{
|
||||||
|
uint local_ix = (size - bic.b) - 1u;
|
||||||
|
sh_parent[local_ix] = th;
|
||||||
|
sh_path_ix[local_ix] = inp;
|
||||||
|
}
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
|
float4 bbox;
|
||||||
|
if (th < size)
|
||||||
|
{
|
||||||
|
uint path_ix = sh_path_ix[th];
|
||||||
|
uint param_4 = path_ix;
|
||||||
|
bbox = load_path_bbox(param_4, v_64, v_80);
|
||||||
|
}
|
||||||
|
if (th < size)
|
||||||
|
{
|
||||||
|
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
|
||||||
|
ClipEl el = ClipEl{ parent_ix, bbox };
|
||||||
|
uint param_5 = gl_GlobalInvocationID.x;
|
||||||
|
ClipEl param_6 = el;
|
||||||
|
store_clip_el(param_5, param_6, v_64, v_80);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
Normal file
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/coarse.dxil
generated
BIN
piet-gpu/shader/gen/coarse.dxil
generated
Binary file not shown.
506
piet-gpu/shader/gen/coarse.hlsl
generated
506
piet-gpu/shader/gen/coarse.hlsl
generated
|
@ -49,17 +49,6 @@ struct AnnoLinGradient
|
||||||
float line_c;
|
float line_c;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AnnoBeginClipRef
|
|
||||||
{
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnoBeginClip
|
|
||||||
{
|
|
||||||
float4 bbox;
|
|
||||||
float linewidth;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct AnnotatedRef
|
struct AnnotatedRef
|
||||||
{
|
{
|
||||||
uint offset;
|
uint offset;
|
||||||
|
@ -193,8 +182,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -203,8 +197,8 @@ struct Config
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
RWByteAddressBuffer _296 : register(u0, space0);
|
RWByteAddressBuffer _283 : register(u0, space0);
|
||||||
ByteAddressBuffer _1249 : register(t1, space0);
|
ByteAddressBuffer _1169 : register(t1, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -227,8 +221,8 @@ groupshared uint sh_tile_count[256];
|
||||||
|
|
||||||
Alloc slice_mem(Alloc a, uint offset, uint size)
|
Alloc slice_mem(Alloc a, uint offset, uint size)
|
||||||
{
|
{
|
||||||
Alloc _373 = { a.offset + offset };
|
Alloc _360 = { a.offset + offset };
|
||||||
return _373;
|
return _360;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool touch_mem(Alloc alloc, uint offset)
|
bool touch_mem(Alloc alloc, uint offset)
|
||||||
|
@ -244,7 +238,7 @@ uint read_mem(Alloc alloc, uint offset)
|
||||||
{
|
{
|
||||||
return 0u;
|
return 0u;
|
||||||
}
|
}
|
||||||
uint v = _296.Load(offset * 4 + 8);
|
uint v = _283.Load(offset * 4 + 8);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,8 +251,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||||
|
|
||||||
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
|
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
|
||||||
{
|
{
|
||||||
BinInstanceRef _754 = { ref.offset + (index * 4u) };
|
BinInstanceRef _674 = { ref.offset + (index * 4u) };
|
||||||
return _754;
|
return _674;
|
||||||
}
|
}
|
||||||
|
|
||||||
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
|
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
|
||||||
|
@ -277,8 +271,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint tag_and_flags = read_mem(param, param_1);
|
uint tag_and_flags = read_mem(param, param_1);
|
||||||
AnnotatedTag _706 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||||
return _706;
|
return _636;
|
||||||
}
|
}
|
||||||
|
|
||||||
Path Path_read(Alloc a, PathRef ref)
|
Path Path_read(Alloc a, PathRef ref)
|
||||||
|
@ -295,8 +289,8 @@ Path Path_read(Alloc a, PathRef ref)
|
||||||
uint raw2 = read_mem(param_4, param_5);
|
uint raw2 = read_mem(param_4, param_5);
|
||||||
Path s;
|
Path s;
|
||||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||||
TileRef _814 = { raw2 };
|
TileRef _734 = { raw2 };
|
||||||
s.tiles = _814;
|
s.tiles = _734;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,11 +300,11 @@ void write_tile_alloc(uint el_ix, Alloc a)
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix, bool mem_ok)
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok)
|
||||||
{
|
{
|
||||||
uint _1135;
|
uint _1055;
|
||||||
_296.GetDimensions(_1135);
|
_283.GetDimensions(_1055);
|
||||||
_1135 = (_1135 - 8) / 4;
|
_1055 = (_1055 - 8) / 4;
|
||||||
uint param = 0u;
|
uint param = 0u;
|
||||||
uint param_1 = uint(int(_1135) * 4);
|
uint param_1 = uint(int(_1055) * 4);
|
||||||
bool param_2 = mem_ok;
|
bool param_2 = mem_ok;
|
||||||
return new_alloc(param, param_1, param_2);
|
return new_alloc(param, param_1, param_2);
|
||||||
}
|
}
|
||||||
|
@ -324,9 +318,9 @@ Tile Tile_read(Alloc a, TileRef ref)
|
||||||
Alloc param_2 = a;
|
Alloc param_2 = a;
|
||||||
uint param_3 = ix + 1u;
|
uint param_3 = ix + 1u;
|
||||||
uint raw1 = read_mem(param_2, param_3);
|
uint raw1 = read_mem(param_2, param_3);
|
||||||
TileSegRef _839 = { raw0 };
|
TileSegRef _759 = { raw0 };
|
||||||
Tile s;
|
Tile s;
|
||||||
s.tile = _839;
|
s.tile = _759;
|
||||||
s.backdrop = int(raw1);
|
s.backdrop = int(raw1);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
@ -361,30 +355,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)
|
||||||
|
|
||||||
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)
|
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)
|
||||||
{
|
{
|
||||||
AnnoColorRef _712 = { ref.offset + 4u };
|
AnnoColorRef _642 = { ref.offset + 4u };
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
AnnoColorRef param_1 = _712;
|
AnnoColorRef param_1 = _642;
|
||||||
return AnnoColor_read(param, param_1);
|
return AnnoColor_read(param, param_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
MallocResult malloc(uint size)
|
MallocResult malloc(uint size)
|
||||||
{
|
{
|
||||||
uint _302;
|
uint _289;
|
||||||
_296.InterlockedAdd(0, size, _302);
|
_283.InterlockedAdd(0, size, _289);
|
||||||
uint offset = _302;
|
uint offset = _289;
|
||||||
uint _309;
|
uint _296;
|
||||||
_296.GetDimensions(_309);
|
_283.GetDimensions(_296);
|
||||||
_309 = (_309 - 8) / 4;
|
_296 = (_296 - 8) / 4;
|
||||||
MallocResult r;
|
MallocResult r;
|
||||||
r.failed = (offset + size) > uint(int(_309) * 4);
|
r.failed = (offset + size) > uint(int(_296) * 4);
|
||||||
uint param = offset;
|
uint param = offset;
|
||||||
uint param_1 = size;
|
uint param_1 = size;
|
||||||
bool param_2 = !r.failed;
|
bool param_2 = !r.failed;
|
||||||
r.alloc = new_alloc(param, param_1, param_2);
|
r.alloc = new_alloc(param, param_1, param_2);
|
||||||
if (r.failed)
|
if (r.failed)
|
||||||
{
|
{
|
||||||
uint _331;
|
uint _318;
|
||||||
_296.InterlockedMax(4, 1u, _331);
|
_283.InterlockedMax(4, 1u, _318);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
@ -398,7 +392,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_296.Store(offset * 4 + 8, val);
|
_283.Store(offset * 4 + 8, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
|
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
|
||||||
|
@ -416,9 +410,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 10u;
|
uint param_2 = 10u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdJumpRef _1128 = { ref.offset + 4u };
|
CmdJumpRef _1048 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdJumpRef param_4 = _1128;
|
CmdJumpRef param_4 = _1048;
|
||||||
CmdJump param_5 = s;
|
CmdJump param_5 = s;
|
||||||
CmdJump_write(param_3, param_4, param_5);
|
CmdJump_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -430,21 +424,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
uint param = 1024u;
|
uint param = 1024u;
|
||||||
MallocResult _1156 = malloc(param);
|
MallocResult _1076 = malloc(param);
|
||||||
MallocResult new_cmd = _1156;
|
MallocResult new_cmd = _1076;
|
||||||
if (new_cmd.failed)
|
if (new_cmd.failed)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
CmdJump _1166 = { new_cmd.alloc.offset };
|
CmdJump _1086 = { new_cmd.alloc.offset };
|
||||||
CmdJump jump = _1166;
|
CmdJump jump = _1086;
|
||||||
Alloc param_1 = cmd_alloc;
|
Alloc param_1 = cmd_alloc;
|
||||||
CmdRef param_2 = cmd_ref;
|
CmdRef param_2 = cmd_ref;
|
||||||
CmdJump param_3 = jump;
|
CmdJump param_3 = jump;
|
||||||
Cmd_Jump_write(param_1, param_2, param_3);
|
Cmd_Jump_write(param_1, param_2, param_3);
|
||||||
cmd_alloc = new_cmd.alloc;
|
cmd_alloc = new_cmd.alloc;
|
||||||
CmdRef _1178 = { cmd_alloc.offset };
|
CmdRef _1098 = { cmd_alloc.offset };
|
||||||
cmd_ref = _1178;
|
cmd_ref = _1098;
|
||||||
cmd_limit = (cmd_alloc.offset + 1024u) - 60u;
|
cmd_limit = (cmd_alloc.offset + 1024u) - 60u;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -473,9 +467,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 1u;
|
uint param_2 = 1u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdFillRef _1012 = { ref.offset + 4u };
|
CmdFillRef _932 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdFillRef param_4 = _1012;
|
CmdFillRef param_4 = _932;
|
||||||
CmdFill param_5 = s;
|
CmdFill param_5 = s;
|
||||||
CmdFill_write(param_3, param_4, param_5);
|
CmdFill_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -507,9 +501,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 2u;
|
uint param_2 = 2u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdStrokeRef _1030 = { ref.offset + 4u };
|
CmdStrokeRef _950 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdStrokeRef param_4 = _1030;
|
CmdStrokeRef param_4 = _950;
|
||||||
CmdStroke param_5 = s;
|
CmdStroke param_5 = s;
|
||||||
CmdStroke_write(param_3, param_4, param_5);
|
CmdStroke_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -521,8 +515,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
||||||
{
|
{
|
||||||
if (tile.tile.offset != 0u)
|
if (tile.tile.offset != 0u)
|
||||||
{
|
{
|
||||||
CmdFill _1202 = { tile.tile.offset, tile.backdrop };
|
CmdFill _1122 = { tile.tile.offset, tile.backdrop };
|
||||||
CmdFill cmd_fill = _1202;
|
CmdFill cmd_fill = _1122;
|
||||||
Alloc param_1 = alloc;
|
Alloc param_1 = alloc;
|
||||||
CmdRef param_2 = cmd_ref;
|
CmdRef param_2 = cmd_ref;
|
||||||
CmdFill param_3 = cmd_fill;
|
CmdFill param_3 = cmd_fill;
|
||||||
|
@ -539,8 +533,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
CmdStroke _1232 = { tile.tile.offset, 0.5f * linewidth };
|
CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth };
|
||||||
CmdStroke cmd_stroke = _1232;
|
CmdStroke cmd_stroke = _1152;
|
||||||
Alloc param_6 = alloc;
|
Alloc param_6 = alloc;
|
||||||
CmdRef param_7 = cmd_ref;
|
CmdRef param_7 = cmd_ref;
|
||||||
CmdStroke param_8 = cmd_stroke;
|
CmdStroke param_8 = cmd_stroke;
|
||||||
|
@ -564,9 +558,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 5u;
|
uint param_2 = 5u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdColorRef _1056 = { ref.offset + 4u };
|
CmdColorRef _976 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdColorRef param_4 = _1056;
|
CmdColorRef param_4 = _976;
|
||||||
CmdColor param_5 = s;
|
CmdColor param_5 = s;
|
||||||
CmdColor_write(param_3, param_4, param_5);
|
CmdColor_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -613,9 +607,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref)
|
||||||
|
|
||||||
AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref)
|
AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref)
|
||||||
{
|
{
|
||||||
AnnoLinGradientRef _722 = { ref.offset + 4u };
|
AnnoLinGradientRef _652 = { ref.offset + 4u };
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
AnnoLinGradientRef param_1 = _722;
|
AnnoLinGradientRef param_1 = _652;
|
||||||
return AnnoLinGradient_read(param, param_1);
|
return AnnoLinGradient_read(param, param_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -646,9 +640,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 6u;
|
uint param_2 = 6u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdLinGradRef _1074 = { ref.offset + 4u };
|
CmdLinGradRef _994 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdLinGradRef param_4 = _1074;
|
CmdLinGradRef param_4 = _994;
|
||||||
CmdLinGrad param_5 = s;
|
CmdLinGrad param_5 = s;
|
||||||
CmdLinGrad_write(param_3, param_4, param_5);
|
CmdLinGrad_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -687,9 +681,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)
|
||||||
|
|
||||||
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)
|
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)
|
||||||
{
|
{
|
||||||
AnnoImageRef _732 = { ref.offset + 4u };
|
AnnoImageRef _662 = { ref.offset + 4u };
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
AnnoImageRef param_1 = _732;
|
AnnoImageRef param_1 = _662;
|
||||||
return AnnoImage_read(param, param_1);
|
return AnnoImage_read(param, param_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -712,45 +706,13 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 7u;
|
uint param_2 = 7u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
CmdImageRef _1092 = { ref.offset + 4u };
|
CmdImageRef _1012 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
CmdImageRef param_4 = _1092;
|
CmdImageRef param_4 = _1012;
|
||||||
CmdImage param_5 = s;
|
CmdImage param_5 = s;
|
||||||
CmdImage_write(param_3, param_4, param_5);
|
CmdImage_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)
|
|
||||||
{
|
|
||||||
uint ix = ref.offset >> uint(2);
|
|
||||||
Alloc param = a;
|
|
||||||
uint param_1 = ix + 0u;
|
|
||||||
uint raw0 = read_mem(param, param_1);
|
|
||||||
Alloc param_2 = a;
|
|
||||||
uint param_3 = ix + 1u;
|
|
||||||
uint raw1 = read_mem(param_2, param_3);
|
|
||||||
Alloc param_4 = a;
|
|
||||||
uint param_5 = ix + 2u;
|
|
||||||
uint raw2 = read_mem(param_4, param_5);
|
|
||||||
Alloc param_6 = a;
|
|
||||||
uint param_7 = ix + 3u;
|
|
||||||
uint raw3 = read_mem(param_6, param_7);
|
|
||||||
Alloc param_8 = a;
|
|
||||||
uint param_9 = ix + 4u;
|
|
||||||
uint raw4 = read_mem(param_8, param_9);
|
|
||||||
AnnoBeginClip s;
|
|
||||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
|
||||||
s.linewidth = asfloat(raw4);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)
|
|
||||||
{
|
|
||||||
AnnoBeginClipRef _742 = { ref.offset + 4u };
|
|
||||||
Alloc param = a;
|
|
||||||
AnnoBeginClipRef param_1 = _742;
|
|
||||||
return AnnoBeginClip_read(param, param_1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cmd_BeginClip_write(Alloc a, CmdRef ref)
|
void Cmd_BeginClip_write(Alloc a, CmdRef ref)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
|
@ -777,44 +739,43 @@ void Cmd_End_write(Alloc a, CmdRef ref)
|
||||||
|
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint width_in_bins = ((_1249.Load(8) + 16u) - 1u) / 16u;
|
uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u;
|
||||||
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
|
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
|
||||||
uint partition_ix = 0u;
|
uint partition_ix = 0u;
|
||||||
uint n_partitions = ((_1249.Load(0) + 256u) - 1u) / 256u;
|
uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u;
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint bin_tile_x = 16u * gl_WorkGroupID.x;
|
uint bin_tile_x = 16u * gl_WorkGroupID.x;
|
||||||
uint bin_tile_y = 16u * gl_WorkGroupID.y;
|
uint bin_tile_y = 16u * gl_WorkGroupID.y;
|
||||||
uint tile_x = gl_LocalInvocationID.x % 16u;
|
uint tile_x = gl_LocalInvocationID.x % 16u;
|
||||||
uint tile_y = gl_LocalInvocationID.x / 16u;
|
uint tile_y = gl_LocalInvocationID.x / 16u;
|
||||||
uint this_tile_ix = (((bin_tile_y + tile_y) * _1249.Load(8)) + bin_tile_x) + tile_x;
|
uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x;
|
||||||
Alloc _1314;
|
Alloc _1234;
|
||||||
_1314.offset = _1249.Load(24);
|
_1234.offset = _1169.Load(24);
|
||||||
Alloc param;
|
Alloc param;
|
||||||
param.offset = _1314.offset;
|
param.offset = _1234.offset;
|
||||||
uint param_1 = this_tile_ix * 1024u;
|
uint param_1 = this_tile_ix * 1024u;
|
||||||
uint param_2 = 1024u;
|
uint param_2 = 1024u;
|
||||||
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
|
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
|
||||||
CmdRef _1323 = { cmd_alloc.offset };
|
CmdRef _1243 = { cmd_alloc.offset };
|
||||||
CmdRef cmd_ref = _1323;
|
CmdRef cmd_ref = _1243;
|
||||||
uint cmd_limit = (cmd_ref.offset + 1024u) - 60u;
|
uint cmd_limit = (cmd_ref.offset + 1024u) - 60u;
|
||||||
uint clip_depth = 0u;
|
uint clip_depth = 0u;
|
||||||
uint clip_zero_depth = 0u;
|
uint clip_zero_depth = 0u;
|
||||||
uint clip_one_mask = 0u;
|
|
||||||
uint rd_ix = 0u;
|
uint rd_ix = 0u;
|
||||||
uint wr_ix = 0u;
|
uint wr_ix = 0u;
|
||||||
uint part_start_ix = 0u;
|
uint part_start_ix = 0u;
|
||||||
uint ready_ix = 0u;
|
uint ready_ix = 0u;
|
||||||
bool mem_ok = _296.Load(4) == 0u;
|
bool mem_ok = _283.Load(4) == 0u;
|
||||||
Alloc param_3;
|
Alloc param_3;
|
||||||
Alloc param_5;
|
Alloc param_5;
|
||||||
uint _1529;
|
uint _1448;
|
||||||
uint element_ix;
|
uint element_ix;
|
||||||
AnnotatedRef ref;
|
AnnotatedRef ref;
|
||||||
Alloc param_14;
|
Alloc param_14;
|
||||||
Alloc param_16;
|
Alloc param_16;
|
||||||
uint tile_count;
|
uint tile_count;
|
||||||
Alloc param_23;
|
Alloc param_23;
|
||||||
uint _1841;
|
uint _1770;
|
||||||
Alloc param_29;
|
Alloc param_29;
|
||||||
Tile tile_1;
|
Tile tile_1;
|
||||||
AnnoColor fill;
|
AnnoColor fill;
|
||||||
|
@ -822,41 +783,40 @@ void comp_main()
|
||||||
Alloc param_52;
|
Alloc param_52;
|
||||||
CmdLinGrad cmd_lin;
|
CmdLinGrad cmd_lin;
|
||||||
Alloc param_69;
|
Alloc param_69;
|
||||||
Alloc param_86;
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
for (uint i = 0u; i < 8u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
sh_bitmaps[i][th_ix] = 0u;
|
sh_bitmaps[i][th_ix] = 0u;
|
||||||
}
|
}
|
||||||
bool _1581;
|
bool _1500;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
|
if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
|
||||||
{
|
{
|
||||||
part_start_ix = ready_ix;
|
part_start_ix = ready_ix;
|
||||||
uint count = 0u;
|
uint count = 0u;
|
||||||
bool _1379 = th_ix < 256u;
|
bool _1298 = th_ix < 256u;
|
||||||
bool _1387;
|
bool _1306;
|
||||||
if (_1379)
|
if (_1298)
|
||||||
{
|
{
|
||||||
_1387 = (partition_ix + th_ix) < n_partitions;
|
_1306 = (partition_ix + th_ix) < n_partitions;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1387 = _1379;
|
_1306 = _1298;
|
||||||
}
|
}
|
||||||
if (_1387)
|
if (_1306)
|
||||||
{
|
{
|
||||||
uint in_ix = (_1249.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
|
uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
|
||||||
Alloc _1404;
|
Alloc _1323;
|
||||||
_1404.offset = _1249.Load(20);
|
_1323.offset = _1169.Load(20);
|
||||||
param_3.offset = _1404.offset;
|
param_3.offset = _1323.offset;
|
||||||
uint param_4 = in_ix;
|
uint param_4 = in_ix;
|
||||||
count = read_mem(param_3, param_4);
|
count = read_mem(param_3, param_4);
|
||||||
Alloc _1415;
|
Alloc _1334;
|
||||||
_1415.offset = _1249.Load(20);
|
_1334.offset = _1169.Load(20);
|
||||||
param_5.offset = _1415.offset;
|
param_5.offset = _1334.offset;
|
||||||
uint param_6 = in_ix + 1u;
|
uint param_6 = in_ix + 1u;
|
||||||
uint offset = read_mem(param_5, param_6);
|
uint offset = read_mem(param_5, param_6);
|
||||||
uint param_7 = offset;
|
uint param_7 = offset;
|
||||||
|
@ -902,16 +862,16 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (part_ix > 0u)
|
if (part_ix > 0u)
|
||||||
{
|
{
|
||||||
_1529 = sh_part_count[part_ix - 1u];
|
_1448 = sh_part_count[part_ix - 1u];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1529 = part_start_ix;
|
_1448 = part_start_ix;
|
||||||
}
|
}
|
||||||
ix -= _1529;
|
ix -= _1448;
|
||||||
Alloc bin_alloc = sh_part_elements[part_ix];
|
Alloc bin_alloc = sh_part_elements[part_ix];
|
||||||
BinInstanceRef _1548 = { bin_alloc.offset };
|
BinInstanceRef _1467 = { bin_alloc.offset };
|
||||||
BinInstanceRef inst_ref = _1548;
|
BinInstanceRef inst_ref = _1467;
|
||||||
BinInstanceRef param_10 = inst_ref;
|
BinInstanceRef param_10 = inst_ref;
|
||||||
uint param_11 = ix;
|
uint param_11 = ix;
|
||||||
Alloc param_12 = bin_alloc;
|
Alloc param_12 = bin_alloc;
|
||||||
|
@ -921,16 +881,16 @@ void comp_main()
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
wr_ix = min((rd_ix + 256u), ready_ix);
|
wr_ix = min((rd_ix + 256u), ready_ix);
|
||||||
bool _1571 = (wr_ix - rd_ix) < 256u;
|
bool _1490 = (wr_ix - rd_ix) < 256u;
|
||||||
if (_1571)
|
if (_1490)
|
||||||
{
|
{
|
||||||
_1581 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
|
_1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1581 = _1571;
|
_1500 = _1490;
|
||||||
}
|
}
|
||||||
if (_1581)
|
if (_1500)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -943,11 +903,11 @@ void comp_main()
|
||||||
if ((th_ix + rd_ix) < wr_ix)
|
if ((th_ix + rd_ix) < wr_ix)
|
||||||
{
|
{
|
||||||
element_ix = sh_elements[th_ix];
|
element_ix = sh_elements[th_ix];
|
||||||
AnnotatedRef _1602 = { _1249.Load(32) + (element_ix * 40u) };
|
AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) };
|
||||||
ref = _1602;
|
ref = _1521;
|
||||||
Alloc _1605;
|
Alloc _1524;
|
||||||
_1605.offset = _1249.Load(32);
|
_1524.offset = _1169.Load(32);
|
||||||
param_14.offset = _1605.offset;
|
param_14.offset = _1524.offset;
|
||||||
AnnotatedRef param_15 = ref;
|
AnnotatedRef param_15 = ref;
|
||||||
tag = Annotated_tag(param_14, param_15).tag;
|
tag = Annotated_tag(param_14, param_15).tag;
|
||||||
}
|
}
|
||||||
|
@ -959,12 +919,13 @@ void comp_main()
|
||||||
case 4u:
|
case 4u:
|
||||||
case 5u:
|
case 5u:
|
||||||
{
|
{
|
||||||
uint path_ix = element_ix;
|
uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix);
|
||||||
PathRef _1624 = { _1249.Load(16) + (path_ix * 12u) };
|
uint path_ix = _283.Load(drawmonoid_base * 4 + 8);
|
||||||
Alloc _1627;
|
PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) };
|
||||||
_1627.offset = _1249.Load(16);
|
Alloc _1556;
|
||||||
param_16.offset = _1627.offset;
|
_1556.offset = _1169.Load(16);
|
||||||
PathRef param_17 = _1624;
|
param_16.offset = _1556.offset;
|
||||||
|
PathRef param_17 = _1553;
|
||||||
Path path = Path_read(param_16, param_17);
|
Path path = Path_read(param_16, param_17);
|
||||||
uint stride = path.bbox.z - path.bbox.x;
|
uint stride = path.bbox.z - path.bbox.x;
|
||||||
sh_tile_stride[th_ix] = stride;
|
sh_tile_stride[th_ix] = stride;
|
||||||
|
@ -1019,59 +980,53 @@ void comp_main()
|
||||||
el_ix = probe_1;
|
el_ix = probe_1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AnnotatedRef _1826 = { _1249.Load(32) + (sh_elements[el_ix] * 40u) };
|
AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) };
|
||||||
AnnotatedRef ref_1 = _1826;
|
AnnotatedRef ref_1 = _1755;
|
||||||
Alloc _1830;
|
Alloc _1759;
|
||||||
_1830.offset = _1249.Load(32);
|
_1759.offset = _1169.Load(32);
|
||||||
param_23.offset = _1830.offset;
|
param_23.offset = _1759.offset;
|
||||||
AnnotatedRef param_24 = ref_1;
|
AnnotatedRef param_24 = ref_1;
|
||||||
uint tag_1 = Annotated_tag(param_23, param_24).tag;
|
uint tag_1 = Annotated_tag(param_23, param_24).tag;
|
||||||
if (el_ix > 0u)
|
if (el_ix > 0u)
|
||||||
{
|
{
|
||||||
_1841 = sh_tile_count[el_ix - 1u];
|
_1770 = sh_tile_count[el_ix - 1u];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1841 = 0u;
|
_1770 = 0u;
|
||||||
}
|
}
|
||||||
uint seq_ix = ix_1 - _1841;
|
uint seq_ix = ix_1 - _1770;
|
||||||
uint width = sh_tile_width[el_ix];
|
uint width = sh_tile_width[el_ix];
|
||||||
uint x = sh_tile_x0[el_ix] + (seq_ix % width);
|
uint x = sh_tile_x0[el_ix] + (seq_ix % width);
|
||||||
uint y = sh_tile_y0[el_ix] + (seq_ix / width);
|
uint y = sh_tile_y0[el_ix] + (seq_ix / width);
|
||||||
bool include_tile = false;
|
bool include_tile = false;
|
||||||
if ((tag_1 == 4u) || (tag_1 == 5u))
|
if (mem_ok)
|
||||||
{
|
{
|
||||||
include_tile = true;
|
uint param_25 = el_ix;
|
||||||
}
|
bool param_26 = mem_ok;
|
||||||
else
|
TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
|
||||||
{
|
Alloc param_27 = read_tile_alloc(param_25, param_26);
|
||||||
if (mem_ok)
|
TileRef param_28 = _1822;
|
||||||
|
Tile tile = Tile_read(param_27, param_28);
|
||||||
|
bool is_clip = (tag_1 == 4u) || (tag_1 == 5u);
|
||||||
|
bool _1834 = tile.tile.offset != 0u;
|
||||||
|
bool _1843;
|
||||||
|
if (!_1834)
|
||||||
{
|
{
|
||||||
uint param_25 = el_ix;
|
_1843 = (tile.backdrop == 0) == is_clip;
|
||||||
bool param_26 = mem_ok;
|
|
||||||
TileRef _1901 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
|
|
||||||
Alloc param_27 = read_tile_alloc(param_25, param_26);
|
|
||||||
TileRef param_28 = _1901;
|
|
||||||
Tile tile = Tile_read(param_27, param_28);
|
|
||||||
bool _1907 = tile.tile.offset != 0u;
|
|
||||||
bool _1914;
|
|
||||||
if (!_1907)
|
|
||||||
{
|
|
||||||
_1914 = tile.backdrop != 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_1914 = _1907;
|
|
||||||
}
|
|
||||||
include_tile = _1914;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_1843 = _1834;
|
||||||
|
}
|
||||||
|
include_tile = _1843;
|
||||||
}
|
}
|
||||||
if (include_tile)
|
if (include_tile)
|
||||||
{
|
{
|
||||||
uint el_slice = el_ix / 32u;
|
uint el_slice = el_ix / 32u;
|
||||||
uint el_mask = 1u << (el_ix & 31u);
|
uint el_mask = 1u << (el_ix & 31u);
|
||||||
uint _1934;
|
uint _1863;
|
||||||
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1934);
|
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
|
@ -1095,11 +1050,11 @@ void comp_main()
|
||||||
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
|
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
|
||||||
uint element_ix_1 = sh_elements[element_ref_ix];
|
uint element_ix_1 = sh_elements[element_ref_ix];
|
||||||
bitmap &= (bitmap - 1u);
|
bitmap &= (bitmap - 1u);
|
||||||
AnnotatedRef _1988 = { _1249.Load(32) + (element_ix_1 * 40u) };
|
AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) };
|
||||||
ref = _1988;
|
ref = _1917;
|
||||||
Alloc _1993;
|
Alloc _1922;
|
||||||
_1993.offset = _1249.Load(32);
|
_1922.offset = _1169.Load(32);
|
||||||
param_29.offset = _1993.offset;
|
param_29.offset = _1922.offset;
|
||||||
AnnotatedRef param_30 = ref;
|
AnnotatedRef param_30 = ref;
|
||||||
AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);
|
AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);
|
||||||
if (clip_zero_depth == 0u)
|
if (clip_zero_depth == 0u)
|
||||||
|
@ -1110,23 +1065,23 @@ void comp_main()
|
||||||
{
|
{
|
||||||
uint param_31 = element_ref_ix;
|
uint param_31 = element_ref_ix;
|
||||||
bool param_32 = mem_ok;
|
bool param_32 = mem_ok;
|
||||||
TileRef _2029 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||||
Alloc param_33 = read_tile_alloc(param_31, param_32);
|
Alloc param_33 = read_tile_alloc(param_31, param_32);
|
||||||
TileRef param_34 = _2029;
|
TileRef param_34 = _1958;
|
||||||
tile_1 = Tile_read(param_33, param_34);
|
tile_1 = Tile_read(param_33, param_34);
|
||||||
Alloc _2036;
|
Alloc _1965;
|
||||||
_2036.offset = _1249.Load(32);
|
_1965.offset = _1169.Load(32);
|
||||||
param_35.offset = _2036.offset;
|
param_35.offset = _1965.offset;
|
||||||
AnnotatedRef param_36 = ref;
|
AnnotatedRef param_36 = ref;
|
||||||
fill = Annotated_Color_read(param_35, param_36);
|
fill = Annotated_Color_read(param_35, param_36);
|
||||||
Alloc param_37 = cmd_alloc;
|
Alloc param_37 = cmd_alloc;
|
||||||
CmdRef param_38 = cmd_ref;
|
CmdRef param_38 = cmd_ref;
|
||||||
uint param_39 = cmd_limit;
|
uint param_39 = cmd_limit;
|
||||||
bool _2048 = alloc_cmd(param_37, param_38, param_39);
|
bool _1977 = alloc_cmd(param_37, param_38, param_39);
|
||||||
cmd_alloc = param_37;
|
cmd_alloc = param_37;
|
||||||
cmd_ref = param_38;
|
cmd_ref = param_38;
|
||||||
cmd_limit = param_39;
|
cmd_limit = param_39;
|
||||||
if (!_2048)
|
if (!_1977)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1137,10 +1092,10 @@ void comp_main()
|
||||||
float param_44 = fill.linewidth;
|
float param_44 = fill.linewidth;
|
||||||
write_fill(param_40, param_41, param_42, param_43, param_44);
|
write_fill(param_40, param_41, param_42, param_43, param_44);
|
||||||
cmd_ref = param_41;
|
cmd_ref = param_41;
|
||||||
CmdColor _2072 = { fill.rgba_color };
|
CmdColor _2001 = { fill.rgba_color };
|
||||||
Alloc param_45 = cmd_alloc;
|
Alloc param_45 = cmd_alloc;
|
||||||
CmdRef param_46 = cmd_ref;
|
CmdRef param_46 = cmd_ref;
|
||||||
CmdColor param_47 = _2072;
|
CmdColor param_47 = _2001;
|
||||||
Cmd_Color_write(param_45, param_46, param_47);
|
Cmd_Color_write(param_45, param_46, param_47);
|
||||||
cmd_ref.offset += 8u;
|
cmd_ref.offset += 8u;
|
||||||
break;
|
break;
|
||||||
|
@ -1149,23 +1104,23 @@ void comp_main()
|
||||||
{
|
{
|
||||||
uint param_48 = element_ref_ix;
|
uint param_48 = element_ref_ix;
|
||||||
bool param_49 = mem_ok;
|
bool param_49 = mem_ok;
|
||||||
TileRef _2101 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||||
Alloc param_50 = read_tile_alloc(param_48, param_49);
|
Alloc param_50 = read_tile_alloc(param_48, param_49);
|
||||||
TileRef param_51 = _2101;
|
TileRef param_51 = _2030;
|
||||||
tile_1 = Tile_read(param_50, param_51);
|
tile_1 = Tile_read(param_50, param_51);
|
||||||
Alloc _2108;
|
Alloc _2037;
|
||||||
_2108.offset = _1249.Load(32);
|
_2037.offset = _1169.Load(32);
|
||||||
param_52.offset = _2108.offset;
|
param_52.offset = _2037.offset;
|
||||||
AnnotatedRef param_53 = ref;
|
AnnotatedRef param_53 = ref;
|
||||||
AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53);
|
AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53);
|
||||||
Alloc param_54 = cmd_alloc;
|
Alloc param_54 = cmd_alloc;
|
||||||
CmdRef param_55 = cmd_ref;
|
CmdRef param_55 = cmd_ref;
|
||||||
uint param_56 = cmd_limit;
|
uint param_56 = cmd_limit;
|
||||||
bool _2120 = alloc_cmd(param_54, param_55, param_56);
|
bool _2049 = alloc_cmd(param_54, param_55, param_56);
|
||||||
cmd_alloc = param_54;
|
cmd_alloc = param_54;
|
||||||
cmd_ref = param_55;
|
cmd_ref = param_55;
|
||||||
cmd_limit = param_56;
|
cmd_limit = param_56;
|
||||||
if (!_2120)
|
if (!_2049)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1191,23 +1146,23 @@ void comp_main()
|
||||||
{
|
{
|
||||||
uint param_65 = element_ref_ix;
|
uint param_65 = element_ref_ix;
|
||||||
bool param_66 = mem_ok;
|
bool param_66 = mem_ok;
|
||||||
TileRef _2185 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||||
Alloc param_67 = read_tile_alloc(param_65, param_66);
|
Alloc param_67 = read_tile_alloc(param_65, param_66);
|
||||||
TileRef param_68 = _2185;
|
TileRef param_68 = _2114;
|
||||||
tile_1 = Tile_read(param_67, param_68);
|
tile_1 = Tile_read(param_67, param_68);
|
||||||
Alloc _2192;
|
Alloc _2121;
|
||||||
_2192.offset = _1249.Load(32);
|
_2121.offset = _1169.Load(32);
|
||||||
param_69.offset = _2192.offset;
|
param_69.offset = _2121.offset;
|
||||||
AnnotatedRef param_70 = ref;
|
AnnotatedRef param_70 = ref;
|
||||||
AnnoImage fill_img = Annotated_Image_read(param_69, param_70);
|
AnnoImage fill_img = Annotated_Image_read(param_69, param_70);
|
||||||
Alloc param_71 = cmd_alloc;
|
Alloc param_71 = cmd_alloc;
|
||||||
CmdRef param_72 = cmd_ref;
|
CmdRef param_72 = cmd_ref;
|
||||||
uint param_73 = cmd_limit;
|
uint param_73 = cmd_limit;
|
||||||
bool _2204 = alloc_cmd(param_71, param_72, param_73);
|
bool _2133 = alloc_cmd(param_71, param_72, param_73);
|
||||||
cmd_alloc = param_71;
|
cmd_alloc = param_71;
|
||||||
cmd_ref = param_72;
|
cmd_ref = param_72;
|
||||||
cmd_limit = param_73;
|
cmd_limit = param_73;
|
||||||
if (!_2204)
|
if (!_2133)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1218,10 +1173,10 @@ void comp_main()
|
||||||
float param_78 = fill_img.linewidth;
|
float param_78 = fill_img.linewidth;
|
||||||
write_fill(param_74, param_75, param_76, param_77, param_78);
|
write_fill(param_74, param_75, param_76, param_77, param_78);
|
||||||
cmd_ref = param_75;
|
cmd_ref = param_75;
|
||||||
CmdImage _2230 = { fill_img.index, fill_img.offset };
|
CmdImage _2159 = { fill_img.index, fill_img.offset };
|
||||||
Alloc param_79 = cmd_alloc;
|
Alloc param_79 = cmd_alloc;
|
||||||
CmdRef param_80 = cmd_ref;
|
CmdRef param_80 = cmd_ref;
|
||||||
CmdImage param_81 = _2230;
|
CmdImage param_81 = _2159;
|
||||||
Cmd_Image_write(param_79, param_80, param_81);
|
Cmd_Image_write(param_79, param_80, param_81);
|
||||||
cmd_ref.offset += 12u;
|
cmd_ref.offset += 12u;
|
||||||
break;
|
break;
|
||||||
|
@ -1230,103 +1185,76 @@ void comp_main()
|
||||||
{
|
{
|
||||||
uint param_82 = element_ref_ix;
|
uint param_82 = element_ref_ix;
|
||||||
bool param_83 = mem_ok;
|
bool param_83 = mem_ok;
|
||||||
TileRef _2259 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||||
Alloc param_84 = read_tile_alloc(param_82, param_83);
|
Alloc param_84 = read_tile_alloc(param_82, param_83);
|
||||||
TileRef param_85 = _2259;
|
TileRef param_85 = _2188;
|
||||||
tile_1 = Tile_read(param_84, param_85);
|
tile_1 = Tile_read(param_84, param_85);
|
||||||
bool _2265 = tile_1.tile.offset == 0u;
|
bool _2194 = tile_1.tile.offset == 0u;
|
||||||
bool _2271;
|
bool _2200;
|
||||||
if (_2265)
|
if (_2194)
|
||||||
{
|
{
|
||||||
_2271 = tile_1.backdrop == 0;
|
_2200 = tile_1.backdrop == 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_2271 = _2265;
|
_2200 = _2194;
|
||||||
}
|
}
|
||||||
if (_2271)
|
if (_2200)
|
||||||
{
|
{
|
||||||
clip_zero_depth = clip_depth + 1u;
|
clip_zero_depth = clip_depth + 1u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))
|
Alloc param_86 = cmd_alloc;
|
||||||
|
CmdRef param_87 = cmd_ref;
|
||||||
|
uint param_88 = cmd_limit;
|
||||||
|
bool _2212 = alloc_cmd(param_86, param_87, param_88);
|
||||||
|
cmd_alloc = param_86;
|
||||||
|
cmd_ref = param_87;
|
||||||
|
cmd_limit = param_88;
|
||||||
|
if (!_2212)
|
||||||
{
|
{
|
||||||
clip_one_mask |= (1u << clip_depth);
|
break;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Alloc _2293;
|
|
||||||
_2293.offset = _1249.Load(32);
|
|
||||||
param_86.offset = _2293.offset;
|
|
||||||
AnnotatedRef param_87 = ref;
|
|
||||||
AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_86, param_87);
|
|
||||||
Alloc param_88 = cmd_alloc;
|
|
||||||
CmdRef param_89 = cmd_ref;
|
|
||||||
uint param_90 = cmd_limit;
|
|
||||||
bool _2305 = alloc_cmd(param_88, param_89, param_90);
|
|
||||||
cmd_alloc = param_88;
|
|
||||||
cmd_ref = param_89;
|
|
||||||
cmd_limit = param_90;
|
|
||||||
if (!_2305)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Alloc param_91 = cmd_alloc;
|
|
||||||
CmdRef param_92 = cmd_ref;
|
|
||||||
uint param_93 = tag_2.flags;
|
|
||||||
Tile param_94 = tile_1;
|
|
||||||
float param_95 = begin_clip.linewidth;
|
|
||||||
write_fill(param_91, param_92, param_93, param_94, param_95);
|
|
||||||
cmd_ref = param_92;
|
|
||||||
Alloc param_96 = cmd_alloc;
|
|
||||||
CmdRef param_97 = cmd_ref;
|
|
||||||
Cmd_BeginClip_write(param_96, param_97);
|
|
||||||
cmd_ref.offset += 4u;
|
|
||||||
if (clip_depth < 32u)
|
|
||||||
{
|
|
||||||
clip_one_mask &= (~(1u << clip_depth));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Alloc param_89 = cmd_alloc;
|
||||||
|
CmdRef param_90 = cmd_ref;
|
||||||
|
Cmd_BeginClip_write(param_89, param_90);
|
||||||
|
cmd_ref.offset += 4u;
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 5u:
|
case 5u:
|
||||||
{
|
{
|
||||||
|
uint param_91 = element_ref_ix;
|
||||||
|
bool param_92 = mem_ok;
|
||||||
|
TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
|
||||||
|
Alloc param_93 = read_tile_alloc(param_91, param_92);
|
||||||
|
TileRef param_94 = _2249;
|
||||||
|
tile_1 = Tile_read(param_93, param_94);
|
||||||
clip_depth--;
|
clip_depth--;
|
||||||
bool _2351 = clip_depth >= 32u;
|
Alloc param_95 = cmd_alloc;
|
||||||
bool _2360;
|
CmdRef param_96 = cmd_ref;
|
||||||
if (!_2351)
|
uint param_97 = cmd_limit;
|
||||||
|
bool _2261 = alloc_cmd(param_95, param_96, param_97);
|
||||||
|
cmd_alloc = param_95;
|
||||||
|
cmd_ref = param_96;
|
||||||
|
cmd_limit = param_97;
|
||||||
|
if (!_2261)
|
||||||
{
|
{
|
||||||
_2360 = (clip_one_mask & (1u << clip_depth)) == 0u;
|
break;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
_2360 = _2351;
|
|
||||||
}
|
|
||||||
if (_2360)
|
|
||||||
{
|
|
||||||
Alloc param_98 = cmd_alloc;
|
|
||||||
CmdRef param_99 = cmd_ref;
|
|
||||||
uint param_100 = cmd_limit;
|
|
||||||
bool _2369 = alloc_cmd(param_98, param_99, param_100);
|
|
||||||
cmd_alloc = param_98;
|
|
||||||
cmd_ref = param_99;
|
|
||||||
cmd_limit = param_100;
|
|
||||||
if (!_2369)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Alloc param_101 = cmd_alloc;
|
|
||||||
CmdRef param_102 = cmd_ref;
|
|
||||||
Cmd_Solid_write(param_101, param_102);
|
|
||||||
cmd_ref.offset += 4u;
|
|
||||||
Alloc param_103 = cmd_alloc;
|
|
||||||
CmdRef param_104 = cmd_ref;
|
|
||||||
Cmd_EndClip_write(param_103, param_104);
|
|
||||||
cmd_ref.offset += 4u;
|
|
||||||
}
|
}
|
||||||
|
Alloc param_98 = cmd_alloc;
|
||||||
|
CmdRef param_99 = cmd_ref;
|
||||||
|
uint param_100 = 0u;
|
||||||
|
Tile param_101 = tile_1;
|
||||||
|
float param_102 = 0.0f;
|
||||||
|
write_fill(param_98, param_99, param_100, param_101, param_102);
|
||||||
|
cmd_ref = param_99;
|
||||||
|
Alloc param_103 = cmd_alloc;
|
||||||
|
CmdRef param_104 = cmd_ref;
|
||||||
|
Cmd_EndClip_write(param_103, param_104);
|
||||||
|
cmd_ref.offset += 4u;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1359,17 +1287,17 @@ void comp_main()
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool _2432 = (bin_tile_x + tile_x) < _1249.Load(8);
|
bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8);
|
||||||
bool _2441;
|
bool _2335;
|
||||||
if (_2432)
|
if (_2326)
|
||||||
{
|
{
|
||||||
_2441 = (bin_tile_y + tile_y) < _1249.Load(12);
|
_2335 = (bin_tile_y + tile_y) < _1169.Load(12);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_2441 = _2432;
|
_2335 = _2326;
|
||||||
}
|
}
|
||||||
if (_2441)
|
if (_2335)
|
||||||
{
|
{
|
||||||
Alloc param_105 = cmd_alloc;
|
Alloc param_105 = cmd_alloc;
|
||||||
CmdRef param_106 = cmd_ref;
|
CmdRef param_106 = cmd_ref;
|
||||||
|
|
574
piet-gpu/shader/gen/coarse.msl
generated
574
piet-gpu/shader/gen/coarse.msl
generated
File diff suppressed because it is too large
Load diff
BIN
piet-gpu/shader/gen/coarse.spv
generated
BIN
piet-gpu/shader/gen/coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
Binary file not shown.
257
piet-gpu/shader/gen/draw_leaf.hlsl
generated
257
piet-gpu/shader/gen/draw_leaf.hlsl
generated
|
@ -41,16 +41,6 @@ struct FillImage
|
||||||
int2 offset;
|
int2 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ClipRef
|
|
||||||
{
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Clip
|
|
||||||
{
|
|
||||||
float4 bbox;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ElementTag
|
struct ElementTag
|
||||||
{
|
{
|
||||||
uint tag;
|
uint tag;
|
||||||
|
@ -143,8 +133,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -153,14 +148,14 @@ struct Config
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const DrawMonoid _418 = { 0u, 0u };
|
static const DrawMonoid _348 = { 0u, 0u };
|
||||||
static const DrawMonoid _442 = { 1u, 0u };
|
static const DrawMonoid _372 = { 1u, 0u };
|
||||||
static const DrawMonoid _444 = { 1u, 1u };
|
static const DrawMonoid _374 = { 1u, 1u };
|
||||||
|
|
||||||
RWByteAddressBuffer _201 : register(u0, space0);
|
RWByteAddressBuffer _187 : register(u0, space0);
|
||||||
ByteAddressBuffer _225 : register(t2, space0);
|
ByteAddressBuffer _211 : register(t2, space0);
|
||||||
ByteAddressBuffer _1004 : register(t3, space0);
|
ByteAddressBuffer _934 : register(t3, space0);
|
||||||
ByteAddressBuffer _1038 : register(t1, space0);
|
ByteAddressBuffer _968 : register(t1, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -176,9 +171,9 @@ groupshared DrawMonoid sh_scratch[256];
|
||||||
|
|
||||||
ElementTag Element_tag(ElementRef ref)
|
ElementTag Element_tag(ElementRef ref)
|
||||||
{
|
{
|
||||||
uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0);
|
uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0);
|
||||||
ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||||
return _375;
|
return _321;
|
||||||
}
|
}
|
||||||
|
|
||||||
DrawMonoid map_tag(uint tag_word)
|
DrawMonoid map_tag(uint tag_word)
|
||||||
|
@ -189,24 +184,24 @@ DrawMonoid map_tag(uint tag_word)
|
||||||
case 5u:
|
case 5u:
|
||||||
case 6u:
|
case 6u:
|
||||||
{
|
{
|
||||||
return _442;
|
return _372;
|
||||||
}
|
}
|
||||||
case 9u:
|
case 9u:
|
||||||
case 10u:
|
case 10u:
|
||||||
{
|
{
|
||||||
return _444;
|
return _374;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
return _418;
|
return _348;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ElementRef Element_index(ElementRef ref, uint index)
|
ElementRef Element_index(ElementRef ref, uint index)
|
||||||
{
|
{
|
||||||
ElementRef _214 = { ref.offset + (index * 36u) };
|
ElementRef _200 = { ref.offset + (index * 36u) };
|
||||||
return _214;
|
return _200;
|
||||||
}
|
}
|
||||||
|
|
||||||
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
||||||
|
@ -219,13 +214,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
||||||
|
|
||||||
DrawMonoid tag_monoid_identity()
|
DrawMonoid tag_monoid_identity()
|
||||||
{
|
{
|
||||||
return _418;
|
return _348;
|
||||||
}
|
}
|
||||||
|
|
||||||
FillColor FillColor_read(FillColorRef ref)
|
FillColor FillColor_read(FillColorRef ref)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||||
FillColor s;
|
FillColor s;
|
||||||
s.rgba_color = raw0;
|
s.rgba_color = raw0;
|
||||||
return s;
|
return s;
|
||||||
|
@ -233,8 +228,8 @@ FillColor FillColor_read(FillColorRef ref)
|
||||||
|
|
||||||
FillColor Element_FillColor_read(ElementRef ref)
|
FillColor Element_FillColor_read(ElementRef ref)
|
||||||
{
|
{
|
||||||
FillColorRef _381 = { ref.offset + 4u };
|
FillColorRef _327 = { ref.offset + 4u };
|
||||||
FillColorRef param = _381;
|
FillColorRef param = _327;
|
||||||
return FillColor_read(param);
|
return FillColor_read(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,7 +246,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
_201.Store(offset * 4 + 8, val);
|
_187.Store(offset * 4 + 8, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
|
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
|
||||||
|
@ -289,9 +284,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 1u;
|
uint param_2 = (flags << uint(16)) | 1u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
AnnoColorRef _805 = { ref.offset + 4u };
|
AnnoColorRef _735 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoColorRef param_4 = _805;
|
AnnoColorRef param_4 = _735;
|
||||||
AnnoColor param_5 = s;
|
AnnoColor param_5 = s;
|
||||||
AnnoColor_write(param_3, param_4, param_5);
|
AnnoColor_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -299,11 +294,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
|
||||||
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
|
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
|
||||||
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
|
uint raw2 = _211.Load((ix + 2u) * 4 + 0);
|
||||||
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
|
uint raw3 = _211.Load((ix + 3u) * 4 + 0);
|
||||||
uint raw4 = _225.Load((ix + 4u) * 4 + 0);
|
uint raw4 = _211.Load((ix + 4u) * 4 + 0);
|
||||||
FillLinGradient s;
|
FillLinGradient s;
|
||||||
s.index = raw0;
|
s.index = raw0;
|
||||||
s.p0 = float2(asfloat(raw1), asfloat(raw2));
|
s.p0 = float2(asfloat(raw1), asfloat(raw2));
|
||||||
|
@ -313,8 +308,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
|
||||||
|
|
||||||
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
|
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
|
||||||
{
|
{
|
||||||
FillLinGradientRef _389 = { ref.offset + 4u };
|
FillLinGradientRef _335 = { ref.offset + 4u };
|
||||||
FillLinGradientRef param = _389;
|
FillLinGradientRef param = _335;
|
||||||
return FillLinGradient_read(param);
|
return FillLinGradient_read(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -365,9 +360,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 2u;
|
uint param_2 = (flags << uint(16)) | 2u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
AnnoLinGradientRef _826 = { ref.offset + 4u };
|
AnnoLinGradientRef _756 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoLinGradientRef param_4 = _826;
|
AnnoLinGradientRef param_4 = _756;
|
||||||
AnnoLinGradient param_5 = s;
|
AnnoLinGradient param_5 = s;
|
||||||
AnnoLinGradient_write(param_3, param_4, param_5);
|
AnnoLinGradient_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -375,8 +370,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG
|
||||||
FillImage FillImage_read(FillImageRef ref)
|
FillImage FillImage_read(FillImageRef ref)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
uint raw0 = _211.Load((ix + 0u) * 4 + 0);
|
||||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
uint raw1 = _211.Load((ix + 1u) * 4 + 0);
|
||||||
FillImage s;
|
FillImage s;
|
||||||
s.index = raw0;
|
s.index = raw0;
|
||||||
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
||||||
|
@ -385,8 +380,8 @@ FillImage FillImage_read(FillImageRef ref)
|
||||||
|
|
||||||
FillImage Element_FillImage_read(ElementRef ref)
|
FillImage Element_FillImage_read(ElementRef ref)
|
||||||
{
|
{
|
||||||
FillImageRef _397 = { ref.offset + 4u };
|
FillImageRef _343 = { ref.offset + 4u };
|
||||||
FillImageRef param = _397;
|
FillImageRef param = _343;
|
||||||
return FillImage_read(param);
|
return FillImage_read(param);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,32 +424,13 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 3u;
|
uint param_2 = (flags << uint(16)) | 3u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
AnnoImageRef _847 = { ref.offset + 4u };
|
AnnoImageRef _777 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoImageRef param_4 = _847;
|
AnnoImageRef param_4 = _777;
|
||||||
AnnoImage param_5 = s;
|
AnnoImage param_5 = s;
|
||||||
AnnoImage_write(param_3, param_4, param_5);
|
AnnoImage_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
|
||||||
Clip Clip_read(ClipRef ref)
|
|
||||||
{
|
|
||||||
uint ix = ref.offset >> uint(2);
|
|
||||||
uint raw0 = _225.Load((ix + 0u) * 4 + 0);
|
|
||||||
uint raw1 = _225.Load((ix + 1u) * 4 + 0);
|
|
||||||
uint raw2 = _225.Load((ix + 2u) * 4 + 0);
|
|
||||||
uint raw3 = _225.Load((ix + 3u) * 4 + 0);
|
|
||||||
Clip s;
|
|
||||||
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
Clip Element_BeginClip_read(ElementRef ref)
|
|
||||||
{
|
|
||||||
ClipRef _405 = { ref.offset + 4u };
|
|
||||||
ClipRef param = _405;
|
|
||||||
return Clip_read(param);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
|
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
|
@ -486,20 +462,13 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 4u;
|
uint param_2 = (flags << uint(16)) | 4u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
AnnoBeginClipRef _868 = { ref.offset + 4u };
|
AnnoBeginClipRef _798 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoBeginClipRef param_4 = _868;
|
AnnoBeginClipRef param_4 = _798;
|
||||||
AnnoBeginClip param_5 = s;
|
AnnoBeginClip param_5 = s;
|
||||||
AnnoBeginClip_write(param_3, param_4, param_5);
|
AnnoBeginClip_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
|
||||||
Clip Element_EndClip_read(ElementRef ref)
|
|
||||||
{
|
|
||||||
ClipRef _413 = { ref.offset + 4u };
|
|
||||||
ClipRef param = _413;
|
|
||||||
return Clip_read(param);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
|
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
|
@ -527,9 +496,9 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 5u;
|
uint param_2 = 5u;
|
||||||
write_mem(param, param_1, param_2);
|
write_mem(param, param_1, param_2);
|
||||||
AnnoEndClipRef _886 = { ref.offset + 4u };
|
AnnoEndClipRef _816 = { ref.offset + 4u };
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoEndClipRef param_4 = _886;
|
AnnoEndClipRef param_4 = _816;
|
||||||
AnnoEndClip param_5 = s;
|
AnnoEndClip param_5 = s;
|
||||||
AnnoEndClip_write(param_3, param_4, param_5);
|
AnnoEndClip_write(param_3, param_4, param_5);
|
||||||
}
|
}
|
||||||
|
@ -537,8 +506,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s)
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
ElementRef _904 = { ix * 36u };
|
ElementRef _834 = { ix * 36u };
|
||||||
ElementRef ref = _904;
|
ElementRef ref = _834;
|
||||||
ElementRef param = ref;
|
ElementRef param = ref;
|
||||||
uint tag_word = Element_tag(param).tag;
|
uint tag_word = Element_tag(param).tag;
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
|
@ -575,11 +544,11 @@ void comp_main()
|
||||||
DrawMonoid row = tag_monoid_identity();
|
DrawMonoid row = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
DrawMonoid _1010;
|
DrawMonoid _940;
|
||||||
_1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
_940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
||||||
_1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
_940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
||||||
row.path_ix = _1010.path_ix;
|
row.path_ix = _940.path_ix;
|
||||||
row.clip_ix = _1010.clip_ix;
|
row.clip_ix = _940.clip_ix;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -588,9 +557,10 @@ void comp_main()
|
||||||
row = combine_tag_monoid(param_10, param_11);
|
row = combine_tag_monoid(param_10, param_11);
|
||||||
}
|
}
|
||||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||||
uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u);
|
uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u);
|
||||||
AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) };
|
uint clip_out_base = _968.Load(48) >> uint(2);
|
||||||
AnnotatedRef out_ref = _1054;
|
AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) };
|
||||||
|
AnnotatedRef out_ref = _989;
|
||||||
float4 mat;
|
float4 mat;
|
||||||
float2 translate;
|
float2 translate;
|
||||||
AnnoColor anno_fill;
|
AnnoColor anno_fill;
|
||||||
|
@ -600,39 +570,43 @@ void comp_main()
|
||||||
AnnoImage anno_img;
|
AnnoImage anno_img;
|
||||||
Alloc param_28;
|
Alloc param_28;
|
||||||
AnnoBeginClip anno_begin_clip;
|
AnnoBeginClip anno_begin_clip;
|
||||||
Alloc param_33;
|
Alloc param_32;
|
||||||
AnnoEndClip anno_end_clip;
|
AnnoEndClip anno_end_clip;
|
||||||
Alloc param_38;
|
Alloc param_36;
|
||||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||||
{
|
{
|
||||||
DrawMonoid param_12 = row;
|
DrawMonoid m = row;
|
||||||
DrawMonoid param_13 = local[i_2];
|
if (i_2 > 0u)
|
||||||
DrawMonoid m = combine_tag_monoid(param_12, param_13);
|
{
|
||||||
_201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
|
DrawMonoid param_12 = m;
|
||||||
_201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
|
DrawMonoid param_13 = local[i_2 - 1u];
|
||||||
|
m = combine_tag_monoid(param_12, param_13);
|
||||||
|
}
|
||||||
|
_187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
|
||||||
|
_187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
|
||||||
ElementRef param_14 = ref;
|
ElementRef param_14 = ref;
|
||||||
uint param_15 = i_2;
|
uint param_15 = i_2;
|
||||||
ElementRef this_ref = Element_index(param_14, param_15);
|
ElementRef this_ref = Element_index(param_14, param_15);
|
||||||
ElementRef param_16 = this_ref;
|
ElementRef param_16 = this_ref;
|
||||||
tag_word = Element_tag(param_16).tag;
|
tag_word = Element_tag(param_16).tag;
|
||||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
|
||||||
{
|
{
|
||||||
uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u));
|
uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix);
|
||||||
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
||||||
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
||||||
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
||||||
float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
|
float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
|
||||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8));
|
float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8));
|
||||||
uint fill_mode = uint(linewidth >= 0.0f);
|
uint fill_mode = uint(linewidth >= 0.0f);
|
||||||
if ((linewidth >= 0.0f) || (tag_word == 5u))
|
if ((linewidth >= 0.0f) || (tag_word == 5u))
|
||||||
{
|
{
|
||||||
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8);
|
uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8);
|
||||||
uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix);
|
uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix);
|
||||||
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8)));
|
mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8)));
|
||||||
if (tag_word == 5u)
|
if (tag_word == 5u)
|
||||||
{
|
{
|
||||||
translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8)));
|
translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (linewidth >= 0.0f)
|
if (linewidth >= 0.0f)
|
||||||
|
@ -649,9 +623,9 @@ void comp_main()
|
||||||
anno_fill.bbox = bbox;
|
anno_fill.bbox = bbox;
|
||||||
anno_fill.linewidth = linewidth;
|
anno_fill.linewidth = linewidth;
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
Alloc _1257;
|
Alloc _1203;
|
||||||
_1257.offset = _1038.Load(32);
|
_1203.offset = _968.Load(32);
|
||||||
param_18.offset = _1257.offset;
|
param_18.offset = _1203.offset;
|
||||||
AnnotatedRef param_19 = out_ref;
|
AnnotatedRef param_19 = out_ref;
|
||||||
uint param_20 = fill_mode;
|
uint param_20 = fill_mode;
|
||||||
AnnoColor param_21 = anno_fill;
|
AnnoColor param_21 = anno_fill;
|
||||||
|
@ -674,9 +648,9 @@ void comp_main()
|
||||||
anno_lin.line_x = line_x;
|
anno_lin.line_x = line_x;
|
||||||
anno_lin.line_y = line_y;
|
anno_lin.line_y = line_y;
|
||||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||||
Alloc _1353;
|
Alloc _1299;
|
||||||
_1353.offset = _1038.Load(32);
|
_1299.offset = _968.Load(32);
|
||||||
param_23.offset = _1353.offset;
|
param_23.offset = _1299.offset;
|
||||||
AnnotatedRef param_24 = out_ref;
|
AnnotatedRef param_24 = out_ref;
|
||||||
uint param_25 = fill_mode;
|
uint param_25 = fill_mode;
|
||||||
AnnoLinGradient param_26 = anno_lin;
|
AnnoLinGradient param_26 = anno_lin;
|
||||||
|
@ -691,48 +665,51 @@ void comp_main()
|
||||||
anno_img.linewidth = linewidth;
|
anno_img.linewidth = linewidth;
|
||||||
anno_img.index = fill_img.index;
|
anno_img.index = fill_img.index;
|
||||||
anno_img.offset = fill_img.offset;
|
anno_img.offset = fill_img.offset;
|
||||||
Alloc _1381;
|
Alloc _1327;
|
||||||
_1381.offset = _1038.Load(32);
|
_1327.offset = _968.Load(32);
|
||||||
param_28.offset = _1381.offset;
|
param_28.offset = _1327.offset;
|
||||||
AnnotatedRef param_29 = out_ref;
|
AnnotatedRef param_29 = out_ref;
|
||||||
uint param_30 = fill_mode;
|
uint param_30 = fill_mode;
|
||||||
AnnoImage param_31 = anno_img;
|
AnnoImage param_31 = anno_img;
|
||||||
Annotated_Image_write(param_28, param_29, param_30, param_31);
|
Annotated_Image_write(param_28, param_29, param_30, param_31);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case 9u:
|
||||||
|
{
|
||||||
|
anno_begin_clip.bbox = bbox;
|
||||||
|
anno_begin_clip.linewidth = 0.0f;
|
||||||
|
Alloc _1344;
|
||||||
|
_1344.offset = _968.Load(32);
|
||||||
|
param_32.offset = _1344.offset;
|
||||||
|
AnnotatedRef param_33 = out_ref;
|
||||||
|
uint param_34 = 0u;
|
||||||
|
AnnoBeginClip param_35 = anno_begin_clip;
|
||||||
|
Annotated_BeginClip_write(param_32, param_33, param_34, param_35);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (tag_word == 10u)
|
||||||
|
{
|
||||||
|
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
|
||||||
|
Alloc _1368;
|
||||||
|
_1368.offset = _968.Load(32);
|
||||||
|
param_36.offset = _1368.offset;
|
||||||
|
AnnotatedRef param_37 = out_ref;
|
||||||
|
AnnoEndClip param_38 = anno_end_clip;
|
||||||
|
Annotated_EndClip_write(param_36, param_37, param_38);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((tag_word == 9u) || (tag_word == 10u))
|
||||||
|
{
|
||||||
|
uint path_ix = ~(out_ix + i_2);
|
||||||
if (tag_word == 9u)
|
if (tag_word == 9u)
|
||||||
{
|
{
|
||||||
ElementRef param_32 = this_ref;
|
path_ix = m.path_ix;
|
||||||
Clip begin_clip = Element_BeginClip_read(param_32);
|
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
|
||||||
anno_begin_clip.linewidth = 0.0f;
|
|
||||||
Alloc _1410;
|
|
||||||
_1410.offset = _1038.Load(32);
|
|
||||||
param_33.offset = _1410.offset;
|
|
||||||
AnnotatedRef param_34 = out_ref;
|
|
||||||
uint param_35 = 0u;
|
|
||||||
AnnoBeginClip param_36 = anno_begin_clip;
|
|
||||||
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (tag_word == 10u)
|
|
||||||
{
|
|
||||||
ElementRef param_37 = this_ref;
|
|
||||||
Clip end_clip = Element_EndClip_read(param_37);
|
|
||||||
anno_end_clip.bbox = end_clip.bbox;
|
|
||||||
Alloc _1435;
|
|
||||||
_1435.offset = _1038.Load(32);
|
|
||||||
param_38.offset = _1435.offset;
|
|
||||||
AnnotatedRef param_39 = out_ref;
|
|
||||||
AnnoEndClip param_40 = anno_end_clip;
|
|
||||||
Annotated_EndClip_write(param_38, param_39, param_40);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
_187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
|
||||||
}
|
}
|
||||||
out_ref.offset += 40u;
|
out_ref.offset += 40u;
|
||||||
}
|
}
|
||||||
|
|
310
piet-gpu/shader/gen/draw_leaf.msl
generated
310
piet-gpu/shader/gen/draw_leaf.msl
generated
|
@ -87,16 +87,6 @@ struct FillImage
|
||||||
int2 offset;
|
int2 offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ClipRef
|
|
||||||
{
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Clip
|
|
||||||
{
|
|
||||||
float4 bbox;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ElementTag
|
struct ElementTag
|
||||||
{
|
{
|
||||||
uint tag;
|
uint tag;
|
||||||
|
@ -217,8 +207,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -233,9 +228,9 @@ struct ConfigBuf
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225)
|
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
uint tag_and_flags = v_225.scene[ref.offset >> uint(2)];
|
uint tag_and_flags = v_211.scene[ref.offset >> uint(2)];
|
||||||
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,20 +279,20 @@ DrawMonoid tag_monoid_identity()
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225)
|
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = v_225.scene[ix + 0u];
|
uint raw0 = v_211.scene[ix + 0u];
|
||||||
FillColor s;
|
FillColor s;
|
||||||
s.rgba_color = raw0;
|
s.rgba_color = raw0;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
FillColorRef param = FillColorRef{ ref.offset + 4u };
|
FillColorRef param = FillColorRef{ ref.offset + 4u };
|
||||||
return FillColor_read(param, v_225);
|
return FillColor_read(param, v_211);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
|
@ -307,7 +302,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201)
|
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = alloc;
|
Alloc param = alloc;
|
||||||
uint param_1 = offset;
|
uint param_1 = offset;
|
||||||
|
@ -315,61 +310,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
v_201.memory[offset] = val;
|
v_187.memory[offset] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201)
|
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = as_type<uint>(s.bbox.x);
|
uint param_2 = as_type<uint>(s.bbox.x);
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
uint param_4 = ix + 1u;
|
uint param_4 = ix + 1u;
|
||||||
uint param_5 = as_type<uint>(s.bbox.y);
|
uint param_5 = as_type<uint>(s.bbox.y);
|
||||||
write_mem(param_3, param_4, param_5, v_201);
|
write_mem(param_3, param_4, param_5, v_187);
|
||||||
Alloc param_6 = a;
|
Alloc param_6 = a;
|
||||||
uint param_7 = ix + 2u;
|
uint param_7 = ix + 2u;
|
||||||
uint param_8 = as_type<uint>(s.bbox.z);
|
uint param_8 = as_type<uint>(s.bbox.z);
|
||||||
write_mem(param_6, param_7, param_8, v_201);
|
write_mem(param_6, param_7, param_8, v_187);
|
||||||
Alloc param_9 = a;
|
Alloc param_9 = a;
|
||||||
uint param_10 = ix + 3u;
|
uint param_10 = ix + 3u;
|
||||||
uint param_11 = as_type<uint>(s.bbox.w);
|
uint param_11 = as_type<uint>(s.bbox.w);
|
||||||
write_mem(param_9, param_10, param_11, v_201);
|
write_mem(param_9, param_10, param_11, v_187);
|
||||||
Alloc param_12 = a;
|
Alloc param_12 = a;
|
||||||
uint param_13 = ix + 4u;
|
uint param_13 = ix + 4u;
|
||||||
uint param_14 = as_type<uint>(s.linewidth);
|
uint param_14 = as_type<uint>(s.linewidth);
|
||||||
write_mem(param_12, param_13, param_14, v_201);
|
write_mem(param_12, param_13, param_14, v_187);
|
||||||
Alloc param_15 = a;
|
Alloc param_15 = a;
|
||||||
uint param_16 = ix + 5u;
|
uint param_16 = ix + 5u;
|
||||||
uint param_17 = s.rgba_color;
|
uint param_17 = s.rgba_color;
|
||||||
write_mem(param_15, param_16, param_17, v_201);
|
write_mem(param_15, param_16, param_17, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201)
|
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 1u;
|
uint param_2 = (flags << uint(16)) | 1u;
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
|
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
|
||||||
AnnoColor param_5 = s;
|
AnnoColor param_5 = s;
|
||||||
AnnoColor_write(param_3, param_4, param_5, v_201);
|
AnnoColor_write(param_3, param_4, param_5, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225)
|
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = v_225.scene[ix + 0u];
|
uint raw0 = v_211.scene[ix + 0u];
|
||||||
uint raw1 = v_225.scene[ix + 1u];
|
uint raw1 = v_211.scene[ix + 1u];
|
||||||
uint raw2 = v_225.scene[ix + 2u];
|
uint raw2 = v_211.scene[ix + 2u];
|
||||||
uint raw3 = v_225.scene[ix + 3u];
|
uint raw3 = v_211.scene[ix + 3u];
|
||||||
uint raw4 = v_225.scene[ix + 4u];
|
uint raw4 = v_211.scene[ix + 4u];
|
||||||
FillLinGradient s;
|
FillLinGradient s;
|
||||||
s.index = raw0;
|
s.index = raw0;
|
||||||
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
|
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
|
||||||
|
@ -378,73 +373,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
|
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
|
||||||
return FillLinGradient_read(param, v_225);
|
return FillLinGradient_read(param, v_211);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201)
|
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = as_type<uint>(s.bbox.x);
|
uint param_2 = as_type<uint>(s.bbox.x);
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
uint param_4 = ix + 1u;
|
uint param_4 = ix + 1u;
|
||||||
uint param_5 = as_type<uint>(s.bbox.y);
|
uint param_5 = as_type<uint>(s.bbox.y);
|
||||||
write_mem(param_3, param_4, param_5, v_201);
|
write_mem(param_3, param_4, param_5, v_187);
|
||||||
Alloc param_6 = a;
|
Alloc param_6 = a;
|
||||||
uint param_7 = ix + 2u;
|
uint param_7 = ix + 2u;
|
||||||
uint param_8 = as_type<uint>(s.bbox.z);
|
uint param_8 = as_type<uint>(s.bbox.z);
|
||||||
write_mem(param_6, param_7, param_8, v_201);
|
write_mem(param_6, param_7, param_8, v_187);
|
||||||
Alloc param_9 = a;
|
Alloc param_9 = a;
|
||||||
uint param_10 = ix + 3u;
|
uint param_10 = ix + 3u;
|
||||||
uint param_11 = as_type<uint>(s.bbox.w);
|
uint param_11 = as_type<uint>(s.bbox.w);
|
||||||
write_mem(param_9, param_10, param_11, v_201);
|
write_mem(param_9, param_10, param_11, v_187);
|
||||||
Alloc param_12 = a;
|
Alloc param_12 = a;
|
||||||
uint param_13 = ix + 4u;
|
uint param_13 = ix + 4u;
|
||||||
uint param_14 = as_type<uint>(s.linewidth);
|
uint param_14 = as_type<uint>(s.linewidth);
|
||||||
write_mem(param_12, param_13, param_14, v_201);
|
write_mem(param_12, param_13, param_14, v_187);
|
||||||
Alloc param_15 = a;
|
Alloc param_15 = a;
|
||||||
uint param_16 = ix + 5u;
|
uint param_16 = ix + 5u;
|
||||||
uint param_17 = s.index;
|
uint param_17 = s.index;
|
||||||
write_mem(param_15, param_16, param_17, v_201);
|
write_mem(param_15, param_16, param_17, v_187);
|
||||||
Alloc param_18 = a;
|
Alloc param_18 = a;
|
||||||
uint param_19 = ix + 6u;
|
uint param_19 = ix + 6u;
|
||||||
uint param_20 = as_type<uint>(s.line_x);
|
uint param_20 = as_type<uint>(s.line_x);
|
||||||
write_mem(param_18, param_19, param_20, v_201);
|
write_mem(param_18, param_19, param_20, v_187);
|
||||||
Alloc param_21 = a;
|
Alloc param_21 = a;
|
||||||
uint param_22 = ix + 7u;
|
uint param_22 = ix + 7u;
|
||||||
uint param_23 = as_type<uint>(s.line_y);
|
uint param_23 = as_type<uint>(s.line_y);
|
||||||
write_mem(param_21, param_22, param_23, v_201);
|
write_mem(param_21, param_22, param_23, v_187);
|
||||||
Alloc param_24 = a;
|
Alloc param_24 = a;
|
||||||
uint param_25 = ix + 8u;
|
uint param_25 = ix + 8u;
|
||||||
uint param_26 = as_type<uint>(s.line_c);
|
uint param_26 = as_type<uint>(s.line_c);
|
||||||
write_mem(param_24, param_25, param_26, v_201);
|
write_mem(param_24, param_25, param_26, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201)
|
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 2u;
|
uint param_2 = (flags << uint(16)) | 2u;
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
|
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
|
||||||
AnnoLinGradient param_5 = s;
|
AnnoLinGradient param_5 = s;
|
||||||
AnnoLinGradient_write(param_3, param_4, param_5, v_201);
|
AnnoLinGradient_write(param_3, param_4, param_5, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225)
|
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
uint raw0 = v_225.scene[ix + 0u];
|
uint raw0 = v_211.scene[ix + 0u];
|
||||||
uint raw1 = v_225.scene[ix + 1u];
|
uint raw1 = v_211.scene[ix + 1u];
|
||||||
FillImage s;
|
FillImage s;
|
||||||
s.index = raw0;
|
s.index = raw0;
|
||||||
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
|
||||||
|
@ -452,167 +447,140 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf&
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211)
|
||||||
{
|
{
|
||||||
FillImageRef param = FillImageRef{ ref.offset + 4u };
|
FillImageRef param = FillImageRef{ ref.offset + 4u };
|
||||||
return FillImage_read(param, v_225);
|
return FillImage_read(param, v_211);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201)
|
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = as_type<uint>(s.bbox.x);
|
uint param_2 = as_type<uint>(s.bbox.x);
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
uint param_4 = ix + 1u;
|
uint param_4 = ix + 1u;
|
||||||
uint param_5 = as_type<uint>(s.bbox.y);
|
uint param_5 = as_type<uint>(s.bbox.y);
|
||||||
write_mem(param_3, param_4, param_5, v_201);
|
write_mem(param_3, param_4, param_5, v_187);
|
||||||
Alloc param_6 = a;
|
Alloc param_6 = a;
|
||||||
uint param_7 = ix + 2u;
|
uint param_7 = ix + 2u;
|
||||||
uint param_8 = as_type<uint>(s.bbox.z);
|
uint param_8 = as_type<uint>(s.bbox.z);
|
||||||
write_mem(param_6, param_7, param_8, v_201);
|
write_mem(param_6, param_7, param_8, v_187);
|
||||||
Alloc param_9 = a;
|
Alloc param_9 = a;
|
||||||
uint param_10 = ix + 3u;
|
uint param_10 = ix + 3u;
|
||||||
uint param_11 = as_type<uint>(s.bbox.w);
|
uint param_11 = as_type<uint>(s.bbox.w);
|
||||||
write_mem(param_9, param_10, param_11, v_201);
|
write_mem(param_9, param_10, param_11, v_187);
|
||||||
Alloc param_12 = a;
|
Alloc param_12 = a;
|
||||||
uint param_13 = ix + 4u;
|
uint param_13 = ix + 4u;
|
||||||
uint param_14 = as_type<uint>(s.linewidth);
|
uint param_14 = as_type<uint>(s.linewidth);
|
||||||
write_mem(param_12, param_13, param_14, v_201);
|
write_mem(param_12, param_13, param_14, v_187);
|
||||||
Alloc param_15 = a;
|
Alloc param_15 = a;
|
||||||
uint param_16 = ix + 5u;
|
uint param_16 = ix + 5u;
|
||||||
uint param_17 = s.index;
|
uint param_17 = s.index;
|
||||||
write_mem(param_15, param_16, param_17, v_201);
|
write_mem(param_15, param_16, param_17, v_187);
|
||||||
Alloc param_18 = a;
|
Alloc param_18 = a;
|
||||||
uint param_19 = ix + 6u;
|
uint param_19 = ix + 6u;
|
||||||
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
|
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
|
||||||
write_mem(param_18, param_19, param_20, v_201);
|
write_mem(param_18, param_19, param_20, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201)
|
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 3u;
|
uint param_2 = (flags << uint(16)) | 3u;
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
|
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
|
||||||
AnnoImage param_5 = s;
|
AnnoImage param_5 = s;
|
||||||
AnnoImage_write(param_3, param_4, param_5, v_201);
|
AnnoImage_write(param_3, param_4, param_5, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225)
|
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187)
|
||||||
{
|
|
||||||
uint ix = ref.offset >> uint(2);
|
|
||||||
uint raw0 = v_225.scene[ix + 0u];
|
|
||||||
uint raw1 = v_225.scene[ix + 1u];
|
|
||||||
uint raw2 = v_225.scene[ix + 2u];
|
|
||||||
uint raw3 = v_225.scene[ix + 3u];
|
|
||||||
Clip s;
|
|
||||||
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
|
||||||
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
|
||||||
{
|
|
||||||
ClipRef param = ClipRef{ ref.offset + 4u };
|
|
||||||
return Clip_read(param, v_225);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
|
||||||
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201)
|
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = as_type<uint>(s.bbox.x);
|
uint param_2 = as_type<uint>(s.bbox.x);
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
uint param_4 = ix + 1u;
|
uint param_4 = ix + 1u;
|
||||||
uint param_5 = as_type<uint>(s.bbox.y);
|
uint param_5 = as_type<uint>(s.bbox.y);
|
||||||
write_mem(param_3, param_4, param_5, v_201);
|
write_mem(param_3, param_4, param_5, v_187);
|
||||||
Alloc param_6 = a;
|
Alloc param_6 = a;
|
||||||
uint param_7 = ix + 2u;
|
uint param_7 = ix + 2u;
|
||||||
uint param_8 = as_type<uint>(s.bbox.z);
|
uint param_8 = as_type<uint>(s.bbox.z);
|
||||||
write_mem(param_6, param_7, param_8, v_201);
|
write_mem(param_6, param_7, param_8, v_187);
|
||||||
Alloc param_9 = a;
|
Alloc param_9 = a;
|
||||||
uint param_10 = ix + 3u;
|
uint param_10 = ix + 3u;
|
||||||
uint param_11 = as_type<uint>(s.bbox.w);
|
uint param_11 = as_type<uint>(s.bbox.w);
|
||||||
write_mem(param_9, param_10, param_11, v_201);
|
write_mem(param_9, param_10, param_11, v_187);
|
||||||
Alloc param_12 = a;
|
Alloc param_12 = a;
|
||||||
uint param_13 = ix + 4u;
|
uint param_13 = ix + 4u;
|
||||||
uint param_14 = as_type<uint>(s.linewidth);
|
uint param_14 = as_type<uint>(s.linewidth);
|
||||||
write_mem(param_12, param_13, param_14, v_201);
|
write_mem(param_12, param_13, param_14, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201)
|
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = (flags << uint(16)) | 4u;
|
uint param_2 = (flags << uint(16)) | 4u;
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
|
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
|
||||||
AnnoBeginClip param_5 = s;
|
AnnoBeginClip param_5 = s;
|
||||||
AnnoBeginClip_write(param_3, param_4, param_5, v_201);
|
AnnoBeginClip_write(param_3, param_4, param_5, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225)
|
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
|
||||||
{
|
|
||||||
ClipRef param = ClipRef{ ref.offset + 4u };
|
|
||||||
return Clip_read(param, v_225);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
|
||||||
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
|
|
||||||
{
|
{
|
||||||
uint ix = ref.offset >> uint(2);
|
uint ix = ref.offset >> uint(2);
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ix + 0u;
|
uint param_1 = ix + 0u;
|
||||||
uint param_2 = as_type<uint>(s.bbox.x);
|
uint param_2 = as_type<uint>(s.bbox.x);
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
uint param_4 = ix + 1u;
|
uint param_4 = ix + 1u;
|
||||||
uint param_5 = as_type<uint>(s.bbox.y);
|
uint param_5 = as_type<uint>(s.bbox.y);
|
||||||
write_mem(param_3, param_4, param_5, v_201);
|
write_mem(param_3, param_4, param_5, v_187);
|
||||||
Alloc param_6 = a;
|
Alloc param_6 = a;
|
||||||
uint param_7 = ix + 2u;
|
uint param_7 = ix + 2u;
|
||||||
uint param_8 = as_type<uint>(s.bbox.z);
|
uint param_8 = as_type<uint>(s.bbox.z);
|
||||||
write_mem(param_6, param_7, param_8, v_201);
|
write_mem(param_6, param_7, param_8, v_187);
|
||||||
Alloc param_9 = a;
|
Alloc param_9 = a;
|
||||||
uint param_10 = ix + 3u;
|
uint param_10 = ix + 3u;
|
||||||
uint param_11 = as_type<uint>(s.bbox.w);
|
uint param_11 = as_type<uint>(s.bbox.w);
|
||||||
write_mem(param_9, param_10, param_11, v_201);
|
write_mem(param_9, param_10, param_11, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201)
|
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187)
|
||||||
{
|
{
|
||||||
Alloc param = a;
|
Alloc param = a;
|
||||||
uint param_1 = ref.offset >> uint(2);
|
uint param_1 = ref.offset >> uint(2);
|
||||||
uint param_2 = 5u;
|
uint param_2 = 5u;
|
||||||
write_mem(param, param_1, param_2, v_201);
|
write_mem(param, param_1, param_2, v_187);
|
||||||
Alloc param_3 = a;
|
Alloc param_3 = a;
|
||||||
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
|
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
|
||||||
AnnoEndClip param_5 = s;
|
AnnoEndClip param_5 = s;
|
||||||
AnnoEndClip_write(param_3, param_4, param_5, v_201);
|
AnnoEndClip_write(param_3, param_4, param_5, v_187);
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup DrawMonoid sh_scratch[256];
|
threadgroup DrawMonoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
ElementRef ref = ElementRef{ ix * 36u };
|
ElementRef ref = ElementRef{ ix * 36u };
|
||||||
ElementRef param = ref;
|
ElementRef param = ref;
|
||||||
uint tag_word = Element_tag(param, v_225).tag;
|
uint tag_word = Element_tag(param, v_211).tag;
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
DrawMonoid agg = map_tag(param_1);
|
DrawMonoid agg = map_tag(param_1);
|
||||||
spvUnsafeArray<DrawMonoid, 8> local;
|
spvUnsafeArray<DrawMonoid, 8> local;
|
||||||
|
@ -622,7 +590,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
ElementRef param_2 = ref;
|
ElementRef param_2 = ref;
|
||||||
uint param_3 = i;
|
uint param_3 = i;
|
||||||
ElementRef param_4 = Element_index(param_2, param_3);
|
ElementRef param_4 = Element_index(param_2, param_3);
|
||||||
tag_word = Element_tag(param_4, v_225).tag;
|
tag_word = Element_tag(param_4, v_211).tag;
|
||||||
uint param_5 = tag_word;
|
uint param_5 = tag_word;
|
||||||
DrawMonoid param_6 = agg;
|
DrawMonoid param_6 = agg;
|
||||||
DrawMonoid param_7 = map_tag(param_5);
|
DrawMonoid param_7 = map_tag(param_5);
|
||||||
|
@ -647,9 +615,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
DrawMonoid row = tag_monoid_identity();
|
DrawMonoid row = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
uint _1007 = gl_WorkGroupID.x - 1u;
|
uint _937 = gl_WorkGroupID.x - 1u;
|
||||||
row.path_ix = _1004.parent[_1007].path_ix;
|
row.path_ix = _934.parent[_937].path_ix;
|
||||||
row.clip_ix = _1004.parent[_1007].clip_ix;
|
row.clip_ix = _934.parent[_937].clip_ix;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -658,8 +626,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
row = combine_tag_monoid(param_10, param_11);
|
row = combine_tag_monoid(param_10, param_11);
|
||||||
}
|
}
|
||||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||||
uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
||||||
AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) };
|
uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2);
|
||||||
|
AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) };
|
||||||
float4 mat;
|
float4 mat;
|
||||||
float2 translate;
|
float2 translate;
|
||||||
AnnoColor anno_fill;
|
AnnoColor anno_fill;
|
||||||
|
@ -669,39 +638,43 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
AnnoImage anno_img;
|
AnnoImage anno_img;
|
||||||
Alloc param_28;
|
Alloc param_28;
|
||||||
AnnoBeginClip anno_begin_clip;
|
AnnoBeginClip anno_begin_clip;
|
||||||
Alloc param_33;
|
Alloc param_32;
|
||||||
AnnoEndClip anno_end_clip;
|
AnnoEndClip anno_end_clip;
|
||||||
Alloc param_38;
|
Alloc param_36;
|
||||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||||
{
|
{
|
||||||
DrawMonoid param_12 = row;
|
DrawMonoid m = row;
|
||||||
DrawMonoid param_13 = local[i_2];
|
if (i_2 > 0u)
|
||||||
DrawMonoid m = combine_tag_monoid(param_12, param_13);
|
{
|
||||||
v_201.memory[out_base + (i_2 * 2u)] = m.path_ix;
|
DrawMonoid param_12 = m;
|
||||||
v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
|
DrawMonoid param_13 = local[i_2 - 1u];
|
||||||
|
m = combine_tag_monoid(param_12, param_13);
|
||||||
|
}
|
||||||
|
v_187.memory[out_base + (i_2 * 2u)] = m.path_ix;
|
||||||
|
v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
|
||||||
ElementRef param_14 = ref;
|
ElementRef param_14 = ref;
|
||||||
uint param_15 = i_2;
|
uint param_15 = i_2;
|
||||||
ElementRef this_ref = Element_index(param_14, param_15);
|
ElementRef this_ref = Element_index(param_14, param_15);
|
||||||
ElementRef param_16 = this_ref;
|
ElementRef param_16 = this_ref;
|
||||||
tag_word = Element_tag(param_16, v_225).tag;
|
tag_word = Element_tag(param_16, v_211).tag;
|
||||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
|
||||||
{
|
{
|
||||||
uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u));
|
uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
|
||||||
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0;
|
float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0;
|
||||||
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0;
|
float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0;
|
||||||
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0;
|
float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0;
|
||||||
float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0;
|
float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0;
|
||||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||||
float linewidth = as_type<float>(v_201.memory[bbox_offset + 4u]);
|
float linewidth = as_type<float>(v_187.memory[bbox_offset + 4u]);
|
||||||
uint fill_mode = uint(linewidth >= 0.0);
|
uint fill_mode = uint(linewidth >= 0.0);
|
||||||
if ((linewidth >= 0.0) || (tag_word == 5u))
|
if ((linewidth >= 0.0) || (tag_word == 5u))
|
||||||
{
|
{
|
||||||
uint trans_ix = v_201.memory[bbox_offset + 5u];
|
uint trans_ix = v_187.memory[bbox_offset + 5u];
|
||||||
uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
||||||
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u]));
|
mat = as_type<float4>(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u]));
|
||||||
if (tag_word == 5u)
|
if (tag_word == 5u)
|
||||||
{
|
{
|
||||||
translate = as_type<float2>(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u]));
|
translate = as_type<float2>(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (linewidth >= 0.0)
|
if (linewidth >= 0.0)
|
||||||
|
@ -714,21 +687,21 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
case 4u:
|
case 4u:
|
||||||
{
|
{
|
||||||
ElementRef param_17 = this_ref;
|
ElementRef param_17 = this_ref;
|
||||||
FillColor fill = Element_FillColor_read(param_17, v_225);
|
FillColor fill = Element_FillColor_read(param_17, v_211);
|
||||||
anno_fill.bbox = bbox;
|
anno_fill.bbox = bbox;
|
||||||
anno_fill.linewidth = linewidth;
|
anno_fill.linewidth = linewidth;
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
param_18.offset = _1038.conf.anno_alloc.offset;
|
param_18.offset = _968.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_19 = out_ref;
|
AnnotatedRef param_19 = out_ref;
|
||||||
uint param_20 = fill_mode;
|
uint param_20 = fill_mode;
|
||||||
AnnoColor param_21 = anno_fill;
|
AnnoColor param_21 = anno_fill;
|
||||||
Annotated_Color_write(param_18, param_19, param_20, param_21, v_201);
|
Annotated_Color_write(param_18, param_19, param_20, param_21, v_187);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 5u:
|
case 5u:
|
||||||
{
|
{
|
||||||
ElementRef param_22 = this_ref;
|
ElementRef param_22 = this_ref;
|
||||||
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225);
|
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211);
|
||||||
anno_lin.bbox = bbox;
|
anno_lin.bbox = bbox;
|
||||||
anno_lin.linewidth = linewidth;
|
anno_lin.linewidth = linewidth;
|
||||||
anno_lin.index = lin.index;
|
anno_lin.index = lin.index;
|
||||||
|
@ -741,57 +714,60 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
anno_lin.line_x = line_x;
|
anno_lin.line_x = line_x;
|
||||||
anno_lin.line_y = line_y;
|
anno_lin.line_y = line_y;
|
||||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||||
param_23.offset = _1038.conf.anno_alloc.offset;
|
param_23.offset = _968.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_24 = out_ref;
|
AnnotatedRef param_24 = out_ref;
|
||||||
uint param_25 = fill_mode;
|
uint param_25 = fill_mode;
|
||||||
AnnoLinGradient param_26 = anno_lin;
|
AnnoLinGradient param_26 = anno_lin;
|
||||||
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201);
|
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 6u:
|
case 6u:
|
||||||
{
|
{
|
||||||
ElementRef param_27 = this_ref;
|
ElementRef param_27 = this_ref;
|
||||||
FillImage fill_img = Element_FillImage_read(param_27, v_225);
|
FillImage fill_img = Element_FillImage_read(param_27, v_211);
|
||||||
anno_img.bbox = bbox;
|
anno_img.bbox = bbox;
|
||||||
anno_img.linewidth = linewidth;
|
anno_img.linewidth = linewidth;
|
||||||
anno_img.index = fill_img.index;
|
anno_img.index = fill_img.index;
|
||||||
anno_img.offset = fill_img.offset;
|
anno_img.offset = fill_img.offset;
|
||||||
param_28.offset = _1038.conf.anno_alloc.offset;
|
param_28.offset = _968.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_29 = out_ref;
|
AnnotatedRef param_29 = out_ref;
|
||||||
uint param_30 = fill_mode;
|
uint param_30 = fill_mode;
|
||||||
AnnoImage param_31 = anno_img;
|
AnnoImage param_31 = anno_img;
|
||||||
Annotated_Image_write(param_28, param_29, param_30, param_31, v_201);
|
Annotated_Image_write(param_28, param_29, param_30, param_31, v_187);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 9u:
|
||||||
|
{
|
||||||
|
anno_begin_clip.bbox = bbox;
|
||||||
|
anno_begin_clip.linewidth = 0.0;
|
||||||
|
param_32.offset = _968.conf.anno_alloc.offset;
|
||||||
|
AnnotatedRef param_33 = out_ref;
|
||||||
|
uint param_34 = 0u;
|
||||||
|
AnnoBeginClip param_35 = anno_begin_clip;
|
||||||
|
Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
if (tag_word == 10u)
|
||||||
|
{
|
||||||
|
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||||
|
param_36.offset = _968.conf.anno_alloc.offset;
|
||||||
|
AnnotatedRef param_37 = out_ref;
|
||||||
|
AnnoEndClip param_38 = anno_end_clip;
|
||||||
|
Annotated_EndClip_write(param_36, param_37, param_38, v_187);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((tag_word == 9u) || (tag_word == 10u))
|
||||||
|
{
|
||||||
|
uint path_ix = ~(out_ix + i_2);
|
||||||
if (tag_word == 9u)
|
if (tag_word == 9u)
|
||||||
{
|
{
|
||||||
ElementRef param_32 = this_ref;
|
path_ix = m.path_ix;
|
||||||
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
|
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
|
||||||
anno_begin_clip.linewidth = 0.0;
|
|
||||||
param_33.offset = _1038.conf.anno_alloc.offset;
|
|
||||||
AnnotatedRef param_34 = out_ref;
|
|
||||||
uint param_35 = 0u;
|
|
||||||
AnnoBeginClip param_36 = anno_begin_clip;
|
|
||||||
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (tag_word == 10u)
|
|
||||||
{
|
|
||||||
ElementRef param_37 = this_ref;
|
|
||||||
Clip end_clip = Element_EndClip_read(param_37, v_225);
|
|
||||||
anno_end_clip.bbox = end_clip.bbox;
|
|
||||||
param_38.offset = _1038.conf.anno_alloc.offset;
|
|
||||||
AnnotatedRef param_39 = out_ref;
|
|
||||||
AnnoEndClip param_40 = anno_end_clip;
|
|
||||||
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
v_187.memory[clip_out_base + m.clip_ix] = path_ix;
|
||||||
}
|
}
|
||||||
out_ref.offset += 40u;
|
out_ref.offset += 40u;
|
||||||
}
|
}
|
||||||
|
|
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/draw_reduce.hlsl
generated
5
piet-gpu/shader/gen/draw_reduce.hlsl
generated
|
@ -36,8 +36,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
5
piet-gpu/shader/gen/draw_reduce.msl
generated
5
piet-gpu/shader/gen/draw_reduce.msl
generated
|
@ -66,8 +66,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
Binary file not shown.
13
piet-gpu/shader/gen/kernel4.hlsl
generated
13
piet-gpu/shader/gen/kernel4.hlsl
generated
|
@ -117,8 +117,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -457,7 +462,6 @@ void comp_main()
|
||||||
TileSegRef tile_seg_ref;
|
TileSegRef tile_seg_ref;
|
||||||
float area[8];
|
float area[8];
|
||||||
uint blend_stack[128][8];
|
uint blend_stack[128][8];
|
||||||
float blend_alpha_stack[128][8];
|
|
||||||
while (mem_ok)
|
while (mem_ok)
|
||||||
{
|
{
|
||||||
Alloc param_3 = cmd_alloc;
|
Alloc param_3 = cmd_alloc;
|
||||||
|
@ -640,7 +644,6 @@ void comp_main()
|
||||||
float4 param_34 = float4(rgba[k_11]);
|
float4 param_34 = float4(rgba[k_11]);
|
||||||
uint _1390 = packsRGB(param_34);
|
uint _1390 = packsRGB(param_34);
|
||||||
blend_stack[d_2][k_11] = _1390;
|
blend_stack[d_2][k_11] = _1390;
|
||||||
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
|
|
||||||
rgba[k_11] = 0.0f.xxxx;
|
rgba[k_11] = 0.0f.xxxx;
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
|
@ -655,7 +658,7 @@ void comp_main()
|
||||||
uint d_3 = min(clip_depth, 127u);
|
uint d_3 = min(clip_depth, 127u);
|
||||||
uint param_35 = blend_stack[d_3][k_12];
|
uint param_35 = blend_stack[d_3][k_12];
|
||||||
float4 bg = unpacksRGB(param_35);
|
float4 bg = unpacksRGB(param_35);
|
||||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||||
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4u;
|
cmd_ref.offset += 4u;
|
||||||
|
@ -665,8 +668,8 @@ void comp_main()
|
||||||
{
|
{
|
||||||
Alloc param_36 = cmd_alloc;
|
Alloc param_36 = cmd_alloc;
|
||||||
CmdRef param_37 = cmd_ref;
|
CmdRef param_37 = cmd_ref;
|
||||||
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||||
cmd_ref = _1469;
|
cmd_ref = _1453;
|
||||||
cmd_alloc.offset = cmd_ref.offset;
|
cmd_alloc.offset = cmd_ref.offset;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
9
piet-gpu/shader/gen/kernel4.msl
generated
9
piet-gpu/shader/gen/kernel4.msl
generated
|
@ -175,8 +175,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
TileSegRef tile_seg_ref;
|
TileSegRef tile_seg_ref;
|
||||||
spvUnsafeArray<float, 8> area;
|
spvUnsafeArray<float, 8> area;
|
||||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
||||||
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
|
|
||||||
while (mem_ok)
|
while (mem_ok)
|
||||||
{
|
{
|
||||||
Alloc param_3 = cmd_alloc;
|
Alloc param_3 = cmd_alloc;
|
||||||
|
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
float4 param_34 = float4(rgba[k_11]);
|
float4 param_34 = float4(rgba[k_11]);
|
||||||
uint _1390 = packsRGB(param_34);
|
uint _1390 = packsRGB(param_34);
|
||||||
blend_stack[d_2][k_11] = _1390;
|
blend_stack[d_2][k_11] = _1390;
|
||||||
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
|
|
||||||
rgba[k_11] = float4(0.0);
|
rgba[k_11] = float4(0.0);
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
|
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
uint d_3 = min(clip_depth, 127u);
|
uint d_3 = min(clip_depth, 127u);
|
||||||
uint param_35 = blend_stack[d_3][k_12];
|
uint param_35 = blend_stack[d_3][k_12];
|
||||||
float4 bg = unpacksRGB(param_35);
|
float4 bg = unpacksRGB(param_35);
|
||||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||||
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4u;
|
cmd_ref.offset += 4u;
|
||||||
|
|
BIN
piet-gpu/shader/gen/kernel4.spv
generated
BIN
piet-gpu/shader/gen/kernel4.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
Binary file not shown.
13
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
13
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
|
@ -117,8 +117,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -457,7 +462,6 @@ void comp_main()
|
||||||
TileSegRef tile_seg_ref;
|
TileSegRef tile_seg_ref;
|
||||||
float area[8];
|
float area[8];
|
||||||
uint blend_stack[128][8];
|
uint blend_stack[128][8];
|
||||||
float blend_alpha_stack[128][8];
|
|
||||||
while (mem_ok)
|
while (mem_ok)
|
||||||
{
|
{
|
||||||
Alloc param_3 = cmd_alloc;
|
Alloc param_3 = cmd_alloc;
|
||||||
|
@ -640,7 +644,6 @@ void comp_main()
|
||||||
float4 param_34 = float4(rgba[k_11]);
|
float4 param_34 = float4(rgba[k_11]);
|
||||||
uint _1390 = packsRGB(param_34);
|
uint _1390 = packsRGB(param_34);
|
||||||
blend_stack[d_2][k_11] = _1390;
|
blend_stack[d_2][k_11] = _1390;
|
||||||
blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f);
|
|
||||||
rgba[k_11] = 0.0f.xxxx;
|
rgba[k_11] = 0.0f.xxxx;
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
|
@ -655,7 +658,7 @@ void comp_main()
|
||||||
uint d_3 = min(clip_depth, 127u);
|
uint d_3 = min(clip_depth, 127u);
|
||||||
uint param_35 = blend_stack[d_3][k_12];
|
uint param_35 = blend_stack[d_3][k_12];
|
||||||
float4 bg = unpacksRGB(param_35);
|
float4 bg = unpacksRGB(param_35);
|
||||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||||
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4u;
|
cmd_ref.offset += 4u;
|
||||||
|
@ -665,8 +668,8 @@ void comp_main()
|
||||||
{
|
{
|
||||||
Alloc param_36 = cmd_alloc;
|
Alloc param_36 = cmd_alloc;
|
||||||
CmdRef param_37 = cmd_ref;
|
CmdRef param_37 = cmd_ref;
|
||||||
CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref };
|
||||||
cmd_ref = _1469;
|
cmd_ref = _1453;
|
||||||
cmd_alloc.offset = cmd_ref.offset;
|
cmd_alloc.offset = cmd_ref.offset;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
9
piet-gpu/shader/gen/kernel4_gray.msl
generated
9
piet-gpu/shader/gen/kernel4_gray.msl
generated
|
@ -175,8 +175,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -507,7 +512,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
TileSegRef tile_seg_ref;
|
TileSegRef tile_seg_ref;
|
||||||
spvUnsafeArray<float, 8> area;
|
spvUnsafeArray<float, 8> area;
|
||||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
|
||||||
spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
|
|
||||||
while (mem_ok)
|
while (mem_ok)
|
||||||
{
|
{
|
||||||
Alloc param_3 = cmd_alloc;
|
Alloc param_3 = cmd_alloc;
|
||||||
|
@ -687,7 +691,6 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
float4 param_34 = float4(rgba[k_11]);
|
float4 param_34 = float4(rgba[k_11]);
|
||||||
uint _1390 = packsRGB(param_34);
|
uint _1390 = packsRGB(param_34);
|
||||||
blend_stack[d_2][k_11] = _1390;
|
blend_stack[d_2][k_11] = _1390;
|
||||||
blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0);
|
|
||||||
rgba[k_11] = float4(0.0);
|
rgba[k_11] = float4(0.0);
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
|
@ -702,7 +705,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7
|
||||||
uint d_3 = min(clip_depth, 127u);
|
uint d_3 = min(clip_depth, 127u);
|
||||||
uint param_35 = blend_stack[d_3][k_12];
|
uint param_35 = blend_stack[d_3][k_12];
|
||||||
float4 bg = unpacksRGB(param_35);
|
float4 bg = unpacksRGB(param_35);
|
||||||
float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12];
|
float4 fg_1 = rgba[k_12] * area[k_12];
|
||||||
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4u;
|
cmd_ref.offset += 4u;
|
||||||
|
|
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/path_coarse.hlsl
generated
5
piet-gpu/shader/gen/path_coarse.hlsl
generated
|
@ -86,8 +86,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
5
piet-gpu/shader/gen/path_coarse.msl
generated
5
piet-gpu/shader/gen/path_coarse.msl
generated
|
@ -146,8 +146,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
Binary file not shown.
127
piet-gpu/shader/gen/pathseg.hlsl
generated
127
piet-gpu/shader/gen/pathseg.hlsl
generated
|
@ -64,8 +64,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -80,7 +85,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u };
|
||||||
RWByteAddressBuffer _111 : register(u0, space0);
|
RWByteAddressBuffer _111 : register(u0, space0);
|
||||||
ByteAddressBuffer _574 : register(t2, space0);
|
ByteAddressBuffer _574 : register(t2, space0);
|
||||||
ByteAddressBuffer _639 : register(t1, space0);
|
ByteAddressBuffer _639 : register(t1, space0);
|
||||||
ByteAddressBuffer _709 : register(t3, space0);
|
ByteAddressBuffer _710 : register(t3, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -356,7 +361,7 @@ uint round_up(float x)
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
uint ix = gl_GlobalInvocationID.x * 4u;
|
||||||
uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid local_tm = reduce_tag(param);
|
TagMonoid local_tm = reduce_tag(param);
|
||||||
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
||||||
|
@ -377,17 +382,17 @@ void comp_main()
|
||||||
TagMonoid tm = tag_monoid_identity();
|
TagMonoid tm = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
TagMonoid _715;
|
TagMonoid _716;
|
||||||
_715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
|
_716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
|
||||||
_715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
|
_716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
|
||||||
_715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
|
_716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
|
||||||
_715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
|
_716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
|
||||||
_715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
|
_716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
|
||||||
tm.trans_ix = _715.trans_ix;
|
tm.trans_ix = _716.trans_ix;
|
||||||
tm.linewidth_ix = _715.linewidth_ix;
|
tm.linewidth_ix = _716.linewidth_ix;
|
||||||
tm.pathseg_ix = _715.pathseg_ix;
|
tm.pathseg_ix = _716.pathseg_ix;
|
||||||
tm.path_ix = _715.path_ix;
|
tm.path_ix = _716.path_ix;
|
||||||
tm.pathseg_offset = _715.pathseg_offset;
|
tm.pathseg_offset = _716.pathseg_offset;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -395,14 +400,14 @@ void comp_main()
|
||||||
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
|
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
|
||||||
tm = combine_tag_monoid(param_3, param_4);
|
tm = combine_tag_monoid(param_3, param_4);
|
||||||
}
|
}
|
||||||
uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset;
|
uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset;
|
||||||
uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix;
|
uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix;
|
||||||
uint save_path_ix = tm.path_ix;
|
uint save_path_ix = tm.path_ix;
|
||||||
uint trans_ix = tm.trans_ix;
|
uint trans_ix = tm.trans_ix;
|
||||||
TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) };
|
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
|
||||||
TransformSegRef trans_ref = _770;
|
TransformSegRef trans_ref = _771;
|
||||||
PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
|
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
|
||||||
PathSegRef ps_ref = _780;
|
PathSegRef ps_ref = _781;
|
||||||
float linewidth[4];
|
float linewidth[4];
|
||||||
uint save_trans_ix[4];
|
uint save_trans_ix[4];
|
||||||
float2 p0;
|
float2 p0;
|
||||||
|
@ -455,9 +460,9 @@ void comp_main()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Alloc _876;
|
Alloc _877;
|
||||||
_876.offset = _639.Load(36);
|
_877.offset = _639.Load(36);
|
||||||
param_13.offset = _876.offset;
|
param_13.offset = _877.offset;
|
||||||
TransformSegRef param_14 = trans_ref;
|
TransformSegRef param_14 = trans_ref;
|
||||||
TransformSeg transform = TransformSeg_read(param_13, param_14);
|
TransformSeg transform = TransformSeg_read(param_13, param_14);
|
||||||
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
|
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
|
||||||
|
@ -466,25 +471,25 @@ void comp_main()
|
||||||
if (seg_type >= 2u)
|
if (seg_type >= 2u)
|
||||||
{
|
{
|
||||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||||
float4 _946 = bbox;
|
float4 _947 = bbox;
|
||||||
float2 _949 = min(_946.xy, p2);
|
float2 _950 = min(_947.xy, p2);
|
||||||
bbox.x = _949.x;
|
bbox.x = _950.x;
|
||||||
bbox.y = _949.y;
|
bbox.y = _950.y;
|
||||||
float4 _954 = bbox;
|
float4 _955 = bbox;
|
||||||
float2 _957 = max(_954.zw, p2);
|
float2 _958 = max(_955.zw, p2);
|
||||||
bbox.z = _957.x;
|
bbox.z = _958.x;
|
||||||
bbox.w = _957.y;
|
bbox.w = _958.y;
|
||||||
if (seg_type == 3u)
|
if (seg_type == 3u)
|
||||||
{
|
{
|
||||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||||
float4 _982 = bbox;
|
float4 _983 = bbox;
|
||||||
float2 _985 = min(_982.xy, p3);
|
float2 _986 = min(_983.xy, p3);
|
||||||
bbox.x = _985.x;
|
bbox.x = _986.x;
|
||||||
bbox.y = _985.y;
|
bbox.y = _986.y;
|
||||||
float4 _990 = bbox;
|
float4 _991 = bbox;
|
||||||
float2 _993 = max(_990.zw, p3);
|
float2 _994 = max(_991.zw, p3);
|
||||||
bbox.z = _993.x;
|
bbox.z = _994.x;
|
||||||
bbox.w = _993.y;
|
bbox.w = _994.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -515,9 +520,9 @@ void comp_main()
|
||||||
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
|
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
|
||||||
cubic.stroke = stroke;
|
cubic.stroke = stroke;
|
||||||
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
|
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
|
||||||
Alloc _1088;
|
Alloc _1089;
|
||||||
_1088.offset = _639.Load(28);
|
_1089.offset = _639.Load(28);
|
||||||
param_15.offset = _1088.offset;
|
param_15.offset = _1089.offset;
|
||||||
PathSegRef param_16 = ps_ref;
|
PathSegRef param_16 = ps_ref;
|
||||||
uint param_17 = fill_mode;
|
uint param_17 = fill_mode;
|
||||||
PathCubic param_18 = cubic;
|
PathCubic param_18 = cubic;
|
||||||
|
@ -574,17 +579,17 @@ void comp_main()
|
||||||
Monoid param_24 = local[i_4];
|
Monoid param_24 = local[i_4];
|
||||||
Monoid m = combine_monoid(param_23, param_24);
|
Monoid m = combine_monoid(param_23, param_24);
|
||||||
bool do_atomic = false;
|
bool do_atomic = false;
|
||||||
bool _1263 = i_4 == 3u;
|
bool _1264 = i_4 == 3u;
|
||||||
bool _1269;
|
bool _1270;
|
||||||
if (_1263)
|
if (_1264)
|
||||||
{
|
{
|
||||||
_1269 = gl_LocalInvocationID.x == 255u;
|
_1270 = gl_LocalInvocationID.x == 255u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1269 = _1263;
|
_1270 = _1264;
|
||||||
}
|
}
|
||||||
if (_1269)
|
if (_1270)
|
||||||
{
|
{
|
||||||
do_atomic = true;
|
do_atomic = true;
|
||||||
}
|
}
|
||||||
|
@ -612,30 +617,30 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (do_atomic)
|
if (do_atomic)
|
||||||
{
|
{
|
||||||
bool _1334 = m.bbox.z > m.bbox.x;
|
bool _1335 = m.bbox.z > m.bbox.x;
|
||||||
bool _1343;
|
bool _1344;
|
||||||
if (!_1334)
|
if (!_1335)
|
||||||
{
|
{
|
||||||
_1343 = m.bbox.w > m.bbox.y;
|
_1344 = m.bbox.w > m.bbox.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1343 = _1334;
|
_1344 = _1335;
|
||||||
}
|
}
|
||||||
if (_1343)
|
if (_1344)
|
||||||
{
|
{
|
||||||
float param_29 = m.bbox.x;
|
float param_29 = m.bbox.x;
|
||||||
uint _1352;
|
uint _1353;
|
||||||
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352);
|
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
|
||||||
float param_30 = m.bbox.y;
|
float param_30 = m.bbox.y;
|
||||||
uint _1360;
|
uint _1361;
|
||||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360);
|
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
|
||||||
float param_31 = m.bbox.z;
|
float param_31 = m.bbox.z;
|
||||||
uint _1368;
|
uint _1369;
|
||||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368);
|
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
|
||||||
float param_32 = m.bbox.w;
|
float param_32 = m.bbox.w;
|
||||||
uint _1376;
|
uint _1377;
|
||||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376);
|
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
|
||||||
}
|
}
|
||||||
bbox_out_ix += 6u;
|
bbox_out_ix += 6u;
|
||||||
}
|
}
|
||||||
|
|
83
piet-gpu/shader/gen/pathseg.msl
generated
83
piet-gpu/shader/gen/pathseg.msl
generated
|
@ -129,8 +129,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -430,7 +435,7 @@ uint round_up(thread const float& x)
|
||||||
return uint(fast::min(65535.0, ceil(x) + 32768.0));
|
return uint(fast::min(65535.0, ceil(x) + 32768.0));
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup TagMonoid sh_tag[256];
|
threadgroup TagMonoid sh_tag[256];
|
||||||
threadgroup Monoid sh_scratch[256];
|
threadgroup Monoid sh_scratch[256];
|
||||||
|
@ -456,12 +461,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
TagMonoid tm = tag_monoid_identity();
|
TagMonoid tm = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
uint _712 = gl_WorkGroupID.x - 1u;
|
uint _713 = gl_WorkGroupID.x - 1u;
|
||||||
tm.trans_ix = _709.parent[_712].trans_ix;
|
tm.trans_ix = _710.parent[_713].trans_ix;
|
||||||
tm.linewidth_ix = _709.parent[_712].linewidth_ix;
|
tm.linewidth_ix = _710.parent[_713].linewidth_ix;
|
||||||
tm.pathseg_ix = _709.parent[_712].pathseg_ix;
|
tm.pathseg_ix = _710.parent[_713].pathseg_ix;
|
||||||
tm.path_ix = _709.parent[_712].path_ix;
|
tm.path_ix = _710.parent[_713].path_ix;
|
||||||
tm.pathseg_offset = _709.parent[_712].pathseg_offset;
|
tm.pathseg_offset = _710.parent[_713].pathseg_offset;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -536,25 +541,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
if (seg_type >= 2u)
|
if (seg_type >= 2u)
|
||||||
{
|
{
|
||||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||||
float4 _946 = bbox;
|
float4 _947 = bbox;
|
||||||
float2 _949 = fast::min(_946.xy, p2);
|
float2 _950 = fast::min(_947.xy, p2);
|
||||||
bbox.x = _949.x;
|
bbox.x = _950.x;
|
||||||
bbox.y = _949.y;
|
bbox.y = _950.y;
|
||||||
float4 _954 = bbox;
|
float4 _955 = bbox;
|
||||||
float2 _957 = fast::max(_954.zw, p2);
|
float2 _958 = fast::max(_955.zw, p2);
|
||||||
bbox.z = _957.x;
|
bbox.z = _958.x;
|
||||||
bbox.w = _957.y;
|
bbox.w = _958.y;
|
||||||
if (seg_type == 3u)
|
if (seg_type == 3u)
|
||||||
{
|
{
|
||||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||||
float4 _982 = bbox;
|
float4 _983 = bbox;
|
||||||
float2 _985 = fast::min(_982.xy, p3);
|
float2 _986 = fast::min(_983.xy, p3);
|
||||||
bbox.x = _985.x;
|
bbox.x = _986.x;
|
||||||
bbox.y = _985.y;
|
bbox.y = _986.y;
|
||||||
float4 _990 = bbox;
|
float4 _991 = bbox;
|
||||||
float2 _993 = fast::max(_990.zw, p3);
|
float2 _994 = fast::max(_991.zw, p3);
|
||||||
bbox.z = _993.x;
|
bbox.z = _994.x;
|
||||||
bbox.w = _993.y;
|
bbox.w = _994.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -642,17 +647,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
Monoid param_24 = local[i_4];
|
Monoid param_24 = local[i_4];
|
||||||
Monoid m = combine_monoid(param_23, param_24);
|
Monoid m = combine_monoid(param_23, param_24);
|
||||||
bool do_atomic = false;
|
bool do_atomic = false;
|
||||||
bool _1263 = i_4 == 3u;
|
bool _1264 = i_4 == 3u;
|
||||||
bool _1269;
|
bool _1270;
|
||||||
if (_1263)
|
if (_1264)
|
||||||
{
|
{
|
||||||
_1269 = gl_LocalInvocationID.x == 255u;
|
_1270 = gl_LocalInvocationID.x == 255u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1269 = _1263;
|
_1270 = _1264;
|
||||||
}
|
}
|
||||||
if (_1269)
|
if (_1270)
|
||||||
{
|
{
|
||||||
do_atomic = true;
|
do_atomic = true;
|
||||||
}
|
}
|
||||||
|
@ -680,26 +685,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
}
|
}
|
||||||
if (do_atomic)
|
if (do_atomic)
|
||||||
{
|
{
|
||||||
bool _1334 = m.bbox.z > m.bbox.x;
|
bool _1335 = m.bbox.z > m.bbox.x;
|
||||||
bool _1343;
|
bool _1344;
|
||||||
if (!_1334)
|
if (!_1335)
|
||||||
{
|
{
|
||||||
_1343 = m.bbox.w > m.bbox.y;
|
_1344 = m.bbox.w > m.bbox.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1343 = _1334;
|
_1344 = _1335;
|
||||||
}
|
}
|
||||||
if (_1343)
|
if (_1344)
|
||||||
{
|
{
|
||||||
float param_29 = m.bbox.x;
|
float param_29 = m.bbox.x;
|
||||||
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||||
float param_30 = m.bbox.y;
|
float param_30 = m.bbox.y;
|
||||||
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||||
float param_31 = m.bbox.z;
|
float param_31 = m.bbox.z;
|
||||||
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||||
float param_32 = m.bbox.w;
|
float param_32 = m.bbox.w;
|
||||||
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||||
}
|
}
|
||||||
bbox_out_ix += 6u;
|
bbox_out_ix += 6u;
|
||||||
}
|
}
|
||||||
|
|
BIN
piet-gpu/shader/gen/pathseg.spv
generated
BIN
piet-gpu/shader/gen/pathseg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
Binary file not shown.
27
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
27
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
|
@ -26,8 +26,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -37,9 +42,9 @@ struct Config
|
||||||
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
|
||||||
|
|
||||||
ByteAddressBuffer _139 : register(t1, space0);
|
ByteAddressBuffer _139 : register(t1, space0);
|
||||||
ByteAddressBuffer _150 : register(t2, space0);
|
ByteAddressBuffer _151 : register(t2, space0);
|
||||||
RWByteAddressBuffer _237 : register(u3, space0);
|
RWByteAddressBuffer _238 : register(u3, space0);
|
||||||
RWByteAddressBuffer _257 : register(u0, space0);
|
RWByteAddressBuffer _258 : register(u0, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -83,13 +88,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 2u;
|
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||||
uint scene_ix = (_139.Load(64) >> uint(2)) + ix;
|
uint scene_ix = (_139.Load(84) >> uint(2)) + ix;
|
||||||
uint tag_word = _150.Load(scene_ix * 4 + 0);
|
uint tag_word = _151.Load(scene_ix * 4 + 0);
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid agg = reduce_tag(param);
|
TagMonoid agg = reduce_tag(param);
|
||||||
for (uint i = 1u; i < 2u; i++)
|
for (uint i = 1u; i < 2u; i++)
|
||||||
{
|
{
|
||||||
tag_word = _150.Load((scene_ix + i) * 4 + 0);
|
tag_word = _151.Load((scene_ix + i) * 4 + 0);
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
TagMonoid param_2 = agg;
|
TagMonoid param_2 = agg;
|
||||||
TagMonoid param_3 = reduce_tag(param_1);
|
TagMonoid param_3 = reduce_tag(param_1);
|
||||||
|
@ -111,11 +116,11 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
||||||
_237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
||||||
_237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
||||||
_237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
||||||
_237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
21
piet-gpu/shader/gen/pathtag_reduce.msl
generated
21
piet-gpu/shader/gen/pathtag_reduce.msl
generated
|
@ -33,8 +33,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -103,17 +108,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid&
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup TagMonoid sh_scratch[128];
|
threadgroup TagMonoid sh_scratch[128];
|
||||||
uint ix = gl_GlobalInvocationID.x * 2u;
|
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||||
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
|
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
|
||||||
uint tag_word = _150.scene[scene_ix];
|
uint tag_word = _151.scene[scene_ix];
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid agg = reduce_tag(param);
|
TagMonoid agg = reduce_tag(param);
|
||||||
for (uint i = 1u; i < 2u; i++)
|
for (uint i = 1u; i < 2u; i++)
|
||||||
{
|
{
|
||||||
tag_word = _150.scene[scene_ix + i];
|
tag_word = _151.scene[scene_ix + i];
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
TagMonoid param_2 = agg;
|
TagMonoid param_2 = agg;
|
||||||
TagMonoid param_3 = reduce_tag(param_1);
|
TagMonoid param_3 = reduce_tag(param_1);
|
||||||
|
@ -135,11 +140,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
||||||
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
||||||
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
||||||
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
||||||
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
Binary file not shown.
5
piet-gpu/shader/gen/tile_alloc.hlsl
generated
5
piet-gpu/shader/gen/tile_alloc.hlsl
generated
|
@ -60,8 +60,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
5
piet-gpu/shader/gen/tile_alloc.msl
generated
5
piet-gpu/shader/gen/tile_alloc.msl
generated
|
@ -81,8 +81,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/transform_leaf.hlsl
generated
7
piet-gpu/shader/gen/transform_leaf.hlsl
generated
|
@ -39,8 +39,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -150,7 +155,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
TransformRef _285 = { _278.Load(56) + (ix * 24u) };
|
TransformRef _285 = { _278.Load(76) + (ix * 24u) };
|
||||||
TransformRef ref = _285;
|
TransformRef ref = _285;
|
||||||
TransformRef param = ref;
|
TransformRef param = ref;
|
||||||
Transform agg = Transform_read(param);
|
Transform agg = Transform_read(param);
|
||||||
|
|
5
piet-gpu/shader/gen/transform_leaf.msl
generated
5
piet-gpu/shader/gen/transform_leaf.msl
generated
|
@ -102,8 +102,13 @@ struct Config
|
||||||
Alloc_1 trans_alloc;
|
Alloc_1 trans_alloc;
|
||||||
Alloc_1 bbox_alloc;
|
Alloc_1 bbox_alloc;
|
||||||
Alloc_1 drawmonoid_alloc;
|
Alloc_1 drawmonoid_alloc;
|
||||||
|
Alloc_1 clip_alloc;
|
||||||
|
Alloc_1 clip_bic_alloc;
|
||||||
|
Alloc_1 clip_stack_alloc;
|
||||||
|
Alloc_1 clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
Binary file not shown.
7
piet-gpu/shader/gen/transform_reduce.hlsl
generated
7
piet-gpu/shader/gen/transform_reduce.hlsl
generated
|
@ -28,8 +28,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
@ -87,7 +92,7 @@ Transform combine_monoid(Transform a, Transform b)
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
TransformRef _168 = { _161.Load(56) + (ix * 24u) };
|
TransformRef _168 = { _161.Load(76) + (ix * 24u) };
|
||||||
TransformRef ref = _168;
|
TransformRef ref = _168;
|
||||||
TransformRef param = ref;
|
TransformRef param = ref;
|
||||||
Transform agg = Transform_read(param);
|
Transform agg = Transform_read(param);
|
||||||
|
|
5
piet-gpu/shader/gen/transform_reduce.msl
generated
5
piet-gpu/shader/gen/transform_reduce.msl
generated
|
@ -40,8 +40,13 @@ struct Config
|
||||||
Alloc trans_alloc;
|
Alloc trans_alloc;
|
||||||
Alloc bbox_alloc;
|
Alloc bbox_alloc;
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
Alloc clip_alloc;
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
uint n_clip;
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
uint linewidth_offset;
|
uint linewidth_offset;
|
||||||
uint pathtag_offset;
|
uint pathtag_offset;
|
||||||
|
|
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
Binary file not shown.
|
@ -91,7 +91,6 @@ void main() {
|
||||||
vec2 xy = vec2(xy_uint);
|
vec2 xy = vec2(xy_uint);
|
||||||
mediump vec4 rgba[CHUNK];
|
mediump vec4 rgba[CHUNK];
|
||||||
uint blend_stack[MAX_BLEND_STACK][CHUNK];
|
uint blend_stack[MAX_BLEND_STACK][CHUNK];
|
||||||
mediump float blend_alpha_stack[MAX_BLEND_STACK][CHUNK];
|
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
rgba[i] = vec4(0.0);
|
rgba[i] = vec4(0.0);
|
||||||
}
|
}
|
||||||
|
@ -211,7 +210,6 @@ void main() {
|
||||||
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
|
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
|
||||||
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
||||||
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
|
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
|
||||||
blend_alpha_stack[d][k] = clamp(abs(area[k]), 0.0, 1.0);
|
|
||||||
rgba[k] = vec4(0.0);
|
rgba[k] = vec4(0.0);
|
||||||
}
|
}
|
||||||
clip_depth++;
|
clip_depth++;
|
||||||
|
@ -222,7 +220,7 @@ void main() {
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
|
||||||
mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
|
mediump vec4 bg = unpacksRGB(blend_stack[d][k]);
|
||||||
mediump vec4 fg = rgba[k] * area[k] * blend_alpha_stack[d][k];
|
mediump vec4 fg = rgba[k] * area[k];
|
||||||
rgba[k] = bg * (1.0 - fg.a) + fg;
|
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4;
|
cmd_ref.offset += 4;
|
||||||
|
|
|
@ -46,11 +46,23 @@ struct Config {
|
||||||
// Monoid for draw objects
|
// Monoid for draw objects
|
||||||
Alloc drawmonoid_alloc;
|
Alloc drawmonoid_alloc;
|
||||||
|
|
||||||
|
// BeginClip(path_ix) / EndClip
|
||||||
|
Alloc clip_alloc;
|
||||||
|
// Intermediate bicyclic semigroup
|
||||||
|
Alloc clip_bic_alloc;
|
||||||
|
// Intermediate stack
|
||||||
|
Alloc clip_stack_alloc;
|
||||||
|
// Clip processing results (path_ix + bbox)
|
||||||
|
Alloc clip_bbox_alloc;
|
||||||
|
|
||||||
// Number of transforms in scene
|
// Number of transforms in scene
|
||||||
// This is probably not needed.
|
// This is probably not needed.
|
||||||
uint n_trans;
|
uint n_trans;
|
||||||
// This only counts actual paths, not EndClip.
|
// This *should* count only actual paths, but in the current
|
||||||
|
// implementation is redundant with n_elements.
|
||||||
uint n_path;
|
uint n_path;
|
||||||
|
// Total number of BeginClip and EndClip draw objects.
|
||||||
|
uint n_clip;
|
||||||
// Offset (in bytes) of transform stream in scene buffer
|
// Offset (in bytes) of transform stream in scene buffer
|
||||||
uint trans_offset;
|
uint trans_offset;
|
||||||
// Offset (in bytes) of linewidth stream in scene
|
// Offset (in bytes) of linewidth stream in scene
|
||||||
|
|
|
@ -20,7 +20,8 @@ use bytemuck::{Pod, Zeroable};
|
||||||
use piet_gpu_hal::BufWrite;
|
use piet_gpu_hal::BufWrite;
|
||||||
|
|
||||||
use crate::stages::{
|
use crate::stages::{
|
||||||
self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
|
self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
|
||||||
|
TRANSFORM_PART_SIZE,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct Encoder {
|
pub struct Encoder {
|
||||||
|
@ -31,6 +32,7 @@ pub struct Encoder {
|
||||||
drawobj_stream: Vec<u8>,
|
drawobj_stream: Vec<u8>,
|
||||||
n_path: u32,
|
n_path: u32,
|
||||||
n_pathseg: u32,
|
n_pathseg: u32,
|
||||||
|
n_clip: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A scene fragment encoding a glyph.
|
/// A scene fragment encoding a glyph.
|
||||||
|
@ -98,6 +100,7 @@ impl Encoder {
|
||||||
drawobj_stream: Vec::new(),
|
drawobj_stream: Vec::new(),
|
||||||
n_path: 0,
|
n_path: 0,
|
||||||
n_pathseg: 0,
|
n_pathseg: 0,
|
||||||
|
n_clip: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +158,7 @@ impl Encoder {
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
|
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
|
||||||
|
self.n_clip += 1;
|
||||||
saved
|
saved
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,6 +174,7 @@ impl Encoder {
|
||||||
// This is a dummy path, and will go away with the new clip impl.
|
// This is a dummy path, and will go away with the new clip impl.
|
||||||
self.tag_stream.push(0x10);
|
self.tag_stream.push(0x10);
|
||||||
self.n_path += 1;
|
self.n_path += 1;
|
||||||
|
self.n_clip += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a config for the element processing pipeline.
|
/// Return a config for the element processing pipeline.
|
||||||
|
@ -203,6 +208,20 @@ impl Encoder {
|
||||||
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
||||||
let anno_alloc = alloc;
|
let anno_alloc = alloc;
|
||||||
alloc += n_drawobj * ANNOTATED_SIZE;
|
alloc += n_drawobj * ANNOTATED_SIZE;
|
||||||
|
let clip_alloc = alloc;
|
||||||
|
let n_clip = self.n_clip as usize;
|
||||||
|
const CLIP_SIZE: usize = 4;
|
||||||
|
alloc += n_clip * CLIP_SIZE;
|
||||||
|
let clip_bic_alloc = alloc;
|
||||||
|
const CLIP_BIC_SIZE: usize = 8;
|
||||||
|
// This can round down, as we only reduce the prefix
|
||||||
|
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
|
||||||
|
let clip_stack_alloc = alloc;
|
||||||
|
const CLIP_EL_SIZE: usize = 20;
|
||||||
|
alloc += n_clip * CLIP_EL_SIZE;
|
||||||
|
let clip_bbox_alloc = alloc;
|
||||||
|
const CLIP_BBOX_SIZE: usize = 16;
|
||||||
|
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
|
||||||
|
|
||||||
let config = Config {
|
let config = Config {
|
||||||
n_elements: n_drawobj as u32,
|
n_elements: n_drawobj as u32,
|
||||||
|
@ -212,8 +231,13 @@ impl Encoder {
|
||||||
trans_alloc: trans_alloc as u32,
|
trans_alloc: trans_alloc as u32,
|
||||||
bbox_alloc: bbox_alloc as u32,
|
bbox_alloc: bbox_alloc as u32,
|
||||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||||
|
clip_alloc: clip_alloc as u32,
|
||||||
|
clip_bic_alloc: clip_bic_alloc as u32,
|
||||||
|
clip_stack_alloc: clip_stack_alloc as u32,
|
||||||
|
clip_bbox_alloc: clip_bbox_alloc as u32,
|
||||||
n_trans: n_trans as u32,
|
n_trans: n_trans as u32,
|
||||||
n_path: self.n_path,
|
n_path: self.n_path,
|
||||||
|
n_clip: self.n_clip,
|
||||||
trans_offset: trans_offset as u32,
|
trans_offset: trans_offset as u32,
|
||||||
linewidth_offset: linewidth_offset as u32,
|
linewidth_offset: linewidth_offset as u32,
|
||||||
pathtag_offset: pathtag_offset as u32,
|
pathtag_offset: pathtag_offset as u32,
|
||||||
|
@ -261,6 +285,10 @@ impl Encoder {
|
||||||
self.tag_stream.len()
|
self.tag_stream.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn n_clip(&self) -> u32 {
|
||||||
|
self.n_clip
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
|
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
|
||||||
self.tag_stream.extend(&glyph.tag_stream);
|
self.tag_stream.extend(&glyph.tag_stream);
|
||||||
self.pathseg_stream.extend(&glyph.pathseg_stream);
|
self.pathseg_stream.extend(&glyph.pathseg_stream);
|
||||||
|
|
|
@ -20,9 +20,9 @@ use piet_gpu_hal::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use pico_svg::PicoSvg;
|
use pico_svg::PicoSvg;
|
||||||
use stages::{ElementBinding, ElementCode};
|
use stages::{ClipBinding, ElementBinding, ElementCode};
|
||||||
|
|
||||||
use crate::stages::{Config, ElementStage};
|
use crate::stages::{ClipCode, Config, ElementStage};
|
||||||
|
|
||||||
const TILE_W: usize = 16;
|
const TILE_W: usize = 16;
|
||||||
const TILE_H: usize = 16;
|
const TILE_H: usize = 16;
|
||||||
|
@ -86,6 +86,9 @@ pub struct Renderer {
|
||||||
element_stage: ElementStage,
|
element_stage: ElementStage,
|
||||||
element_bindings: Vec<ElementBinding>,
|
element_bindings: Vec<ElementBinding>,
|
||||||
|
|
||||||
|
clip_code: ClipCode,
|
||||||
|
clip_binding: ClipBinding,
|
||||||
|
|
||||||
tile_pipeline: Pipeline,
|
tile_pipeline: Pipeline,
|
||||||
tile_ds: DescriptorSet,
|
tile_ds: DescriptorSet,
|
||||||
|
|
||||||
|
@ -110,6 +113,7 @@ pub struct Renderer {
|
||||||
n_paths: usize,
|
n_paths: usize,
|
||||||
n_pathseg: usize,
|
n_pathseg: usize,
|
||||||
n_pathtag: usize,
|
n_pathtag: usize,
|
||||||
|
n_clip: u32,
|
||||||
|
|
||||||
// Keep a reference to the image so that it is not destroyed.
|
// Keep a reference to the image so that it is not destroyed.
|
||||||
_bg_image: Image,
|
_bg_image: Image,
|
||||||
|
@ -191,18 +195,20 @@ impl Renderer {
|
||||||
let element_stage = ElementStage::new(session, &element_code);
|
let element_stage = ElementStage::new(session, &element_code);
|
||||||
let element_bindings = scene_bufs
|
let element_bindings = scene_bufs
|
||||||
.iter()
|
.iter()
|
||||||
.zip(&config_bufs)
|
.map(|scene_buf| {
|
||||||
.map(|(scene_buf, config_buf)| {
|
|
||||||
element_stage.bind(
|
element_stage.bind(
|
||||||
session,
|
session,
|
||||||
&element_code,
|
&element_code,
|
||||||
config_buf,
|
&config_buf,
|
||||||
scene_buf,
|
scene_buf,
|
||||||
&memory_buf_dev,
|
&memory_buf_dev,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
let clip_code = ClipCode::new(session);
|
||||||
|
let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
|
||||||
|
|
||||||
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
|
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
|
||||||
let tile_pipeline = session
|
let tile_pipeline = session
|
||||||
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
|
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
|
||||||
|
@ -286,6 +292,8 @@ impl Renderer {
|
||||||
element_code,
|
element_code,
|
||||||
element_stage,
|
element_stage,
|
||||||
element_bindings,
|
element_bindings,
|
||||||
|
clip_code,
|
||||||
|
clip_binding,
|
||||||
tile_pipeline,
|
tile_pipeline,
|
||||||
tile_ds,
|
tile_ds,
|
||||||
path_pipeline,
|
path_pipeline,
|
||||||
|
@ -304,6 +312,7 @@ impl Renderer {
|
||||||
n_paths: 0,
|
n_paths: 0,
|
||||||
n_pathseg: 0,
|
n_pathseg: 0,
|
||||||
n_pathtag: 0,
|
n_pathtag: 0,
|
||||||
|
n_clip: 0,
|
||||||
_bg_image: bg_image,
|
_bg_image: bg_image,
|
||||||
gradient_bufs,
|
gradient_bufs,
|
||||||
gradients,
|
gradients,
|
||||||
|
@ -329,6 +338,7 @@ impl Renderer {
|
||||||
self.n_drawobj = render_ctx.n_drawobj();
|
self.n_drawobj = render_ctx.n_drawobj();
|
||||||
self.n_pathseg = render_ctx.n_pathseg() as usize;
|
self.n_pathseg = render_ctx.n_pathseg() as usize;
|
||||||
self.n_pathtag = render_ctx.n_pathtag();
|
self.n_pathtag = render_ctx.n_pathtag();
|
||||||
|
self.n_clip = render_ctx.n_clip();
|
||||||
|
|
||||||
// These constants depend on encoding and may need to be updated.
|
// These constants depend on encoding and may need to be updated.
|
||||||
// Perhaps we can plumb these from piet-gpu-derive?
|
// Perhaps we can plumb these from piet-gpu-derive?
|
||||||
|
@ -342,6 +352,7 @@ impl Renderer {
|
||||||
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
||||||
let ptcl_base = alloc;
|
let ptcl_base = alloc;
|
||||||
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
||||||
|
|
||||||
config.width_in_tiles = width_in_tiles as u32;
|
config.width_in_tiles = width_in_tiles as u32;
|
||||||
config.height_in_tiles = height_in_tiles as u32;
|
config.height_in_tiles = height_in_tiles as u32;
|
||||||
config.tile_alloc = tile_base as u32;
|
config.tile_alloc = tile_base as u32;
|
||||||
|
@ -401,6 +412,19 @@ impl Renderer {
|
||||||
cmd_buf.end_debug_label();
|
cmd_buf.end_debug_label();
|
||||||
cmd_buf.write_timestamp(&query_pool, 1);
|
cmd_buf.write_timestamp(&query_pool, 1);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.begin_debug_label("Clip bounding box calculation");
|
||||||
|
self.clip_binding
|
||||||
|
.record(cmd_buf, &self.clip_code, self.n_clip as u32);
|
||||||
|
cmd_buf.end_debug_label();
|
||||||
|
cmd_buf.begin_debug_label("Element binning");
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&self.bin_pipeline,
|
||||||
|
&self.bin_ds,
|
||||||
|
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||||
|
(256, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.end_debug_label();
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.begin_debug_label("Tile allocation");
|
cmd_buf.begin_debug_label("Tile allocation");
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&self.tile_pipeline,
|
&self.tile_pipeline,
|
||||||
|
@ -430,18 +454,7 @@ impl Renderer {
|
||||||
);
|
);
|
||||||
cmd_buf.end_debug_label();
|
cmd_buf.end_debug_label();
|
||||||
cmd_buf.write_timestamp(&query_pool, 4);
|
cmd_buf.write_timestamp(&query_pool, 4);
|
||||||
// Note: this barrier is not needed as an actual dependency between
|
// TODO: redo query accounting
|
||||||
// pipeline stages, but I am keeping it in so that timer queries are
|
|
||||||
// easier to interpret.
|
|
||||||
cmd_buf.memory_barrier();
|
|
||||||
cmd_buf.begin_debug_label("Element binning");
|
|
||||||
cmd_buf.dispatch(
|
|
||||||
&self.bin_pipeline,
|
|
||||||
&self.bin_ds,
|
|
||||||
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
|
||||||
(256, 1, 1),
|
|
||||||
);
|
|
||||||
cmd_buf.end_debug_label();
|
|
||||||
cmd_buf.write_timestamp(&query_pool, 5);
|
cmd_buf.write_timestamp(&query_pool, 5);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.begin_debug_label("Coarse raster");
|
cmd_buf.begin_debug_label("Coarse raster");
|
||||||
|
|
|
@ -123,6 +123,10 @@ impl PietGpuRenderContext {
|
||||||
self.new_encoder.n_transform()
|
self.new_encoder.n_transform()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn n_clip(&self) -> u32 {
|
||||||
|
self.new_encoder.n_clip()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||||
self.new_encoder.write_scene(buf);
|
self.new_encoder.write_scene(buf);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,12 +16,14 @@
|
||||||
|
|
||||||
//! Stages for new element pipeline, exposed for testing.
|
//! Stages for new element pipeline, exposed for testing.
|
||||||
|
|
||||||
|
mod clip;
|
||||||
mod draw;
|
mod draw;
|
||||||
mod path;
|
mod path;
|
||||||
mod transform;
|
mod transform;
|
||||||
|
|
||||||
use bytemuck::{Pod, Zeroable};
|
use bytemuck::{Pod, Zeroable};
|
||||||
|
|
||||||
|
pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE};
|
||||||
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
|
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
|
||||||
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
|
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
|
||||||
use piet_gpu_hal::{Buffer, CmdBuf, Session};
|
use piet_gpu_hal::{Buffer, CmdBuf, Session};
|
||||||
|
@ -47,8 +49,13 @@ pub struct Config {
|
||||||
pub trans_alloc: u32,
|
pub trans_alloc: u32,
|
||||||
pub bbox_alloc: u32,
|
pub bbox_alloc: u32,
|
||||||
pub drawmonoid_alloc: u32,
|
pub drawmonoid_alloc: u32,
|
||||||
|
pub clip_alloc: u32,
|
||||||
|
pub clip_bic_alloc: u32,
|
||||||
|
pub clip_stack_alloc: u32,
|
||||||
|
pub clip_bbox_alloc: u32,
|
||||||
pub n_trans: u32,
|
pub n_trans: u32,
|
||||||
pub n_path: u32,
|
pub n_path: u32,
|
||||||
|
pub n_clip: u32,
|
||||||
pub trans_offset: u32,
|
pub trans_offset: u32,
|
||||||
pub linewidth_offset: u32,
|
pub linewidth_offset: u32,
|
||||||
pub pathtag_offset: u32,
|
pub pathtag_offset: u32,
|
||||||
|
|
94
piet-gpu/src/stages/clip.rs
Normal file
94
piet-gpu/src/stages/clip.rs
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
// Copyright 2022 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
//! The clip processing stage (includes substages).
|
||||||
|
|
||||||
|
use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session};
|
||||||
|
|
||||||
|
// Note that this isn't the code/stage/binding pattern of most of the other stages
|
||||||
|
// in the new element processing pipeline. We want to move those temporary buffers
|
||||||
|
// into common memory and converge on this pattern.
|
||||||
|
pub struct ClipCode {
|
||||||
|
reduce_pipeline: Pipeline,
|
||||||
|
leaf_pipeline: Pipeline,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ClipBinding {
|
||||||
|
reduce_ds: DescriptorSet,
|
||||||
|
leaf_ds: DescriptorSet,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const CLIP_PART_SIZE: u32 = 256;
|
||||||
|
|
||||||
|
impl ClipCode {
|
||||||
|
pub unsafe fn new(session: &Session) -> ClipCode {
|
||||||
|
let reduce_code = include_shader!(session, "../../shader/gen/clip_reduce");
|
||||||
|
let reduce_pipeline = session
|
||||||
|
.create_compute_pipeline(reduce_code, &[BindType::Buffer, BindType::BufReadOnly])
|
||||||
|
.unwrap();
|
||||||
|
let leaf_code = include_shader!(session, "../../shader/gen/clip_leaf");
|
||||||
|
let leaf_pipeline = session
|
||||||
|
.create_compute_pipeline(leaf_code, &[BindType::Buffer, BindType::BufReadOnly])
|
||||||
|
.unwrap();
|
||||||
|
ClipCode {
|
||||||
|
reduce_pipeline,
|
||||||
|
leaf_pipeline,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClipBinding {
|
||||||
|
pub unsafe fn new(
|
||||||
|
session: &Session,
|
||||||
|
code: &ClipCode,
|
||||||
|
config: &Buffer,
|
||||||
|
memory: &Buffer,
|
||||||
|
) -> ClipBinding {
|
||||||
|
let reduce_ds = session
|
||||||
|
.create_simple_descriptor_set(&code.reduce_pipeline, &[memory, config])
|
||||||
|
.unwrap();
|
||||||
|
let leaf_ds = session
|
||||||
|
.create_simple_descriptor_set(&code.leaf_pipeline, &[memory, config])
|
||||||
|
.unwrap();
|
||||||
|
ClipBinding { reduce_ds, leaf_ds }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the clip dispatches.
|
||||||
|
///
|
||||||
|
/// Assumes memory barrier on entry. Provides memory barrier on exit.
|
||||||
|
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) {
|
||||||
|
let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE;
|
||||||
|
if n_wg_reduce > 0 {
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&code.reduce_pipeline,
|
||||||
|
&self.reduce_ds,
|
||||||
|
(n_wg_reduce, 1, 1),
|
||||||
|
(CLIP_PART_SIZE, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
|
}
|
||||||
|
let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE;
|
||||||
|
if n_wg > 0 {
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&code.leaf_pipeline,
|
||||||
|
&self.leaf_ds,
|
||||||
|
(n_wg, 1, 1),
|
||||||
|
(CLIP_PART_SIZE, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
237
tests/src/clip.rs
Normal file
237
tests/src/clip.rs
Normal file
|
@ -0,0 +1,237 @@
|
||||||
|
// Copyright 2022 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
//! Tests for the piet-gpu clip processing stage.
|
||||||
|
|
||||||
|
use bytemuck::{Pod, Zeroable};
|
||||||
|
use rand::Rng;
|
||||||
|
|
||||||
|
use piet_gpu::stages::{self, ClipBinding, ClipCode, DrawMonoid};
|
||||||
|
use piet_gpu_hal::{BufWrite, BufferUsage};
|
||||||
|
|
||||||
|
use crate::{Config, Runner, TestResult};
|
||||||
|
|
||||||
|
struct ClipData {
|
||||||
|
clip_stream: Vec<u32>,
|
||||||
|
// In the atomic-int friendly encoding
|
||||||
|
path_bbox_stream: Vec<PathBbox>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Pod, Zeroable, Default)]
|
||||||
|
#[repr(C)]
|
||||||
|
struct PathBbox {
|
||||||
|
bbox: [u32; 4],
|
||||||
|
linewidth: f32,
|
||||||
|
trans_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
|
let mut result = TestResult::new("clip");
|
||||||
|
let n_clip: u64 = config.size.choose(1 << 8, 1 << 12, 1 << 16);
|
||||||
|
let data = ClipData::new(n_clip);
|
||||||
|
let stage_config = data.get_config();
|
||||||
|
let config_buf = runner
|
||||||
|
.session
|
||||||
|
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
|
||||||
|
.unwrap();
|
||||||
|
// Need to actually get data uploaded
|
||||||
|
let mut memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
|
||||||
|
{
|
||||||
|
let mut buf_write = memory.map_write(..);
|
||||||
|
data.fill_memory(&mut buf_write);
|
||||||
|
}
|
||||||
|
|
||||||
|
let code = ClipCode::new(&runner.session);
|
||||||
|
let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf);
|
||||||
|
|
||||||
|
let mut commands = runner.commands();
|
||||||
|
commands.write_timestamp(0);
|
||||||
|
commands.upload(&memory);
|
||||||
|
binding.record(&mut commands.cmd_buf, &code, n_clip as u32);
|
||||||
|
commands.download(&memory);
|
||||||
|
commands.write_timestamp(1);
|
||||||
|
runner.submit(commands);
|
||||||
|
let dst = memory.map_read(..);
|
||||||
|
if let Some(failure) = data.verify(&dst) {
|
||||||
|
result.fail(failure);
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rand_bbox() -> [u32; 4] {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
const Y_MIN: u32 = 32768;
|
||||||
|
const Y_MAX: u32 = Y_MIN + 1000;
|
||||||
|
let mut x0 = rng.gen_range(Y_MIN, Y_MAX);
|
||||||
|
let mut y0 = rng.gen_range(Y_MIN, Y_MAX);
|
||||||
|
let mut x1 = rng.gen_range(Y_MIN, Y_MAX);
|
||||||
|
let mut y1 = rng.gen_range(Y_MIN, Y_MAX);
|
||||||
|
if x0 > x1 {
|
||||||
|
std::mem::swap(&mut x0, &mut x1);
|
||||||
|
}
|
||||||
|
if y0 > y1 {
|
||||||
|
std::mem::swap(&mut y0, &mut y1);
|
||||||
|
}
|
||||||
|
[x0, y0, x1, y1]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert from atomic-friendly to normal float bbox.
|
||||||
|
fn decode_bbox(raw: [u32; 4]) -> [f32; 4] {
|
||||||
|
fn decode(x: u32) -> f32 {
|
||||||
|
x as f32 - 32768.0
|
||||||
|
}
|
||||||
|
[
|
||||||
|
decode(raw[0]),
|
||||||
|
decode(raw[1]),
|
||||||
|
decode(raw[2]),
|
||||||
|
decode(raw[3]),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn intersect_bbox(b0: [f32; 4], b1: [f32; 4]) -> [f32; 4] {
|
||||||
|
[
|
||||||
|
b0[0].max(b1[0]),
|
||||||
|
b0[1].max(b1[1]),
|
||||||
|
b0[2].min(b1[2]),
|
||||||
|
b0[3].min(b1[3]),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
const INFTY_BBOX: [f32; 4] = [-1e9, -1e9, 1e9, 1e9];
|
||||||
|
|
||||||
|
impl ClipData {
|
||||||
|
/// Generate a random clip sequence
|
||||||
|
fn new(n: u64) -> ClipData {
|
||||||
|
// Simple LCG random generator, for deterministic results
|
||||||
|
let mut z = 20170705u64;
|
||||||
|
let mut depth = 0;
|
||||||
|
let mut path_bbox_stream = Vec::new();
|
||||||
|
let clip_stream = (0..n)
|
||||||
|
.map(|i| {
|
||||||
|
let is_push = if depth == 0 {
|
||||||
|
true
|
||||||
|
} else if depth >= 255 {
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
z = z.wrapping_mul(742938285) % ((1 << 31) - 1);
|
||||||
|
(z % 2) != 0
|
||||||
|
};
|
||||||
|
if is_push {
|
||||||
|
depth += 1;
|
||||||
|
let path_ix = path_bbox_stream.len() as u32;
|
||||||
|
let bbox = rand_bbox();
|
||||||
|
let path_bbox = PathBbox {
|
||||||
|
bbox,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
path_bbox_stream.push(path_bbox);
|
||||||
|
path_ix
|
||||||
|
} else {
|
||||||
|
depth -= 1;
|
||||||
|
!(i as u32)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
ClipData {
|
||||||
|
clip_stream,
|
||||||
|
path_bbox_stream,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_config(&self) -> stages::Config {
|
||||||
|
let n_clip = self.clip_stream.len();
|
||||||
|
let n_path = self.path_bbox_stream.len();
|
||||||
|
let clip_alloc = 0;
|
||||||
|
let path_bbox_alloc = clip_alloc + 4 * n_clip;
|
||||||
|
let drawmonoid_alloc = path_bbox_alloc + 24 * n_path;
|
||||||
|
let clip_bic_alloc = drawmonoid_alloc + 8 * n_clip;
|
||||||
|
// TODO: this is over-allocated, we only need one bic per wg
|
||||||
|
let clip_stack_alloc = clip_bic_alloc + 8 * n_clip;
|
||||||
|
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
|
||||||
|
stages::Config {
|
||||||
|
clip_alloc: clip_alloc as u32,
|
||||||
|
// TODO: this wants to be renamed to path_bbox_alloc
|
||||||
|
bbox_alloc: path_bbox_alloc as u32,
|
||||||
|
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||||
|
clip_bic_alloc: clip_bic_alloc as u32,
|
||||||
|
clip_stack_alloc: clip_stack_alloc as u32,
|
||||||
|
clip_bbox_alloc: clip_bbox_alloc as u32,
|
||||||
|
n_clip: n_clip as u32,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn memory_size(&self) -> u64 {
|
||||||
|
(8 + self.clip_stream.len() * (4 + 8 + 8 + 20 + 16) + self.path_bbox_stream.len() * 24)
|
||||||
|
as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fill_memory(&self, buf: &mut BufWrite) {
|
||||||
|
// offset / header; no dynamic allocation
|
||||||
|
buf.fill_zero(8);
|
||||||
|
buf.extend_slice(&self.clip_stream);
|
||||||
|
buf.extend_slice(&self.path_bbox_stream);
|
||||||
|
// drawmonoid is left uninitialized
|
||||||
|
}
|
||||||
|
|
||||||
|
fn verify(&self, buf: &[u8]) -> Option<String> {
|
||||||
|
let n_clip = self.clip_stream.len();
|
||||||
|
let n_path = self.path_bbox_stream.len();
|
||||||
|
let clip_bbox_start = 8 + n_clip * (4 + 8 + 8 + 20) + n_path * 24;
|
||||||
|
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
|
||||||
|
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
|
||||||
|
let draw_start = 8 + n_clip * 4 + n_path * 24;
|
||||||
|
let draw_range = draw_start..(draw_start + n_clip * 8);
|
||||||
|
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
|
||||||
|
let mut bbox_stack = Vec::new();
|
||||||
|
let mut parent_stack = Vec::new();
|
||||||
|
for (i, path_ix) in self.clip_stream.iter().enumerate() {
|
||||||
|
let mut expected_path = None;
|
||||||
|
if *path_ix >= 0x8000_0000 {
|
||||||
|
let parent = parent_stack.pop().unwrap();
|
||||||
|
expected_path = Some(self.clip_stream[parent as usize]);
|
||||||
|
bbox_stack.pop().unwrap();
|
||||||
|
} else {
|
||||||
|
parent_stack.push(i);
|
||||||
|
let path_bbox_stream = self.path_bbox_stream[*path_ix as usize];
|
||||||
|
let bbox = decode_bbox(path_bbox_stream.bbox);
|
||||||
|
let new = match bbox_stack.last() {
|
||||||
|
None => bbox,
|
||||||
|
Some(old) => intersect_bbox(*old, bbox),
|
||||||
|
};
|
||||||
|
bbox_stack.push(new);
|
||||||
|
};
|
||||||
|
let expected = bbox_stack.last().copied().unwrap_or(INFTY_BBOX);
|
||||||
|
let clip_bbox = clip_result[i];
|
||||||
|
if clip_bbox != expected {
|
||||||
|
return Some(format!(
|
||||||
|
"{}: path_ix={}, expected bbox={:?}, clip_bbox={:?}",
|
||||||
|
i, path_ix, expected, clip_bbox
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if let Some(expected_path) = expected_path {
|
||||||
|
let actual_path = draw_result[i].path_ix;
|
||||||
|
if expected_path != actual_path {
|
||||||
|
return Some(format!(
|
||||||
|
"{}: expected path {}, actual {}",
|
||||||
|
i, expected_path, actual_path
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
|
@ -102,17 +102,21 @@ impl DrawTestData {
|
||||||
// Layout of memory
|
// Layout of memory
|
||||||
let drawmonoid_alloc = 0;
|
let drawmonoid_alloc = 0;
|
||||||
let anno_alloc = drawmonoid_alloc + 8 * n_tags;
|
let anno_alloc = drawmonoid_alloc + 8 * n_tags;
|
||||||
|
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
|
||||||
let stage_config = stages::Config {
|
let stage_config = stages::Config {
|
||||||
n_elements: n_tags as u32,
|
n_elements: n_tags as u32,
|
||||||
anno_alloc: anno_alloc as u32,
|
anno_alloc: anno_alloc as u32,
|
||||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||||
|
clip_alloc: clip_alloc as u32,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
stage_config
|
stage_config
|
||||||
}
|
}
|
||||||
|
|
||||||
fn memory_size(&self) -> u64 {
|
fn memory_size(&self) -> u64 {
|
||||||
(8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64
|
// Note: this overallocates the clip buf a bit - only needed for the
|
||||||
|
// total number of begin_clip and end_clip tags.
|
||||||
|
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fill_scene(&self, buf: &mut BufWrite) {
|
fn fill_scene(&self, buf: &mut BufWrite) {
|
||||||
|
@ -128,14 +132,13 @@ impl DrawTestData {
|
||||||
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
|
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
|
||||||
let mut expected = DrawMonoid::default();
|
let mut expected = DrawMonoid::default();
|
||||||
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
|
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
|
||||||
// We compute an inclusive prefix sum, but for this application
|
// Verify exclusive prefix sum.
|
||||||
// exclusive would be slightly better. We can adapt though.
|
|
||||||
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
|
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
|
||||||
expected.path_ix += path_ix;
|
|
||||||
expected.clip_ix += clip_ix;
|
|
||||||
if *actual != expected {
|
if *actual != expected {
|
||||||
return Some(format!("draw mismatch at {}", i));
|
return Some(format!("draw mismatch at {}", i));
|
||||||
}
|
}
|
||||||
|
expected.path_ix += path_ix;
|
||||||
|
expected.clip_ix += clip_ix;
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
//! Tests for piet-gpu shaders and GPU capabilities.
|
//! Tests for piet-gpu shaders and GPU capabilities.
|
||||||
|
|
||||||
mod clear;
|
mod clear;
|
||||||
|
mod clip;
|
||||||
mod config;
|
mod config;
|
||||||
mod draw;
|
mod draw;
|
||||||
mod linkedlist;
|
mod linkedlist;
|
||||||
|
@ -139,6 +140,7 @@ fn main() {
|
||||||
report(&transform::transform_test(&mut runner, &config));
|
report(&transform::transform_test(&mut runner, &config));
|
||||||
report(&path::path_test(&mut runner, &config));
|
report(&path::path_test(&mut runner, &config));
|
||||||
report(&draw::draw_test(&mut runner, &config));
|
report(&draw::draw_test(&mut runner, &config));
|
||||||
|
report(&clip::clip_test(&mut runner, &config));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,8 @@ use std::ops::RangeBounds;
|
||||||
|
|
||||||
use bytemuck::Pod;
|
use bytemuck::Pod;
|
||||||
use piet_gpu_hal::{
|
use piet_gpu_hal::{
|
||||||
BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool,
|
BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags,
|
||||||
Session,
|
QueryPool, Session,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct Runner {
|
pub struct Runner {
|
||||||
|
@ -37,15 +37,8 @@ pub struct Commands {
|
||||||
query_pool: QueryPool,
|
query_pool: QueryPool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Buffer for uploading data to GPU.
|
/// Buffer for both uploading and downloading
|
||||||
#[allow(unused)]
|
pub struct BufStage {
|
||||||
pub struct BufUp {
|
|
||||||
pub stage_buf: Buffer,
|
|
||||||
pub dev_buf: Buffer,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Buffer for downloading data from GPU.
|
|
||||||
pub struct BufDown {
|
|
||||||
pub stage_buf: Buffer,
|
pub stage_buf: Buffer,
|
||||||
pub dev_buf: Buffer,
|
pub dev_buf: Buffer,
|
||||||
}
|
}
|
||||||
|
@ -92,7 +85,7 @@ impl Runner {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
pub fn buf_up(&self, size: u64) -> BufUp {
|
pub fn buf_up(&self, size: u64) -> BufStage {
|
||||||
let stage_buf = self
|
let stage_buf = self
|
||||||
.session
|
.session
|
||||||
.create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC)
|
.create_buffer(size, BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC)
|
||||||
|
@ -101,13 +94,13 @@ impl Runner {
|
||||||
.session
|
.session
|
||||||
.create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE)
|
.create_buffer(size, BufferUsage::COPY_DST | BufferUsage::STORAGE)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
BufUp { stage_buf, dev_buf }
|
BufStage { stage_buf, dev_buf }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a buffer for download (readback).
|
/// Create a buffer for download (readback).
|
||||||
///
|
///
|
||||||
/// The `usage` parameter need not include COPY_SRC and STORAGE.
|
/// The `usage` parameter need not include COPY_SRC and STORAGE.
|
||||||
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufDown {
|
pub fn buf_down(&self, size: u64, usage: BufferUsage) -> BufStage {
|
||||||
let stage_buf = self
|
let stage_buf = self
|
||||||
.session
|
.session
|
||||||
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
|
.create_buffer(size, BufferUsage::MAP_READ | BufferUsage::COPY_DST)
|
||||||
|
@ -116,7 +109,7 @@ impl Runner {
|
||||||
.session
|
.session
|
||||||
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
|
.create_buffer(size, usage | BufferUsage::COPY_SRC | BufferUsage::STORAGE)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
BufDown { stage_buf, dev_buf }
|
BufStage { stage_buf, dev_buf }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn backend_type(&self) -> BackendType {
|
pub fn backend_type(&self) -> BackendType {
|
||||||
|
@ -129,17 +122,16 @@ impl Commands {
|
||||||
self.cmd_buf.write_timestamp(&self.query_pool, query);
|
self.cmd_buf.write_timestamp(&self.query_pool, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
pub unsafe fn upload(&mut self, buf: &BufStage) {
|
||||||
pub unsafe fn upload(&mut self, buf: &BufUp) {
|
|
||||||
self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf);
|
self.cmd_buf.copy_buffer(&buf.stage_buf, &buf.dev_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn download(&mut self, buf: &BufDown) {
|
pub unsafe fn download(&mut self, buf: &BufStage) {
|
||||||
self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf);
|
self.cmd_buf.copy_buffer(&buf.dev_buf, &buf.stage_buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BufDown {
|
impl BufStage {
|
||||||
pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) {
|
pub unsafe fn read(&self, dst: &mut Vec<impl Pod>) {
|
||||||
self.stage_buf.read(dst).unwrap()
|
self.stage_buf.read(dst).unwrap()
|
||||||
}
|
}
|
||||||
|
@ -147,4 +139,8 @@ impl BufDown {
|
||||||
pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> {
|
pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds<usize>) -> BufReadGuard<'a> {
|
||||||
self.stage_buf.map_read(range).unwrap()
|
self.stage_buf.map_read(range).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub unsafe fn map_write<'a>(&'a mut self, range: impl RangeBounds<usize>) -> BufWriteGuard {
|
||||||
|
self.stage_buf.map_write(range).unwrap()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue