Variable size encoding of draw objects

This patch switches to a variable size encoding of draw objects.

In addition to the CPU-side scene encoding, it changes the representation of intermediate per draw object state from the `Annotated` struct to a variable "info" encoding. In addition, the bounding boxes are moved to a separate array (for a more "structure of "arrays" approach). Data that's unchanged from the scene encoding is not copied. Rather, downstream stages can access the data from the scene buffer (reducing allocation and copying).

Prefix sums, computed in `DrawMonoid` track the offset of both scene and intermediate data. The tags for the CPU-side encoding have been split into their own stream (again a change from AoS to SoA style).

This is not necessarily the final form. There's some stuff (including at least one piet-gpu-derive type) that can be deleted. In addition, the linewidth field should probably move from the info to path-specific. Also, the 1:1 correspondence between draw object and path has not yet been broken.

Closes #152
This commit is contained in:
Raph Levien 2022-03-02 14:44:03 -08:00
parent 547672ab01
commit acb3933d94
96 changed files with 2018 additions and 4078 deletions

View file

@ -38,7 +38,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
shared uint sh_row_count[BACKDROP_WG];
@ -48,23 +47,14 @@ shared uint sh_row_width[BACKDROP_WG];
void main() {
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element
uint row_count = 0;
bool mem_ok = mem_error == NO_ERROR;
if (gl_LocalInvocationID.y == 0) {
if (element_ix < conf.n_elements) {
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
switch (tag.tag) {
case Annotated_Image:
case Annotated_LinGradient:
case Annotated_BeginClip:
case Annotated_Color:
if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
break;
}
// Fall through.
// Possible TODO: it's not necessary to process backdrops of stroked paths.
// We had logic for that but took it out because it used the Annotated struct.
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
Path path = Path_read(conf.tile_alloc, path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
@ -81,7 +71,6 @@ void main() {
path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
sh_row_alloc[th_ix] = path_alloc;
}
}
sh_row_count[th_ix] = row_count;
}

View file

@ -20,7 +20,7 @@ layout(binding = 1) readonly buffer ConfigBuf {
void main() {
uint ix = gl_GlobalInvocationID.x;
if (ix < conf.n_path) {
uint out_ix = (conf.bbox_alloc.offset >> 2) + 6 * ix;
uint out_ix = (conf.path_bbox_alloc.offset >> 2) + 6 * ix;
memory[out_ix] = 0xffff;
memory[out_ix + 1] = 0xffff;
memory[out_ix + 2] = 0;

View file

@ -18,7 +18,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "bins.h"
#include "drawtag.h"
@ -37,10 +36,12 @@ shared Alloc sh_chunk_alloc[N_TILE];
shared bool sh_alloc_failed;
DrawMonoid load_draw_monoid(uint element_ix) {
uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix;
uint path_ix = memory[base];
uint clip_ix = memory[base + 1];
return DrawMonoid(path_ix, clip_ix);
uint scene_offset = memory[base + 2];
uint info_offset = memory[base + 3];
return DrawMonoid(path_ix, clip_ix, scene_offset, info_offset);
}
// Load bounding box computed by clip processing
@ -60,7 +61,7 @@ vec4 bbox_intersect(vec4 a, vec4 b) {
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
@ -69,16 +70,15 @@ vec4 load_path_bbox(uint path_ix) {
return bbox;
}
void store_path_bbox(AnnotatedRef ref, vec4 bbox) {
uint ix = ref.offset >> 2;
memory[ix + 1] = floatBitsToUint(bbox.x);
memory[ix + 2] = floatBitsToUint(bbox.y);
memory[ix + 3] = floatBitsToUint(bbox.z);
memory[ix + 4] = floatBitsToUint(bbox.w);
void store_draw_bbox(uint draw_ix, vec4 bbox) {
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
memory[base] = floatBitsToUint(bbox.x);
memory[base + 1] = floatBitsToUint(bbox.y);
memory[base + 2] = floatBitsToUint(bbox.z);
memory[base + 3] = floatBitsToUint(bbox.w);
}
void main() {
uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0; i < N_SLICE; i++) {
@ -91,18 +91,8 @@ void main() {
// Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < my_n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Color:
case Annotated_LinGradient:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
if (element_ix < conf.n_elements) {
DrawMonoid draw_monoid = load_draw_monoid(element_ix);
uint path_ix = draw_monoid.path_ix;
vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
@ -119,12 +109,11 @@ void main() {
// Avoid negative-size bbox (is this necessary)?
bbox.zw = max(bbox.xy, bbox.zw);
// Store clip-intersected bbox for tile_alloc.
store_path_bbox(ref, bbox);
store_draw_bbox(element_ix, bbox);
x0 = int(floor(bbox.x * SX));
y0 = int(floor(bbox.y * SY));
x1 = int(ceil(bbox.z * SX));
y1 = int(ceil(bbox.w * SY));
break;
}
// At this point, we run an iterator over the coverage area,

View file

@ -22,43 +22,43 @@ rule dxil
rule msl
command = $spirv_cross --msl $in --output $out $msl_flags
build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h
build gen/binning.spv: glsl binning.comp | bins.h drawtag.h setup.h mem.h
build gen/binning.hlsl: hlsl gen/binning.spv
build gen/binning.dxil: dxil gen/binning.hlsl
build gen/binning.msl: msl gen/binning.spv
build gen/tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
build gen/tile_alloc.spv: glsl tile_alloc.comp | drawtag.h tile.h setup.h mem.h
build gen/tile_alloc.hlsl: hlsl gen/tile_alloc.spv
build gen/tile_alloc.dxil: dxil gen/tile_alloc.hlsl
build gen/tile_alloc.msl: msl gen/tile_alloc.spv
build gen/path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h
build gen/path_coarse.spv: glsl path_coarse.comp | pathseg.h tile.h setup.h mem.h
build gen/path_coarse.hlsl: hlsl gen/path_coarse.spv
build gen/path_coarse.dxil: dxil gen/path_coarse.hlsl
build gen/path_coarse.msl: msl gen/path_coarse.spv
build gen/backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build gen/backdrop.spv: glsl backdrop.comp | tile.h setup.h mem.h
build gen/backdrop.hlsl: hlsl gen/backdrop.spv
build gen/backdrop.dxil: dxil gen/backdrop.hlsl
build gen/backdrop.msl: msl gen/backdrop.spv
build gen/backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build gen/backdrop_lg.spv: glsl backdrop.comp | tile.h setup.h mem.h
flags = -DBACKDROP_DIST_FACTOR=4
build gen/backdrop_lg.hlsl: hlsl gen/backdrop_lg.spv
build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl
build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv
build gen/coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build gen/coarse.spv: glsl coarse.comp | drawtag.h bins.h ptcl.h setup.h mem.h
build gen/coarse.hlsl: hlsl gen/coarse.spv
build gen/coarse.dxil: dxil gen/coarse.hlsl
build gen/coarse.msl: msl gen/coarse.spv
build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h
build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h mem.h
build gen/kernel4.hlsl: hlsl gen/kernel4.spv
build gen/kernel4.dxil: dxil gen/kernel4.hlsl
build gen/kernel4.msl: msl gen/kernel4.spv
build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h
build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h mem.h
flags = -DGRAY
build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv
build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl
@ -114,17 +114,17 @@ build gen/draw_root.hlsl: hlsl gen/draw_root.spv
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
build gen/draw_root.msl: msl gen/draw_root.spv
build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h annotated.h setup.h mem.h
build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h setup.h mem.h
build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h
build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
build gen/clip_reduce.msl: msl gen/clip_reduce.spv
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
build gen/clip_leaf.msl: msl gen/clip_leaf.spv

View file

@ -18,8 +18,6 @@ layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// Some of this is cut'n'paste duplication with the reduce pass, and
// arguably should be moved to a common .h file.
// The bicyclic monoid
@ -43,7 +41,7 @@ Bic bic_combine(Bic x, Bic y) {
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;
@ -271,7 +269,7 @@ void main() {
// and is in the ClipEl for cross-partition.
// If not, can probably get rid of it in the stack intermediate buf.
path_ix = load_path_ix(parent);
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp;
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 4 * ~inp;
// Fix up drawmonoid so path_ix at EndClip matches BeginClip
memory[drawmonoid_out_base] = path_ix;

View file

@ -31,8 +31,6 @@ layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
// The intermediate state for clip processing.
struct ClipEl {
// index of parent node
@ -59,7 +57,7 @@ shared vec4 sh_bbox[WG_SIZE];
// Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix;
uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0;

View file

@ -19,11 +19,15 @@
layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "drawtag.h"
#include "bins.h"
#include "tile.h"
#include "ptcl.h"
@ -92,8 +96,8 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true;
}
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) {
if (fill_mode_from_flags(flags) == MODE_NONZERO) {
void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) {
if (linewidth < 0.0) {
if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
@ -146,6 +150,10 @@ void main() {
uint part_start_ix = 0;
uint ready_ix = 0;
uint drawmonoid_start = conf.drawmonoid_alloc.offset >> 2;
uint drawtag_start = conf.drawtag_offset >> 2;
uint drawdata_start = conf.drawdata_offset >> 2;
uint drawinfo_start = conf.drawinfo_alloc.offset >> 2;
bool mem_ok = mem_error == NO_ERROR;
while (true) {
for (uint i = 0; i < N_SLICE; i++) {
@ -207,24 +215,22 @@ void main() {
// We've done the merge and filled the buffer.
// Read one element, compute coverage.
uint tag = Annotated_Nop;
uint tag = Drawtag_Nop;
uint element_ix;
AnnotatedRef ref;
if (th_ix + rd_ix < wr_ix) {
element_ix = sh_elements[th_ix];
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
tag = Annotated_tag(conf.anno_alloc, ref).tag;
tag = scene[drawtag_start + element_ix];
}
// Bounding box of element in pixel coordinates.
uint tile_count;
switch (tag) {
case Annotated_Color:
case Annotated_Image:
case Annotated_LinGradient:
case Annotated_BeginClip:
case Annotated_EndClip:
uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix;
case Drawtag_FillColor:
case Drawtag_FillImage:
case Drawtag_FillLinGradient:
case Drawtag_BeginClip:
case Drawtag_EndClip:
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
uint path_ix = memory[drawmonoid_base];
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x;
@ -272,9 +278,7 @@ void main() {
el_ix = probe;
}
}
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size);
AnnotatedTag anno_tag = Annotated_tag(conf.anno_alloc, ref);
uint tag = anno_tag.tag;
uint tag = scene[drawtag_start + sh_elements[el_ix]];
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + seq_ix % width;
@ -283,15 +287,16 @@ void main() {
if (mem_ok) {
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip;
bool is_clip = (tag & 1) != 0;
// Always include the tile if it contains a path segment.
// For draws, include the tile if it is solid.
// For clips, include the tile if it is empty - this way, logic
// below will suppress the drawing of inner elements.
// For blends, include the tile if
// (blend_mode, composition_mode) != (Normal, SrcOver)
bool is_blend = false; // TODO
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
|| (is_clip && (anno_tag.flags & 0x2) != 0);
|| (is_clip && is_blend);
}
if (include_tile) {
uint el_slice = el_ix / 32;
@ -302,8 +307,8 @@ void main() {
barrier();
// Output non-segment elements for this tile. The thread does a sequential walk
// through the non-segment elements.
// Output draw objects for this tile. The thread does a sequential walk
// through the draw objects.
uint slice_ix = 0;
uint bitmap = sh_bitmaps[0][th_ix];
while (mem_ok) {
@ -323,59 +328,55 @@ void main() {
// Clear LSB
bitmap &= bitmap - 1;
// At this point, we read the element again from global memory.
// If that turns out to be expensive, maybe we can pack it into
// shared memory (or perhaps just the tag).
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
uint drawtag = scene[drawtag_start + element_ix];
if (clip_zero_depth == 0) {
switch (tag.tag) {
case Annotated_Color:
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
uint scene_offset = memory[drawmonoid_base + 2];
uint info_offset = memory[drawmonoid_base + 3];
uint dd = drawdata_start + (scene_offset >> 2);
uint di = drawinfo_start + (info_offset >> 2);
switch (drawtag) {
case Drawtag_FillColor:
float linewidth = uintBitsToFloat(memory[di]);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
uint rgba = scene[dd];
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba));
cmd_ref.offset += 4 + CmdColor_size;
break;
case Annotated_LinGradient:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoLinGradient lin = Annotated_LinGradient_read(conf.anno_alloc, ref);
case Drawtag_FillLinGradient:
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
linewidth = uintBitsToFloat(memory[di]);
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
CmdLinGrad cmd_lin;
cmd_lin.index = lin.index;
cmd_lin.line_x = lin.line_x;
cmd_lin.line_y = lin.line_y;
cmd_lin.line_c = lin.line_c;
cmd_lin.index = scene[dd];
cmd_lin.line_x = uintBitsToFloat(memory[di + 1]);
cmd_lin.line_y = uintBitsToFloat(memory[di + 2]);
cmd_lin.line_c = uintBitsToFloat(memory[di + 3]);
Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin);
cmd_ref.offset += 4 + CmdLinGrad_size;
break;
case Annotated_Image:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
case Drawtag_FillImage:
linewidth = uintBitsToFloat(memory[di]);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth);
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
uint index = scene[dd];
uint raw1 = scene[dd + 1];
ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset));
cmd_ref.offset += 4 + CmdImage_size;
break;
case Annotated_BeginClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
case Drawtag_BeginClip:
if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1;
} else {
@ -387,27 +388,24 @@ void main() {
}
clip_depth++;
break;
case Annotated_EndClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoEndClip end_clip = Annotated_EndClip_read(conf.anno_alloc, ref);
case Drawtag_EndClip:
clip_depth--;
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0);
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(end_clip.blend));
write_fill(cmd_alloc, cmd_ref, tile, -1.0);
uint blend = scene[dd];
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
cmd_ref.offset += 4 + CmdEndClip_size;
break;
}
} else {
// In "clip zero" state, suppress all drawing
switch (tag.tag) {
case Annotated_BeginClip:
switch (drawtag) {
case Drawtag_BeginClip:
clip_depth++;
break;
case Annotated_EndClip:
case Drawtag_EndClip:
if (clip_depth == clip_zero_depth) {
clip_zero_depth = 0;
}

View file

@ -27,7 +27,6 @@ layout(binding = 2) readonly buffer SceneBuf {
#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#include "annotated.h"
#include "blend.h"
#define Monoid DrawMonoid
@ -42,14 +41,14 @@ void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
uint drawtag_base = conf.drawtag_offset >> 2;
uint tag_word = scene[drawtag_base + ix];
Monoid agg = map_tag(tag_word);
local[0] = agg;
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
tag_word = scene[drawtag_base + ix + i];
agg = combine_draw_monoid(agg, map_tag(tag_word));
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
@ -57,41 +56,47 @@ void main() {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_tag_monoid(other, agg);
agg = combine_draw_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
Monoid row = tag_monoid_identity();
Monoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
row = combine_draw_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
uint drawdata_base = conf.drawdata_offset >> 2;
uint drawinfo_base = conf.drawinfo_alloc.offset >> 2;
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 4;
uint clip_out_base = conf.clip_alloc.offset >> 2;
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = row;
if (i > 0) {
m = combine_tag_monoid(m, local[i - 1]);
m = combine_draw_monoid(m, local[i - 1]);
}
// m now holds exclusive scan of draw monoid
memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix;
memory[out_base + i * 4] = m.path_ix;
memory[out_base + i * 4 + 1] = m.clip_ix;
memory[out_base + i * 4 + 2] = m.scene_offset;
memory[out_base + i * 4 + 3] = m.info_offset;
// u32 offset of drawobj data
uint dd = drawdata_base + (m.scene_offset >> 2);
uint di = drawinfo_base + (m.info_offset >> 2);
// For compatibility, we'll generate an Annotated object, same as old
// pipeline. However, going forward we'll get rid of that, and have
// later stages read scene + bbox etc.
ElementRef this_ref = Element_index(ref, i);
tag_word = Element_tag(this_ref).tag;
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage ||
tag_word == Element_BeginClip) {
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
tag_word = scene[drawtag_base + ix + i];
if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillImage ||
tag_word == Drawtag_BeginClip) {
uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix;
float bbox_l = float(memory[bbox_offset]) - 32768.0;
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
@ -101,11 +106,11 @@ void main() {
uint fill_mode = uint(linewidth >= 0.0);
vec4 mat;
vec2 translate;
if (linewidth >= 0.0 || tag_word == Element_FillLinGradient) {
if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient) {
uint trans_ix = memory[bbox_offset + 5];
uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix;
mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
if (tag_word == Element_FillLinGradient) {
if (tag_word == Drawtag_FillLinGradient) {
translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
}
}
@ -113,69 +118,38 @@ void main() {
// TODO: need to deal with anisotropic case
linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
}
linewidth = max(linewidth, 0.0);
switch (tag_word) {
case Element_FillColor:
FillColor fill = Element_FillColor_read(this_ref);
AnnoColor anno_fill;
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
case Drawtag_FillColor:
case Drawtag_FillImage:
memory[di] = floatBitsToUint(linewidth);
break;
case Element_FillLinGradient:
FillLinGradient lin = Element_FillLinGradient_read(this_ref);
AnnoLinGradient anno_lin;
anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth;
anno_lin.index = lin.index;
vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate;
vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate;
case Drawtag_FillLinGradient:
memory[di] = floatBitsToUint(linewidth);
uint index = scene[dd];
vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
vec2 dxy = p1 - p0;
float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -(p0.x * line_x + p0.y * line_y);
Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin);
float line_c = -(p0.x * line_x + p0.y * line_y);
memory[di + 1] = floatBitsToUint(line_x);
memory[di + 2] = floatBitsToUint(line_y);
memory[di + 3] = floatBitsToUint(line_c);
break;
case Element_FillImage:
FillImage fill_img = Element_FillImage_read(this_ref);
AnnoImage anno_img;
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
break;
case Element_BeginClip:
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoBeginClip anno_begin_clip;
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != BlendComp_default) << 1;
Annotated_BeginClip_write(conf.anno_alloc, out_ref, flags, anno_begin_clip);
case Drawtag_BeginClip:
break;
}
} else if (tag_word == Element_EndClip) {
Clip end_clip = Element_BeginClip_read(this_ref);
AnnoEndClip anno_end_clip;
// The actual bbox will be reconstructed from clip stream output.
anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
anno_end_clip.blend = end_clip.blend;
uint flags = uint(end_clip.blend != BlendComp_default) << 1;
Annotated_EndClip_write(conf.anno_alloc, out_ref, flags, anno_end_clip);
}
// Generate clip stream.
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) {
if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) {
uint path_ix = ~(out_ix + i);
if (tag_word == Element_BeginClip) {
if (tag_word == Drawtag_BeginClip) {
path_ix = m.path_ix;
}
memory[clip_out_base + m.clip_ix] = path_ix;
}
out_ref.offset += Annotated_size;
}
}

View file

@ -36,13 +36,13 @@ shared Monoid sh_scratch[WG_SIZE];
void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
uint drawtag_base = conf.drawtag_offset >> 2;
uint tag_word = scene[drawtag_base + ix];
Monoid agg = map_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
uint tag_word = scene[drawtag_base + ix + i];
agg = combine_draw_monoid(agg, map_tag(tag_word));
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
@ -50,7 +50,7 @@ void main() {
// We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
agg = combine_draw_monoid(agg, other);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;

View file

@ -16,8 +16,8 @@
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
#define Monoid DrawMonoid
#define combine_monoid combine_tag_monoid
#define monoid_identity tag_monoid_identity
#define combine_monoid combine_draw_monoid
#define monoid_identity draw_monoid_identity
layout(binding = 0) buffer DataBuf {
Monoid[] data;

View file

@ -2,36 +2,39 @@
// Common data structures and functions for the draw tag stream.
// Design of draw tag: & 0x1c gives scene size in bytes
// & 1 gives clip
// (tag >> 4) & 0x1c is info size in bytes
#define Drawtag_Nop 0
#define Drawtag_FillColor 0x44
#define Drawtag_FillLinGradient 0x114
#define Drawtag_FillImage 0x48
#define Drawtag_BeginClip 0x05
#define Drawtag_EndClip 0x25
struct DrawMonoid {
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
DrawMonoid tag_monoid_identity() {
return DrawMonoid(0, 0);
DrawMonoid draw_monoid_identity() {
return DrawMonoid(0, 0, 0, 0);
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) {
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) {
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
#ifdef Element_size
DrawMonoid map_tag(uint tag_word) {
switch (tag_word) {
case Element_FillColor:
case Element_FillLinGradient:
case Element_FillImage:
return DrawMonoid(1, 0);
case Element_BeginClip:
// TODO: endclip should be (0, 1), ie not generate a path. But for now
// we generate a dummy path.
case Element_EndClip:
return DrawMonoid(1, 1);
default:
return DrawMonoid(0, 0);
}
// TODO: at some point, EndClip should not generate a path
uint has_path = uint(tag_word != Drawtag_Nop);
return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x1c);
}
#endif

Binary file not shown.

View file

@ -3,17 +3,6 @@ struct Alloc
uint offset;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -42,12 +31,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -55,12 +46,14 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _79 : register(u0, space0);
ByteAddressBuffer _186 : register(t1, space0);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _79.Load(offset * 4 + 8);
uint v = _67.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _121;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _165 = { raw2 };
s.tiles = _165;
TileRef _134 = { raw2 };
s.tiles = _134;
return s;
}
@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_79.Store(offset * 4 + 8, val);
_67.Store(offset * 4 + 8, val);
}
void comp_main()
{
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _194;
uint row_count = 0u;
bool mem_ok = _79.Load(4) == 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _186.Load(0))
if (element_ix < _166.Load(0))
{
Alloc _217;
_217.offset = _186.Load(32);
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _217.offset;
AnnotatedRef param_1 = ref;
AnnotatedTag tag = Annotated_tag(param, param_1);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef _243 = { _186.Load(16) + (element_ix * 12u) };
PathRef path_ref = _243;
Alloc _247;
_247.offset = _186.Load(16);
Alloc param_3;
param_3.offset = _247.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4);
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u;
bool _278;
if (_272)
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_278 = path.bbox.y > 0u;
_216 = path.bbox.y > 0u;
}
else
{
_278 = _272;
_216 = _210;
}
if (_278)
if (_216)
{
row_count = 0u;
}
uint param_5 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7);
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
break;
}
}
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _325 = gl_LocalInvocationID.y == 0u;
bool _332;
if (_325)
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_332 = th_ix >= (1u << i);
_269 = th_ix >= (1u << i);
}
else
{
_332 = _325;
_269 = _262;
}
if (_332)
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -234,7 +190,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _411;
uint _348;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
@ -252,27 +208,27 @@ void comp_main()
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_411 = sh_row_count[el_ix - 1u];
_348 = sh_row_count[el_ix - 1u];
}
else
{
_411 = 0u;
_348 = 0u;
}
uint seq_ix = row - _411;
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
uint sum = read_mem(param_8, param_9);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
sum += read_mem(param_10, param_11);
Alloc param_12 = tiles_alloc;
uint param_13 = tile_el_ix;
uint param_14 = sum;
write_mem(param_12, param_13, param_14);
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
}
}
}

View file

@ -10,17 +10,6 @@ struct Alloc
uint offset;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -61,12 +50,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -74,6 +65,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_79.memory[offset];
uint v = v_67.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_79);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79)
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_79);
uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_79);
uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_79);
uint raw2 = read_mem(param_4, param_5, v_67);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_79.memory[offset] = val;
v_67.memory[offset] = val;
}
kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) };
uint row_count = 0u;
bool mem_ok = v_79.mem_error == 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _186.conf.n_elements)
if (element_ix < _166.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _186.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
AnnotatedTag tag = Annotated_tag(param, param_1, v_79);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_3;
param_3.offset = _186.conf.tile_alloc.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4, v_79);
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u;
bool _278;
if (_272)
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_278 = path.bbox.y > 0u;
_216 = path.bbox.y > 0u;
}
else
{
_278 = _272;
_216 = _210;
}
if (_278)
if (_216)
{
row_count = 0u;
}
uint param_5 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7);
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
break;
}
}
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _325 = gl_LocalInvocationID.y == 0u;
bool _332;
if (_325)
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_332 = th_ix >= (1u << i);
_269 = th_ix >= (1u << i);
}
else
{
_332 = _325;
_269 = _262;
}
if (_332)
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _411;
uint _348;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_411 = sh_row_count[el_ix - 1u];
_348 = sh_row_count[el_ix - 1u];
}
else
{
_411 = 0u;
_348 = 0u;
}
uint seq_ix = row - _411;
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
uint sum = read_mem(param_8, param_9, v_79);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
sum += read_mem(param_10, param_11, v_79);
Alloc param_12 = tiles_alloc;
uint param_13 = tile_el_ix;
uint param_14 = sum;
write_mem(param_12, param_13, param_14, v_79);
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -3,17 +3,6 @@ struct Alloc
uint offset;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -42,12 +31,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -55,12 +46,14 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _79 : register(u0, space0);
ByteAddressBuffer _186 : register(t1, space0);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _79.Load(offset * 4 + 8);
uint v = _67.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _121;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _165 = { raw2 };
s.tiles = _165;
TileRef _134 = { raw2 };
s.tiles = _134;
return s;
}
@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_79.Store(offset * 4 + 8, val);
_67.Store(offset * 4 + 8, val);
}
void comp_main()
{
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _194;
uint row_count = 0u;
bool mem_ok = _79.Load(4) == 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _186.Load(0))
if (element_ix < _166.Load(0))
{
Alloc _217;
_217.offset = _186.Load(32);
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _217.offset;
AnnotatedRef param_1 = ref;
AnnotatedTag tag = Annotated_tag(param, param_1);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef _243 = { _186.Load(16) + (element_ix * 12u) };
PathRef path_ref = _243;
Alloc _247;
_247.offset = _186.Load(16);
Alloc param_3;
param_3.offset = _247.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4);
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u;
bool _278;
if (_272)
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_278 = path.bbox.y > 0u;
_216 = path.bbox.y > 0u;
}
else
{
_278 = _272;
_216 = _210;
}
if (_278)
if (_216)
{
row_count = 0u;
}
uint param_5 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7);
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
break;
}
}
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _325 = gl_LocalInvocationID.y == 0u;
bool _332;
if (_325)
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_332 = th_ix >= (1u << i);
_269 = th_ix >= (1u << i);
}
else
{
_332 = _325;
_269 = _262;
}
if (_332)
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -234,7 +190,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _411;
uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
@ -252,27 +208,27 @@ void comp_main()
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_411 = sh_row_count[el_ix - 1u];
_348 = sh_row_count[el_ix - 1u];
}
else
{
_411 = 0u;
_348 = 0u;
}
uint seq_ix = row - _411;
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
uint sum = read_mem(param_8, param_9);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
sum += read_mem(param_10, param_11);
Alloc param_12 = tiles_alloc;
uint param_13 = tile_el_ix;
uint param_14 = sum;
write_mem(param_12, param_13, param_14);
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
}
}
}

View file

@ -10,17 +10,6 @@ struct Alloc
uint offset;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -61,12 +50,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -74,6 +65,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_79.memory[offset];
uint v = v_67.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_79);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79)
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_79);
uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_79);
uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_79);
uint raw2 = read_mem(param_4, param_5, v_67);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_79.memory[offset] = val;
v_67.memory[offset] = val;
}
kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) };
uint row_count = 0u;
bool mem_ok = v_79.mem_error == 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _186.conf.n_elements)
if (element_ix < _166.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _186.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
AnnotatedTag tag = Annotated_tag(param, param_1, v_79);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_3;
param_3.offset = _186.conf.tile_alloc.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4, v_79);
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u;
bool _278;
if (_272)
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_278 = path.bbox.y > 0u;
_216 = path.bbox.y > 0u;
}
else
{
_278 = _272;
_216 = _210;
}
if (_278)
if (_216)
{
row_count = 0u;
}
uint param_5 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7);
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
break;
}
}
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _325 = gl_LocalInvocationID.y == 0u;
bool _332;
if (_325)
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_332 = th_ix >= (1u << i);
_269 = th_ix >= (1u << i);
}
else
{
_332 = _325;
_269 = _262;
}
if (_332)
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _411;
uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_411 = sh_row_count[el_ix - 1u];
_348 = sh_row_count[el_ix - 1u];
}
else
{
_411 = 0u;
_348 = 0u;
}
uint seq_ix = row - _411;
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
uint sum = read_mem(param_8, param_9, v_79);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
sum += read_mem(param_10, param_11, v_79);
Alloc param_12 = tiles_alloc;
uint param_13 = tile_el_ix;
uint param_14 = sum;
write_mem(param_12, param_13, param_14, v_79);
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -15,12 +15,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -28,6 +30,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
@ -44,7 +48,7 @@ struct SPIRV_Cross_Input
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(68))
if (ix < _21.Load(76))
{
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u);

View file

@ -20,12 +20,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -33,6 +35,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -54,7 +58,7 @@ kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.conf.n_path)
{
uint out_ix = (_21.conf.bbox_alloc.offset >> uint(2)) + (6u * ix);
uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix);
_45.memory[out_ix] = 65535u;
_45.memory[out_ix + 1u] = 65535u;
_45.memory[out_ix + 2u] = 0u;

Binary file not shown.

Binary file not shown.

View file

@ -9,17 +9,6 @@ struct MallocResult
bool failed;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct BinInstanceRef
{
uint offset;
@ -34,6 +23,8 @@ struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Config
@ -48,12 +39,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -61,12 +54,14 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _94 : register(u0, space0);
ByteAddressBuffer _202 : register(t1, space0);
RWByteAddressBuffer _81 : register(u0, space0);
ByteAddressBuffer _156 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -81,59 +76,35 @@ groupshared bool sh_alloc_failed;
groupshared uint count[8][256];
groupshared Alloc sh_chunk_alloc[256];
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _94.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _181;
}
DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix);
uint path_ix = _94.Load(base * 4 + 8);
uint clip_ix = _94.Load((base + 1u) * 4 + 8);
DrawMonoid _222 = { path_ix, clip_ix };
return _222;
uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix);
uint path_ix = _81.Load(base * 4 + 8);
uint clip_ix = _81.Load((base + 1u) * 4 + 8);
uint scene_offset = _81.Load((base + 2u) * 4 + 8);
uint info_offset = _81.Load((base + 3u) * 4 + 8);
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset };
return _190;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_94.Load(base * 4 + 8));
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8));
uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_81.Load(base * 4 + 8));
float y0 = asfloat(_81.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_81.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_81.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f;
uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
@ -143,13 +114,13 @@ float4 bbox_intersect(float4 a, float4 b)
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_path_bbox(AnnotatedRef ref, float4 bbox)
void store_draw_bbox(uint draw_ix, float4 bbox)
{
uint ix = ref.offset >> uint(2);
_94.Store((ix + 1u) * 4 + 8, asuint(bbox.x));
_94.Store((ix + 2u) * 4 + 8, asuint(bbox.y));
_94.Store((ix + 3u) * 4 + 8, asuint(bbox.z));
_94.Store((ix + 4u) * 4 + 8, asuint(bbox.w));
uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix);
_81.Store(base * 4 + 8, asuint(bbox.x));
_81.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_81.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_81.Store((base + 3u) * 4 + 8, asuint(bbox.w));
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -161,27 +132,32 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size)
{
uint _100;
_94.InterlockedAdd(0, size, _100);
uint offset = _100;
uint _107;
_94.GetDimensions(_107);
_107 = (_107 - 8) / 4;
uint _87;
_81.InterlockedAdd(0, size, _87);
uint offset = _87;
uint _94;
_81.GetDimensions(_94);
_94 = (_94 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_107) * 4);
r.failed = (offset + size) > uint(int(_94) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _129;
_94.InterlockedMax(4, 1u, _129);
uint _116;
_81.InterlockedMax(4, 1u, _116);
return r;
}
return r;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
@ -190,7 +166,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_94.Store(offset * 4 + 8, val);
_81.Store(offset * 4 + 8, val);
}
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
@ -204,7 +180,6 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
void comp_main()
{
uint my_n_elements = _202.Load(0);
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
@ -216,62 +191,42 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _415;
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc _425;
_425.offset = _202.Load(32);
Alloc param;
param.offset = _425.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag;
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
switch (tag)
if (element_ix < _156.Load(0))
{
case 1u:
case 2u:
case 3u:
case 4u:
case 5u:
{
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2);
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_3 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3);
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1);
}
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8);
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6);
x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
break;
}
}
uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -286,8 +241,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _581;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581);
uint _523;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523);
x++;
if (x == x1)
{
@ -302,15 +257,15 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_9 = 0u;
uint param_10 = 0u;
bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u)
{
uint param_12 = element_count * 4u;
MallocResult _631 = malloc(param_12);
MallocResult chunk = _631;
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
@ -318,32 +273,32 @@ void comp_main()
sh_alloc_failed = true;
}
}
uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _660;
_660.offset = _202.Load(20);
Alloc param_13;
param_13.offset = _660.offset;
uint param_14 = out_ix;
uint param_15 = element_count;
write_mem(param_13, param_14, param_15);
Alloc _672;
_672.offset = _202.Load(20);
Alloc param_16;
param_16.offset = _672.offset;
uint param_17 = out_ix + 1u;
uint param_18 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18);
uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _603;
_603.offset = _156.Load(20);
Alloc param_11;
param_11.offset = _603.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13);
Alloc _615;
_615.offset = _156.Load(20);
Alloc param_14;
param_14.offset = _615.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16);
GroupMemoryBarrierWithGroupSync();
bool _687;
bool _630;
if (!sh_alloc_failed)
{
_687 = _94.Load(4) != 0u;
_630 = _81.Load(4) != 0u;
}
else
{
_687 = sh_alloc_failed;
_630 = sh_alloc_failed;
}
if (_687)
if (_630)
{
return;
}
@ -362,12 +317,12 @@ void comp_main()
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _749 = { out_offset };
BinInstance _751 = { element_ix };
Alloc param_19 = out_alloc;
BinInstanceRef param_20 = _749;
BinInstance param_21 = _751;
BinInstance_write(param_19, param_20, param_21);
BinInstanceRef _692 = { out_offset };
BinInstance _694 = { element_ix };
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = _692;
BinInstance param_19 = _694;
BinInstance_write(param_17, param_18, param_19);
}
x++;
if (x == x1)

View file

@ -18,17 +18,6 @@ struct MallocResult
bool failed;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct BinInstanceRef
{
uint offset;
@ -43,6 +32,8 @@ struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Memory
@ -69,12 +60,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -82,6 +75,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -92,62 +87,36 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
return true;
uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_81.memory[base];
uint clip_ix = v_81.memory[base + 1u];
uint scene_offset = v_81.memory[base + 2u];
uint info_offset = v_81.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize)
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_94.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
uint path_ix = v_94.memory[base];
uint clip_ix = v_94.memory[base + 1u];
return DrawMonoid{ path_ix, clip_ix };
}
static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_94.memory[base]);
float y0 = as_type<float>(v_94.memory[base + 1u]);
float x1 = as_type<float>(v_94.memory[base + 2u]);
float y1 = as_type<float>(v_94.memory[base + 3u]);
uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_81.memory[base]);
float y0 = as_type<float>(v_81.memory[base + 1u]);
float x1 = as_type<float>(v_81.memory[base + 2u]);
float y1 = as_type<float>(v_81.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_94.memory[base]) - 32768.0;
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0;
uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_81.memory[base]) - 32768.0;
float bbox_t = float(v_81.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_81.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_81.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
@ -159,13 +128,13 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b)
}
static inline __attribute__((always_inline))
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize)
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint ix = ref.offset >> uint(2);
v_94.memory[ix + 1u] = as_type<uint>(bbox.x);
v_94.memory[ix + 2u] = as_type<uint>(bbox.y);
v_94.memory[ix + 3u] = as_type<uint>(bbox.z);
v_94.memory[ix + 4u] = as_type<uint>(bbox.w);
uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_81.memory[base] = as_type<uint>(bbox.x);
v_81.memory[base + 1u] = as_type<uint>(bbox.y);
v_81.memory[base + 2u] = as_type<uint>(bbox.z);
v_81.memory[base + 3u] = as_type<uint>(bbox.w);
}
static inline __attribute__((always_inline))
@ -177,26 +146,32 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize)
MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
{
uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed);
uint offset = _100;
uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
uint offset = _87;
MallocResult r;
r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4);
r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed);
uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize)
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
@ -204,27 +179,26 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_94.memory[offset] = val;
v_81.memory[offset] = val;
}
static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize)
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_94, v_94BufferSize);
write_mem(param, param_1, param_2, v_81, v_81BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed;
threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_94BufferSize = spvBufferSizeConstants[0];
uint my_n_elements = v_202.conf.n_elements;
constant uint& v_81BufferSize = spvBufferSizeConstants[0];
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
@ -236,59 +210,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc param;
param.offset = v_202.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
switch (tag)
if (element_ix < v_156.conf.n_elements)
{
case 1u:
case 2u:
case 3u:
case 4u:
case 5u:
{
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_3 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202);
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156);
}
uint param_4 = path_ix;
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202);
float4 param_5 = path_bbox;
float4 param_6 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6);
float4 _473 = bbox;
float4 _475 = bbox;
float2 _477 = fast::max(_473.xy, _475.zw);
bbox.z = _477.x;
bbox.w = _477.y;
AnnotatedRef param_7 = ref;
float4 param_8 = bbox;
store_path_bbox(param_7, param_8, v_94, v_94BufferSize);
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = fast::max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
break;
}
}
uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -303,7 +260,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++;
if (x == x1)
{
@ -318,15 +275,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_9 = 0u;
uint param_10 = 0u;
bool param_11 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11);
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u)
{
uint param_12 = element_count * 4u;
MallocResult _631 = malloc(param_12, v_94, v_94BufferSize);
MallocResult chunk = _631;
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10, v_81, v_81BufferSize);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
@ -334,28 +291,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
sh_alloc_failed = short(true);
}
}
uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_13;
param_13.offset = v_202.conf.bin_alloc.offset;
uint param_14 = out_ix;
uint param_15 = element_count;
write_mem(param_13, param_14, param_15, v_94, v_94BufferSize);
Alloc param_16;
param_16.offset = v_202.conf.bin_alloc.offset;
uint param_17 = out_ix + 1u;
uint param_18 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18, v_94, v_94BufferSize);
uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_11;
param_11.offset = v_156.conf.bin_alloc.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13, v_81, v_81BufferSize);
Alloc param_14;
param_14.offset = v_156.conf.bin_alloc.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16, v_81, v_81BufferSize);
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _687;
bool _630;
if (!bool(sh_alloc_failed))
{
_687 = v_94.mem_error != 0u;
_630 = v_81.mem_error != 0u;
}
else
{
_687 = bool(sh_alloc_failed);
_630 = bool(sh_alloc_failed);
}
if (_687)
if (_630)
{
return;
}
@ -374,10 +331,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_19 = out_alloc;
BinInstanceRef param_20 = BinInstanceRef{ out_offset };
BinInstance param_21 = BinInstance{ element_ix };
BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize);
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = BinInstanceRef{ out_offset };
BinInstance param_19 = BinInstance{ element_ix };
BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
}
x++;
if (x == x1)

Binary file not shown.

Binary file not shown.

View file

@ -27,12 +27,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -40,6 +42,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -99,7 +103,7 @@ float4 bbox_intersect(float4 a, float4 b)
uint load_path_ix(uint ix)
{
if (ix < _80.Load(72))
if (ix < _80.Load(80))
{
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
}
@ -324,7 +328,7 @@ void comp_main()
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < _80.Load(72);
_725 = gl_GlobalInvocationID.x < _80.Load(80);
}
else
{
@ -334,7 +338,7 @@ void comp_main()
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp));
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
if (int(grandparent) >= 0)
{

View file

@ -34,12 +34,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -47,6 +49,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -112,7 +116,7 @@ uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Me
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
@ -341,7 +345,7 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp));
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0)
{

Binary file not shown.

Binary file not shown.

View file

@ -27,12 +27,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -40,6 +42,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);

View file

@ -34,12 +34,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -47,6 +49,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -81,7 +85,7 @@ void store_bic(thread const uint& ix, thread const Bic& bic, const device Config
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix);
uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,133 +1,12 @@
struct Alloc
{
uint offset;
};
struct ElementRef
{
uint offset;
};
struct FillColorRef
{
uint offset;
};
struct FillColor
{
uint rgba_color;
};
struct FillLinGradientRef
{
uint offset;
};
struct FillLinGradient
{
uint index;
float2 p0;
float2 p1;
};
struct FillImageRef
{
uint offset;
};
struct FillImage
{
uint index;
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
uint blend;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct AnnoImageRef
{
uint offset;
};
struct AnnoImage
{
float4 bbox;
float linewidth;
uint index;
int2 offset;
};
struct AnnoColorRef
{
uint offset;
};
struct AnnoColor
{
float4 bbox;
float linewidth;
uint rgba_color;
};
struct AnnoLinGradientRef
{
uint offset;
};
struct AnnoLinGradient
{
float4 bbox;
float linewidth;
uint index;
float line_x;
float line_y;
float line_c;
};
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
uint blend;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
struct Alloc
{
uint offset;
};
@ -144,12 +23,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -157,18 +38,18 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _413 = { 0u, 0u };
static const DrawMonoid _437 = { 1u, 0u };
static const DrawMonoid _439 = { 1u, 1u };
static const DrawMonoid _23 = { 0u, 0u, 0u, 0u };
RWByteAddressBuffer _199 : register(u0, space0);
ByteAddressBuffer _223 : register(t2, space0);
ByteAddressBuffer _1020 : register(t3, space0);
ByteAddressBuffer _1054 : register(t1, space0);
ByteAddressBuffer _92 : register(t1, space0);
ByteAddressBuffer _102 : register(t2, space0);
ByteAddressBuffer _202 : register(t3, space0);
RWByteAddressBuffer _284 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -182,389 +63,44 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _223.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _378 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _378;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _437;
}
case 9u:
case 10u:
{
return _439;
}
default:
{
return _413;
}
}
uint has_path = uint(tag_word != 0u);
DrawMonoid _75 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
return _75;
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _212 = { ref.offset + (index * 36u) };
return _212;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid tag_monoid_identity()
DrawMonoid draw_monoid_identity()
{
return _413;
}
FillColor FillColor_read(FillColorRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
FillColor s;
s.rgba_color = raw0;
return s;
}
FillColor Element_FillColor_read(ElementRef ref)
{
FillColorRef _384 = { ref.offset + 4u };
FillColorRef param = _384;
return FillColor_read(param);
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_199.Store(offset * 4 + 8, val);
}
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17);
}
void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2);
AnnoColorRef _818 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoColorRef param_4 = _818;
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5);
}
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
uint raw2 = _223.Load((ix + 2u) * 4 + 0);
uint raw3 = _223.Load((ix + 3u) * 4 + 0);
uint raw4 = _223.Load((ix + 4u) * 4 + 0);
FillLinGradient s;
s.index = raw0;
s.p0 = float2(asfloat(raw1), asfloat(raw2));
s.p1 = float2(asfloat(raw3), asfloat(raw4));
return s;
}
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
{
FillLinGradientRef _392 = { ref.offset + 4u };
FillLinGradientRef param = _392;
return FillLinGradient_read(param);
}
void AnnoLinGradient_write(Alloc a, AnnoLinGradientRef ref, AnnoLinGradient s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = asuint(s.line_x);
write_mem(param_18, param_19, param_20);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = asuint(s.line_y);
write_mem(param_21, param_22, param_23);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = asuint(s.line_c);
write_mem(param_24, param_25, param_26);
}
void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinGradient s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2);
AnnoLinGradientRef _839 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoLinGradientRef param_4 = _839;
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5);
}
FillImage FillImage_read(FillImageRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
return s;
}
FillImage Element_FillImage_read(ElementRef ref)
{
FillImageRef _400 = { ref.offset + 4u };
FillImageRef param = _400;
return FillImage_read(param);
}
void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20);
}
void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2);
AnnoImageRef _860 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoImageRef param_4 = _860;
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5);
}
Clip Clip_read(ClipRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
uint raw2 = _223.Load((ix + 2u) * 4 + 0);
uint raw3 = _223.Load((ix + 3u) * 4 + 0);
Clip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.blend = _223.Load((ix + 4u) * 4 + 0);
return s;
}
Clip Element_BeginClip_read(ElementRef ref)
{
ClipRef _408 = { ref.offset + 4u };
ClipRef param = _408;
return Clip_read(param);
}
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.blend;
write_mem(param_15, param_16, param_17);
}
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2);
AnnoBeginClipRef _881 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoBeginClipRef param_4 = _881;
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5);
}
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = s.blend;
write_mem(param_12, param_13, param_14);
}
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 5u;
write_mem(param, param_1, param_2);
AnnoEndClipRef _902 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoEndClipRef param_4 = _902;
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5);
return _23;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _920 = { ix * 36u };
ElementRef ref = _920;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
uint drawtag_base = _92.Load(100) >> uint(2);
uint tag_word = _102.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
DrawMonoid local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
tag_word = _102.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
@ -574,194 +110,121 @@ void comp_main()
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _1026;
_1026.path_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_1026.clip_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _1026.path_ix;
row.clip_ix = _1026.clip_ix;
DrawMonoid _208;
_208.path_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 0);
_208.clip_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 4);
_208.scene_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 8);
_208.info_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 12);
row.path_ix = _208.path_ix;
row.clip_ix = _208.clip_ix;
row.scene_offset = _208.scene_offset;
row.info_offset = _208.info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _92.Load(104) >> uint(2);
uint drawinfo_base = _92.Load(68) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1054.Load(44) >> uint(2)) + (out_ix * 2u);
uint clip_out_base = _1054.Load(48) >> uint(2);
AnnotatedRef _1075 = { _1054.Load(32) + (out_ix * 40u) };
AnnotatedRef out_ref = _1075;
uint out_base = (_92.Load(44) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _92.Load(48) >> uint(2);
float4 mat;
float2 translate;
AnnoColor anno_fill;
Alloc param_18;
AnnoLinGradient anno_lin;
Alloc param_23;
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
AnnoEndClip anno_end_clip;
Alloc param_38;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_12 = m;
DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_199.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_199.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
ElementRef param_14 = ref;
uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16).tag;
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
_284.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix);
_284.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix);
_284.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset);
_284.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset);
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _102.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_1054.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_199.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_199.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_199.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_199.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
uint bbox_offset = (_92.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_284.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_284.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_284.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_284.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_199.Load((bbox_offset + 4u) * 4 + 8));
float linewidth = asfloat(_284.Load((bbox_offset + 4u) * 4 + 8));
uint fill_mode = uint(linewidth >= 0.0f);
if ((linewidth >= 0.0f) || (tag_word == 5u))
if ((linewidth >= 0.0f) || (tag_word == 276u))
{
uint trans_ix = _199.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_1054.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_199.Load(t * 4 + 8), _199.Load((t + 1u) * 4 + 8), _199.Load((t + 2u) * 4 + 8), _199.Load((t + 3u) * 4 + 8)));
if (tag_word == 5u)
uint trans_ix = _284.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_92.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_284.Load(t * 4 + 8), _284.Load((t + 1u) * 4 + 8), _284.Load((t + 2u) * 4 + 8), _284.Load((t + 3u) * 4 + 8)));
if (tag_word == 276u)
{
translate = asfloat(uint2(_199.Load((t + 4u) * 4 + 8), _199.Load((t + 5u) * 4 + 8)));
translate = asfloat(uint2(_284.Load((t + 4u) * 4 + 8), _284.Load((t + 5u) * 4 + 8)));
}
}
if (linewidth >= 0.0f)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
linewidth = max(linewidth, 0.0f);
switch (tag_word)
{
case 4u:
case 68u:
case 72u:
{
ElementRef param_17 = this_ref;
FillColor fill = Element_FillColor_read(param_17);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Alloc _1288;
_1288.offset = _1054.Load(32);
param_18.offset = _1288.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21);
_284.Store(di * 4 + 8, asuint(linewidth));
break;
}
case 5u:
case 276u:
{
ElementRef param_22 = this_ref;
FillLinGradient lin = Element_FillLinGradient_read(param_22);
anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth;
anno_lin.index = lin.index;
float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate;
float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate;
_284.Store(di * 4 + 8, asuint(linewidth));
uint index = _102.Load(dd * 4 + 0);
float2 p0 = asfloat(uint2(_102.Load((dd + 1u) * 4 + 0), _102.Load((dd + 2u) * 4 + 0)));
float2 p1 = asfloat(uint2(_102.Load((dd + 3u) * 4 + 0), _102.Load((dd + 4u) * 4 + 0)));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
Alloc _1384;
_1384.offset = _1054.Load(32);
param_23.offset = _1384.offset;
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26);
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_284.Store((di + 1u) * 4 + 8, asuint(line_x));
_284.Store((di + 2u) * 4 + 8, asuint(line_y));
_284.Store((di + 3u) * 4 + 8, asuint(line_c));
break;
}
case 6u:
case 5u:
{
ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27);
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Alloc _1412;
_1412.offset = _1054.Load(32);
param_28.offset = _1412.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31);
break;
}
case 9u:
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32);
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0f;
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != 3u) << uint(1);
Alloc _1442;
_1442.offset = _1054.Load(32);
param_33.offset = _1442.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = flags;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
break;
}
}
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_BeginClip_read(param_37);
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
anno_end_clip.blend = end_clip.blend;
uint flags_1 = uint(end_clip.blend != 3u) << uint(1);
Alloc _1480;
_1480.offset = _1054.Load(32);
param_38.offset = _1480.offset;
AnnotatedRef param_39 = out_ref;
uint param_40 = flags_1;
AnnoEndClip param_41 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, param_41);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u)
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
_199.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
_284.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
}
out_ref.offset += 40u;
}
}

View file

@ -44,145 +44,53 @@ struct spvUnsafeArray
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct ElementRef
struct Config
{
uint offset;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct FillColorRef
struct ConfigBuf
{
uint offset;
};
struct FillColor
{
uint rgba_color;
};
struct FillLinGradientRef
{
uint offset;
};
struct FillLinGradient
{
uint index;
float2 p0;
float2 p1;
};
struct FillImageRef
{
uint offset;
};
struct FillImage
{
uint index;
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
uint blend;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct AnnoImageRef
{
uint offset;
};
struct AnnoImage
{
float4 bbox;
float linewidth;
uint index;
int2 offset;
};
struct AnnoColorRef
{
uint offset;
};
struct AnnoColor
{
float4 bbox;
float linewidth;
uint rgba_color;
};
struct AnnoLinGradientRef
{
uint offset;
};
struct AnnoLinGradient
{
float4 bbox;
float linewidth;
uint index;
float line_x;
float line_y;
float line_c;
};
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
uint blend;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
Config conf;
};
struct SceneBuf
@ -194,6 +102,8 @@ struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct ParentBuf
@ -201,442 +111,56 @@ struct ParentBuf
DrawMonoid_1 parent[1];
};
struct Alloc_1
struct Memory
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_223)
{
uint tag_and_flags = v_223.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
case 10u:
{
return DrawMonoid{ 1u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
static inline __attribute__((always_inline))
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
FillColor s;
s.rgba_color = raw0;
return s;
}
static inline __attribute__((always_inline))
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillColorRef param = FillColorRef{ ref.offset + 4u };
return FillColor_read(param, v_223);
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_199)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_199.memory[offset] = val;
}
static inline __attribute__((always_inline))
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17, v_199);
}
static inline __attribute__((always_inline))
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
uint raw2 = v_223.scene[ix + 2u];
uint raw3 = v_223.scene[ix + 3u];
uint raw4 = v_223.scene[ix + 4u];
FillLinGradient s;
s.index = raw0;
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
s.p1 = float2(as_type<float>(raw3), as_type<float>(raw4));
return s;
}
static inline __attribute__((always_inline))
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
return FillLinGradient_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_199);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.line_x);
write_mem(param_18, param_19, param_20, v_199);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.line_y);
write_mem(param_21, param_22, param_23, v_199);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = as_type<uint>(s.line_c);
write_mem(param_24, param_25, param_26, v_199);
}
static inline __attribute__((always_inline))
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
return s;
}
static inline __attribute__((always_inline))
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillImageRef param = FillImageRef{ ref.offset + 4u };
return FillImage_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_199);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20, v_199);
}
static inline __attribute__((always_inline))
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
uint raw2 = v_223.scene[ix + 2u];
uint raw3 = v_223.scene[ix + 3u];
Clip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.blend = v_223.scene[ix + 4u];
return s;
}
static inline __attribute__((always_inline))
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.blend;
write_mem(param_15, param_16, param_17, v_199);
}
static inline __attribute__((always_inline))
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = s.blend;
write_mem(param_12, param_13, param_14, v_199);
}
static inline __attribute__((always_inline))
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoEndClip& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 5u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5, v_199);
}
kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1054 [[buffer(1)]], const device SceneBuf& v_223 [[buffer(2)]], const device ParentBuf& _1020 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 [[buffer(1)]], const device SceneBuf& _102 [[buffer(2)]], const device ParentBuf& _202 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_223).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
uint drawtag_base = _92.conf.drawtag_offset >> uint(2);
uint tag_word = _102.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_223).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
tag_word = _102.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
@ -646,181 +170,117 @@ kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _1023 = gl_WorkGroupID.x - 1u;
row.path_ix = _1020.parent[_1023].path_ix;
row.clip_ix = _1020.parent[_1023].clip_ix;
uint _205 = gl_WorkGroupID.x - 1u;
row.path_ix = _202.parent[_205].path_ix;
row.clip_ix = _202.parent[_205].clip_ix;
row.scene_offset = _202.parent[_205].scene_offset;
row.info_offset = _202.parent[_205].info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _92.conf.drawdata_offset >> uint(2);
uint drawinfo_base = _92.conf.drawinfo_alloc.offset >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1054.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
uint clip_out_base = _1054.conf.clip_alloc.offset >> uint(2);
AnnotatedRef out_ref = AnnotatedRef{ _1054.conf.anno_alloc.offset + (out_ix * 40u) };
uint out_base = (_92.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _92.conf.clip_alloc.offset >> uint(2);
float4 mat;
float2 translate;
AnnoColor anno_fill;
Alloc param_18;
AnnoLinGradient anno_lin;
Alloc param_23;
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
AnnoEndClip anno_end_clip;
Alloc param_38;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_12 = m;
DrawMonoid param_13 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13);
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
v_199.memory[out_base + (i_2 * 2u)] = m.path_ix;
v_199.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
ElementRef param_14 = ref;
uint param_15 = i_2;
ElementRef this_ref = Element_index(param_14, param_15);
ElementRef param_16 = this_ref;
tag_word = Element_tag(param_16, v_223).tag;
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u))
_284.memory[out_base + (i_2 * 4u)] = m.path_ix;
_284.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix;
_284.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset;
_284.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset;
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _102.scene[(drawtag_base + ix) + i_2];
if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_1054.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(v_199.memory[bbox_offset]) - 32768.0;
float bbox_t = float(v_199.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(v_199.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(v_199.memory[bbox_offset + 3u]) - 32768.0;
uint bbox_offset = (_92.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_284.memory[bbox_offset]) - 32768.0;
float bbox_t = float(_284.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(_284.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(_284.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(v_199.memory[bbox_offset + 4u]);
float linewidth = as_type<float>(_284.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0);
if ((linewidth >= 0.0) || (tag_word == 5u))
if ((linewidth >= 0.0) || (tag_word == 276u))
{
uint trans_ix = v_199.memory[bbox_offset + 5u];
uint t = (_1054.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(v_199.memory[t], v_199.memory[t + 1u], v_199.memory[t + 2u], v_199.memory[t + 3u]));
if (tag_word == 5u)
uint trans_ix = _284.memory[bbox_offset + 5u];
uint t = (_92.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(_284.memory[t], _284.memory[t + 1u], _284.memory[t + 2u], _284.memory[t + 3u]));
if (tag_word == 276u)
{
translate = as_type<float2>(uint2(v_199.memory[t + 4u], v_199.memory[t + 5u]));
translate = as_type<float2>(uint2(_284.memory[t + 4u], _284.memory[t + 5u]));
}
}
if (linewidth >= 0.0)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
linewidth = fast::max(linewidth, 0.0);
switch (tag_word)
{
case 4u:
case 68u:
case 72u:
{
ElementRef param_17 = this_ref;
FillColor fill = Element_FillColor_read(param_17, v_223);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
param_18.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21, v_199);
_284.memory[di] = as_type<uint>(linewidth);
break;
}
case 5u:
case 276u:
{
ElementRef param_22 = this_ref;
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_223);
anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth;
anno_lin.index = lin.index;
float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate;
float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate;
_284.memory[di] = as_type<uint>(linewidth);
uint index = _102.scene[dd];
float2 p0 = as_type<float2>(uint2(_102.scene[dd + 1u], _102.scene[dd + 2u]));
float2 p1 = as_type<float2>(uint2(_102.scene[dd + 3u], _102.scene[dd + 4u]));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
param_23.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_199);
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_284.memory[di + 1u] = as_type<uint>(line_x);
_284.memory[di + 2u] = as_type<uint>(line_y);
_284.memory[di + 3u] = as_type<uint>(line_c);
break;
}
case 6u:
case 5u:
{
ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27, v_223);
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
param_28.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31, v_199);
break;
}
case 9u:
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32, v_223);
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0;
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != 3u) << uint(1);
param_33.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = flags;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_199);
break;
}
}
}
else
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_BeginClip_read(param_37, v_223);
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
anno_end_clip.blend = end_clip.blend;
uint flags_1 = uint(end_clip.blend != 3u) << uint(1);
param_38.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_39 = out_ref;
uint param_40 = flags_1;
AnnoEndClip param_41 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, param_41, v_199);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u)
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
v_199.memory[clip_out_base + m.clip_ix] = path_ix;
_284.memory[clip_out_base + m.clip_ix] = path_ix;
}
out_ref.offset += 40u;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,22 +1,11 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
struct Alloc
{
uint offset;
@ -34,12 +23,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -47,16 +38,16 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const DrawMonoid _87 = { 1u, 0u };
static const DrawMonoid _89 = { 1u, 1u };
static const DrawMonoid _91 = { 0u, 0u };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _46 : register(t2, space0);
RWByteAddressBuffer _199 : register(u3, space0);
RWByteAddressBuffer _213 : register(u0, space0);
ByteAddressBuffer _219 : register(t1, space0);
ByteAddressBuffer _86 : register(t1, space0);
ByteAddressBuffer _96 : register(t2, space0);
RWByteAddressBuffer _187 : register(u3, space0);
RWByteAddressBuffer _205 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -70,68 +61,37 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _60;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _87;
}
case 9u:
case 10u:
{
return _89;
}
default:
{
return _91;
}
}
uint has_path = uint(tag_word != 0u);
DrawMonoid _69 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
return _69;
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _39 = { ref.offset + (index * 36u) };
return _39;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _107 = { ix * 36u };
ElementRef ref = _107;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
uint drawtag_base = _86.Load(100) >> uint(2);
uint tag_word = _96.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
uint tag_word_1 = _96.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
@ -140,17 +100,19 @@ void comp_main()
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_199.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix);
_199.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix);
_187.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix);
_187.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix);
_187.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset);
_187.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset);
}
}

View file

@ -5,48 +5,14 @@
using namespace metal;
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
struct Alloc
{
uint offset;
@ -64,12 +30,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -77,6 +45,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -84,70 +54,66 @@ struct ConfigBuf
Config conf;
};
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46)
struct SceneBuf
{
uint tag_and_flags = v_46.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
case 10u:
{
return DrawMonoid{ 1u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneBuf& _96 [[buffer(2)]], device OutBuf& _187 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_46).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
uint drawtag_base = _86.conf.drawtag_offset >> uint(2);
uint tag_word = _96.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_46).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
uint tag_word_1 = _96.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
@ -156,17 +122,19 @@ kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_199.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_199.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
_187.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_187.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
_187.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset;
_187.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -2,13 +2,15 @@ struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u };
static const DrawMonoid _18 = { 0u, 0u, 0u, 0u };
RWByteAddressBuffer _57 : register(u0, space0);
RWByteAddressBuffer _71 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -20,15 +22,17 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256];
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid tag_monoid_identity()
DrawMonoid draw_monoid_identity()
{
return _18;
}
@ -36,22 +40,30 @@ DrawMonoid tag_monoid_identity()
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _61;
_61.path_ix = _57.Load(ix * 8 + 0);
_61.clip_ix = _57.Load(ix * 8 + 4);
DrawMonoid _75;
_75.path_ix = _71.Load(ix * 16 + 0);
_75.clip_ix = _71.Load(ix * 16 + 4);
_75.scene_offset = _71.Load(ix * 16 + 8);
_75.info_offset = _71.Load(ix * 16 + 12);
DrawMonoid local[8];
local[0].path_ix = _61.path_ix;
local[0].clip_ix = _61.clip_ix;
local[0].path_ix = _75.path_ix;
local[0].clip_ix = _75.clip_ix;
local[0].scene_offset = _75.scene_offset;
local[0].info_offset = _75.info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
DrawMonoid param = local[i - 1u];
DrawMonoid _88;
_88.path_ix = _57.Load((ix + i) * 8 + 0);
_88.clip_ix = _57.Load((ix + i) * 8 + 4);
param_1.path_ix = _88.path_ix;
param_1.clip_ix = _88.clip_ix;
local[i] = combine_tag_monoid(param, param_1);
DrawMonoid _106;
_106.path_ix = _71.Load((ix + i) * 16 + 0);
_106.clip_ix = _71.Load((ix + i) * 16 + 4);
_106.scene_offset = _71.Load((ix + i) * 16 + 8);
_106.info_offset = _71.Load((ix + i) * 16 + 12);
param_1.path_ix = _106.path_ix;
param_1.clip_ix = _106.clip_ix;
param_1.scene_offset = _106.scene_offset;
param_1.info_offset = _106.info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
@ -63,13 +75,13 @@ void comp_main()
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
agg = combine_draw_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
@ -78,10 +90,12 @@ void comp_main()
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _177 = ix + i_2;
_57.Store(_177 * 8 + 0, m.path_ix);
_57.Store(_177 * 8 + 4, m.clip_ix);
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.Store(_199 * 16 + 0, m.path_ix);
_71.Store(_199 * 16 + 4, m.clip_ix);
_71.Store(_199 * 16 + 8, m.scene_offset);
_71.Store(_199 * 16 + 12, m.info_offset);
}
}

View file

@ -48,12 +48,16 @@ struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DataBuf
@ -64,35 +68,41 @@ struct DataBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _57.data[ix].path_ix;
local[0].clip_ix = _57.data[ix].clip_ix;
local[0].path_ix = _71.data[ix].path_ix;
local[0].clip_ix = _71.data[ix].clip_ix;
local[0].scene_offset = _71.data[ix].scene_offset;
local[0].info_offset = _71.data[ix].info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _82 = ix + i;
uint _100 = ix + i;
DrawMonoid param = local[i - 1u];
param_1.path_ix = _57.data[_82].path_ix;
param_1.clip_ix = _57.data[_82].clip_ix;
local[i] = combine_tag_monoid(param, param_1);
param_1.path_ix = _71.data[_100].path_ix;
param_1.clip_ix = _71.data[_100].clip_ix;
param_1.scene_offset = _71.data[_100].scene_offset;
param_1.info_offset = _71.data[_100].info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
@ -104,13 +114,13 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
agg = combine_draw_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
@ -119,10 +129,12 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _177 = ix + i_2;
_57.data[_177].path_ix = m.path_ix;
_57.data[_177].clip_ix = m.clip_ix;
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.data[_199].path_ix = m.path_ix;
_71.data[_199].clip_ix = m.clip_ix;
_71.data[_199].scene_offset = m.scene_offset;
_71.data[_199].info_offset = m.info_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -125,12 +125,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -138,6 +140,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);

View file

@ -183,12 +183,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -196,6 +198,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -125,12 +125,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -138,6 +140,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);

View file

@ -183,12 +183,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -196,6 +198,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -84,12 +84,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -97,6 +99,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);

View file

@ -144,12 +144,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -157,6 +159,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -62,12 +62,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -75,6 +77,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -361,7 +365,7 @@ uint round_up(float x)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -400,8 +404,8 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix;
uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };

View file

@ -127,12 +127,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -140,6 +142,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -635,7 +639,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.conf.bbox_alloc.offset >> uint(2)) + (path_ix * 6u);
uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{

Binary file not shown.

Binary file not shown.

View file

@ -24,12 +24,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -37,6 +39,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
@ -88,7 +92,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(84) >> uint(2)) + ix;
uint scene_ix = (_139.Load(92) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word;
TagMonoid agg = reduce_tag(param);

View file

@ -31,12 +31,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -44,6 +46,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -9,28 +9,6 @@ struct MallocResult
bool failed;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -59,12 +37,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -72,12 +52,15 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _92 : register(u0, space0);
ByteAddressBuffer _314 : register(t1, space0);
RWByteAddressBuffer _70 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
ByteAddressBuffer _257 : register(t2, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -90,62 +73,15 @@ struct SPIRV_Cross_Input
groupshared uint sh_tile_count[256];
groupshared MallocResult sh_tile_alloc;
bool touch_mem(Alloc alloc, uint offset)
float4 load_draw_bbox(uint draw_ix)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _92.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _246 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _246;
}
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
AnnoEndClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.blend = raw4;
return s;
}
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
{
AnnoEndClipRef _252 = { ref.offset + 4u };
Alloc param = a;
AnnoEndClipRef param_1 = _252;
return AnnoEndClip_read(param, param_1);
uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_70.Load(base * 4 + 8));
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_70.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_70.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -157,22 +93,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size)
{
uint _98;
_92.InterlockedAdd(0, size, _98);
uint offset = _98;
uint _105;
_92.GetDimensions(_105);
_105 = (_105 - 8) / 4;
uint _76;
_70.InterlockedAdd(0, size, _76);
uint offset = _76;
uint _83;
_70.GetDimensions(_83);
_83 = (_83 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_105) * 4);
r.failed = (offset + size) > uint(int(_83) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _127;
_92.InterlockedMax(4, 1u, _127);
uint _105;
_70.InterlockedMax(4, 1u, _105);
return r;
}
return r;
@ -180,8 +116,13 @@ MallocResult malloc(uint size)
Alloc slice_mem(Alloc a, uint offset, uint size)
{
Alloc _169 = { a.offset + offset };
return _169;
Alloc _131 = { a.offset + offset };
return _131;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
@ -192,7 +133,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_92.Store(offset * 4 + 8, val);
_70.Store(offset * 4 + 8, val);
}
void Path_write(Alloc a, PathRef ref, Path s)
@ -216,53 +157,35 @@ void comp_main()
{
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef _321 = { _314.Load(16) + (element_ix * 12u) };
PathRef path_ref = _321;
AnnotatedRef _330 = { _314.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _330;
uint tag = 0u;
if (element_ix < _314.Load(0))
PathRef _241 = { _181.Load(16) + (element_ix * 12u) };
PathRef path_ref = _241;
uint drawtag_base = _181.Load(100) >> uint(2);
uint drawtag = 0u;
if (element_ix < _181.Load(0))
{
Alloc _341;
_341.offset = _314.Load(32);
Alloc param;
param.offset = _341.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag;
drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0);
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
switch (tag)
if ((drawtag != 0u) && (drawtag != 37u))
{
case 1u:
case 2u:
case 3u:
case 4u:
case 5u:
{
Alloc _359;
_359.offset = _314.Load(32);
Alloc param_2;
param_2.offset = _359.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3);
x0 = int(floor(clip.bbox.x * 0.0625f));
y0 = int(floor(clip.bbox.y * 0.0625f));
x1 = int(ceil(clip.bbox.z * 0.0625f));
y1 = int(ceil(clip.bbox.w * 0.0625f));
break;
uint param = element_ix;
float4 bbox = load_draw_bbox(param);
x0 = int(floor(bbox.x * 0.0625f));
y0 = int(floor(bbox.y * 0.0625f));
x1 = int(ceil(bbox.z * 0.0625f));
y1 = int(ceil(bbox.w * 0.0625f));
}
}
x0 = clamp(x0, 0, int(_314.Load(8)));
y0 = clamp(y0, 0, int(_314.Load(12)));
x1 = clamp(x1, 0, int(_314.Load(8)));
y1 = clamp(y1, 0, int(_314.Load(12)));
x0 = clamp(x0, 0, int(_181.Load(8)));
y0 = clamp(y0, 0, int(_181.Load(12)));
x1 = clamp(x1, 0, int(_181.Load(8)));
y1 = clamp(y1, 0, int(_181.Load(12)));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
if (tag == 5u)
if (drawtag == 37u)
{
tile_count = 0u;
}
@ -280,59 +203,59 @@ void comp_main()
}
if (th_ix == 255u)
{
uint param_4 = total_tile_count * 8u;
MallocResult _485 = malloc(param_4);
sh_tile_alloc = _485;
uint param_1 = total_tile_count * 8u;
MallocResult _396 = malloc(param_1);
sh_tile_alloc = _396;
}
GroupMemoryBarrierWithGroupSync();
MallocResult alloc_start = sh_tile_alloc;
bool _496;
bool _407;
if (!alloc_start.failed)
{
_496 = _92.Load(4) != 0u;
_407 = _70.Load(4) != 0u;
}
else
{
_496 = alloc_start.failed;
_407 = alloc_start.failed;
}
if (_496)
if (_407)
{
return;
}
if (element_ix < _314.Load(0))
if (element_ix < _181.Load(0))
{
uint _509;
uint _420;
if (th_ix > 0u)
{
_509 = sh_tile_count[th_ix - 1u];
_420 = sh_tile_count[th_ix - 1u];
}
else
{
_509 = 0u;
_420 = 0u;
}
uint tile_subix = _509;
Alloc param_5 = alloc_start.alloc;
uint param_6 = 8u * tile_subix;
uint param_7 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);
TileRef _531 = { tiles_alloc.offset };
path.tiles = _531;
Alloc _536;
_536.offset = _314.Load(16);
Alloc param_8;
param_8.offset = _536.offset;
PathRef param_9 = path_ref;
Path param_10 = path;
Path_write(param_8, param_9, param_10);
uint tile_subix = _420;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
TileRef _442 = { tiles_alloc.offset };
path.tiles = _442;
Alloc _448;
_448.offset = _181.Load(16);
Alloc param_5;
param_5.offset = _448.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_11 = alloc_start.alloc;
uint param_12 = start_ix + i_1;
uint param_13 = 0u;
write_mem(param_11, param_12, param_13);
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10);
}
}

View file

@ -18,28 +18,6 @@ struct MallocResult
bool failed;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef
{
uint offset;
@ -80,12 +58,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -93,6 +73,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
@ -100,67 +82,23 @@ struct ConfigBuf
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_92, constant uint& v_92BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_92.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_92, v_92BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_92, v_92BufferSize);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_92, v_92BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_92, v_92BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_92, v_92BufferSize);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_92, v_92BufferSize);
AnnoEndClip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.blend = raw4;
return s;
}
static inline __attribute__((always_inline))
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
Alloc param = a;
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u };
return AnnoEndClip_read(param, param_1, v_92, v_92BufferSize);
uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_70.memory[base]);
float y0 = as_type<float>(v_70.memory[base + 1u]);
float x1 = as_type<float>(v_70.memory[base + 2u]);
float y1 = as_type<float>(v_70.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
@ -172,19 +110,19 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_92, constant uint& v_92BufferSize)
MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
{
uint _98 = atomic_fetch_add_explicit((device atomic_uint*)&v_92.mem_offset, size, memory_order_relaxed);
uint offset = _98;
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
uint offset = _76;
MallocResult r;
r.failed = (offset + size) > uint(int((v_92BufferSize - 8) / 4) * 4);
r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _127 = atomic_fetch_max_explicit((device atomic_uint*)&v_92.mem_error, 1u, memory_order_relaxed);
uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
@ -197,7 +135,13 @@ Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const u
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_92, constant uint& v_92BufferSize)
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
@ -205,75 +149,62 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_92.memory[offset] = val;
v_70.memory[offset] = val;
}
static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_92, constant uint& v_92BufferSize)
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_92, v_92BufferSize);
write_mem(param, param_1, param_2, v_70, v_70BufferSize);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_92, v_92BufferSize);
write_mem(param_3, param_4, param_5, v_70, v_70BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_92, v_92BufferSize);
write_mem(param_6, param_7, param_8, v_70, v_70BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_92 [[buffer(0)]], const device ConfigBuf& _314 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup uint sh_tile_count[256];
threadgroup MallocResult sh_tile_alloc;
constant uint& v_92BufferSize = spvBufferSizeConstants[0];
constant uint& v_70BufferSize = spvBufferSizeConstants[0];
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ _314.conf.tile_alloc.offset + (element_ix * 12u) };
AnnotatedRef ref = AnnotatedRef{ _314.conf.anno_alloc.offset + (element_ix * 40u) };
uint tag = 0u;
if (element_ix < _314.conf.n_elements)
PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_181.conf.drawtag_offset >> uint(2);
uint drawtag = 0u;
if (element_ix < v_181.conf.n_elements)
{
Alloc param;
param.offset = _314.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_92, v_92BufferSize).tag;
drawtag = _257.scene[drawtag_base + element_ix];
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
switch (tag)
if ((drawtag != 0u) && (drawtag != 37u))
{
case 1u:
case 2u:
case 3u:
case 4u:
case 5u:
{
Alloc param_2;
param_2.offset = _314.conf.anno_alloc.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_92, v_92BufferSize);
x0 = int(floor(clip.bbox.x * 0.0625));
y0 = int(floor(clip.bbox.y * 0.0625));
x1 = int(ceil(clip.bbox.z * 0.0625));
y1 = int(ceil(clip.bbox.w * 0.0625));
break;
uint param = element_ix;
float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181);
x0 = int(floor(bbox.x * 0.0625));
y0 = int(floor(bbox.y * 0.0625));
x1 = int(ceil(bbox.z * 0.0625));
y1 = int(ceil(bbox.w * 0.0625));
}
}
x0 = clamp(x0, 0, int(_314.conf.width_in_tiles));
y0 = clamp(y0, 0, int(_314.conf.height_in_tiles));
x1 = clamp(x1, 0, int(_314.conf.width_in_tiles));
y1 = clamp(y1, 0, int(_314.conf.height_in_tiles));
x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
if (tag == 5u)
if (drawtag == 37u)
{
tile_count = 0u;
}
@ -291,56 +222,56 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
if (th_ix == 255u)
{
uint param_4 = total_tile_count * 8u;
MallocResult _485 = malloc(param_4, v_92, v_92BufferSize);
sh_tile_alloc = _485;
uint param_1 = total_tile_count * 8u;
MallocResult _396 = malloc(param_1, v_70, v_70BufferSize);
sh_tile_alloc = _396;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
MallocResult alloc_start = sh_tile_alloc;
bool _496;
bool _407;
if (!alloc_start.failed)
{
_496 = v_92.mem_error != 0u;
_407 = v_70.mem_error != 0u;
}
else
{
_496 = alloc_start.failed;
_407 = alloc_start.failed;
}
if (_496)
if (_407)
{
return;
}
if (element_ix < _314.conf.n_elements)
if (element_ix < v_181.conf.n_elements)
{
uint _509;
uint _420;
if (th_ix > 0u)
{
_509 = sh_tile_count[th_ix - 1u];
_420 = sh_tile_count[th_ix - 1u];
}
else
{
_509 = 0u;
_420 = 0u;
}
uint tile_subix = _509;
Alloc param_5 = alloc_start.alloc;
uint param_6 = 8u * tile_subix;
uint param_7 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);
uint tile_subix = _420;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
path.tiles = TileRef{ tiles_alloc.offset };
Alloc param_8;
param_8.offset = _314.conf.tile_alloc.offset;
PathRef param_9 = path_ref;
Path param_10 = path;
Path_write(param_8, param_9, param_10, v_92, v_92BufferSize);
Alloc param_5;
param_5.offset = v_181.conf.tile_alloc.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7, v_70, v_70BufferSize);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_11 = alloc_start.alloc;
uint param_12 = start_ix + i_1;
uint param_13 = 0u;
write_mem(param_11, param_12, param_13, v_92, v_92BufferSize);
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
}
}

Binary file not shown.

Binary file not shown.

View file

@ -37,12 +37,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -50,6 +52,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -155,7 +159,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(76) + (ix * 24u) };
TransformRef _285 = { _278.Load(84) + (ix * 24u) };
TransformRef ref = _285;
TransformRef param = ref;
Transform agg = Transform_read(param);

View file

@ -100,12 +100,14 @@ struct Config
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -113,6 +115,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -26,12 +26,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -39,6 +41,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -92,7 +96,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(76) + (ix * 24u) };
TransformRef _168 = { _161.Load(84) + (ix * 24u) };
TransformRef ref = _168;
TransformRef param = ref;
Transform agg = Transform_read(param);

View file

@ -38,12 +38,14 @@ struct Config
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
@ -51,6 +53,8 @@ struct Config
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -248,7 +248,7 @@ void main() {
barrier();
uint path_ix = save_path_ix;
uint bbox_out_ix = (conf.bbox_alloc.offset >> 2) + path_ix * 6;
uint bbox_out_ix = (conf.path_bbox_alloc.offset >> 2) + path_ix * 6;
// Write bboxes to paths; do atomic min/max if partial
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0) {

View file

@ -42,7 +42,7 @@ struct Config {
// new element pipeline stuff follows
// Bounding boxes of paths, stored as int (so atomics work)
Alloc bbox_alloc;
Alloc path_bbox_alloc;
// Monoid for draw objects
Alloc drawmonoid_alloc;
@ -54,6 +54,10 @@ struct Config {
Alloc clip_stack_alloc;
// Clip processing results (path_ix + bbox)
Alloc clip_bbox_alloc;
// Bounding box per draw object
Alloc draw_bbox_alloc;
// Info computed in draw stage, per draw object
Alloc drawinfo_alloc;
// Number of transforms in scene
// This is probably not needed.
@ -63,6 +67,10 @@ struct Config {
uint n_path;
// Total number of BeginClip and EndClip draw objects.
uint n_clip;
// Note: one of these offsets *could* be hardcoded to zero (as was the
// original element stream), but for now retain flexibility.
// Offset (in bytes) of transform stream in scene buffer
uint trans_offset;
// Offset (in bytes) of linewidth stream in scene
@ -71,6 +79,10 @@ struct Config {
uint pathtag_offset;
// Offset (in bytes) of path segment stream in scene
uint pathseg_offset;
// Offset (in bytes) of draw object tag stream in scene; see drawtag.h
uint drawtag_offset;
// Offset (in bytes) of draw payload stream in scene
uint drawdata_offset;
};
#endif

View file

@ -17,7 +17,11 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "drawtag.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
@ -27,31 +31,39 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
shared uint sh_tile_count[TILE_ALLOC_WG];
shared MallocResult sh_tile_alloc;
vec4 load_draw_bbox(uint draw_ix) {
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
float x0 = uintBitsToFloat(memory[base]);
float y0 = uintBitsToFloat(memory[base + 1]);
float x1 = uintBitsToFloat(memory[base + 2]);
float y1 = uintBitsToFloat(memory[base + 3]);
vec4 bbox = vec4(x0, y0, x1, y1);
return bbox;
}
void main() {
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
// At the moment, element_ix == path_ix. The clip-intersected bounding boxes
// for elements (draw objects) are computed in the binning stage, but at some
// point we'll probably want to break that correspondence. Tiles should be
// allocated for paths, not draw objs. EndClip doesn't need an allocation.
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint drawtag_base = conf.drawtag_offset >> 2;
uint tag = Annotated_Nop;
uint drawtag = Drawtag_Nop;
if (element_ix < conf.n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
drawtag = scene[drawtag_base + element_ix];
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Color:
case Annotated_LinGradient:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that fills, strokes, and
// clips have compatible layout.
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
break;
// Allocate an empty path for EndClip; at some point we'll change
// this to be per path rather than per draw object.
if (drawtag != Drawtag_Nop && drawtag != Drawtag_EndClip) {
vec4 bbox = load_draw_bbox(element_ix);
x0 = int(floor(bbox.x * SX));
y0 = int(floor(bbox.y * SY));
x1 = int(ceil(bbox.z * SX));
y1 = int(ceil(bbox.w * SY));
}
x0 = clamp(x0, 0, int(conf.width_in_tiles));
y0 = clamp(y0, 0, int(conf.height_in_tiles));
@ -61,11 +73,6 @@ void main() {
Path path;
path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0);
if (tag == Annotated_EndClip) {
// Don't actually allocate tiles for an end clip, but we do want
// the path structure (especially bbox) allocated for it.
tile_count = 0;
}
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;

View file

@ -30,7 +30,8 @@ pub struct Encoder {
tag_stream: Vec<u8>,
pathseg_stream: Vec<u8>,
linewidth_stream: Vec<f32>,
drawobj_stream: Vec<u8>,
drawtag_stream: Vec<u32>,
drawdata_stream: Vec<u8>,
n_path: u32,
n_pathseg: u32,
n_clip: u32,
@ -43,53 +44,54 @@ pub struct Encoder {
pub struct GlyphEncoder {
tag_stream: Vec<u8>,
pathseg_stream: Vec<u8>,
drawobj_stream: Vec<u8>,
drawtag_stream: Vec<u32>,
drawdata_stream: Vec<u8>,
n_path: u32,
n_pathseg: u32,
}
// Currently same as Element, but may change - should become packed.
const DRAWOBJ_SIZE: usize = 36;
const TRANSFORM_SIZE: usize = 24;
const LINEWIDTH_SIZE: usize = 4;
const PATHSEG_SIZE: usize = 52;
const BBOX_SIZE: usize = 24;
const DRAWMONOID_SIZE: usize = 8;
const PATH_BBOX_SIZE: usize = 24;
const DRAWMONOID_SIZE: usize = 16;
const DRAW_BBOX_SIZE: usize = 16;
const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40;
// These are bytemuck versions of elements currently defined in the
// Element struct in piet-gpu-types; that's pretty much going away.
const ELEMENT_FILLCOLOR: u32 = 4;
const ELEMENT_FILLLINGRADIENT: u32 = 5;
const ELEMENT_BEGINCLIP: u32 = 9;
const ELEMENT_ENDCLIP: u32 = 10;
// Tags for draw objects. See shader/drawtag.h for the authoritative source.
const DRAWTAG_FILLCOLOR: u32 = 0x44;
const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
const DRAWTAG_BEGINCLIP: u32 = 0x05;
const DRAWTAG_ENDCLIP: u32 = 0x25;
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillColor {
tag: u32,
rgba_color: u32,
padding: [u32; 7],
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillLinGradient {
tag: u32,
index: u32,
p0: [f32; 2],
p1: [f32; 2],
padding: [u32; 3],
}
#[allow(unused)]
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillImage {
index: u32,
// [i16; 2]
offset: u32,
}
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct Clip {
tag: u32,
bbox: [f32; 4],
blend: u32,
padding: [u32; 3],
}
impl Encoder {
@ -99,7 +101,8 @@ impl Encoder {
tag_stream: Vec::new(),
pathseg_stream: Vec::new(),
linewidth_stream: vec![-1.0],
drawobj_stream: Vec::new(),
drawtag_stream: Vec::new(),
drawdata_stream: Vec::new(),
n_path: 0,
n_pathseg: 0,
n_clip: 0,
@ -130,51 +133,42 @@ impl Encoder {
///
/// This should be encoded after a path.
pub fn fill_color(&mut self, rgba_color: u32) {
self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
let element = FillColor {
tag: ELEMENT_FILLCOLOR,
rgba_color,
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.drawdata_stream.extend(bytemuck::bytes_of(&element));
}
/// Encode a fill linear gradient draw object.
///
/// This should be encoded after a path.
pub fn fill_lin_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2]) {
self.drawtag_stream.push(DRAWTAG_FILLLINGRADIENT);
let element = FillLinGradient {
tag: ELEMENT_FILLLINGRADIENT,
index,
p0,
p1,
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.drawdata_stream.extend(bytemuck::bytes_of(&element));
}
/// Start a clip and return a save point to be filled in later.
pub fn begin_clip(&mut self, blend: Option<Blend>) -> usize {
let saved = self.drawobj_stream.len();
/// Start a clip.
pub fn begin_clip(&mut self, blend: Option<Blend>) {
self.drawtag_stream.push(DRAWTAG_BEGINCLIP);
let element = Clip {
tag: ELEMENT_BEGINCLIP,
blend: blend.unwrap_or(Blend::default()).pack(),
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.drawdata_stream.extend(bytemuck::bytes_of(&element));
self.n_clip += 1;
saved
}
pub fn end_clip(&mut self, bbox: [f32; 4], blend: Option<Blend>, save_point: usize) {
pub fn end_clip(&mut self, blend: Option<Blend>) {
self.drawtag_stream.push(DRAWTAG_ENDCLIP);
let element = Clip {
tag: ELEMENT_ENDCLIP,
bbox,
blend: blend.unwrap_or(Blend::default()).pack(),
..Default::default()
};
self.drawobj_stream[save_point + 4..save_point + 20]
.clone_from_slice(bytemuck::bytes_of(&bbox));
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.drawdata_stream.extend(bytemuck::bytes_of(&element));
// This is a dummy path, and will go away with the new clip impl.
self.tag_stream.push(0x10);
self.n_path += 1;
@ -187,9 +181,11 @@ impl Encoder {
/// beginning of free memory.
pub fn stage_config(&self) -> (Config, usize) {
// Layout of scene buffer
let drawtag_offset = 0;
let n_drawobj = self.n_drawobj();
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE;
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
let trans_offset = drawdata_offset + self.drawdata_stream.len();
let n_trans = self.transform_stream.len();
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
@ -205,9 +201,9 @@ impl Encoder {
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
let pathseg_alloc = alloc;
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
let bbox_alloc = alloc;
let path_bbox_alloc = alloc;
let n_path = self.n_path as usize;
alloc += bbox_alloc + n_path * BBOX_SIZE;
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
let drawmonoid_alloc = alloc;
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc;
@ -226,6 +222,12 @@ impl Encoder {
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let draw_bbox_alloc = alloc;
alloc += n_drawobj * DRAW_BBOX_SIZE;
let drawinfo_alloc = alloc;
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
const MAX_DRAWINFO_SIZE: usize = 16;
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
let config = Config {
n_elements: n_drawobj as u32,
@ -233,12 +235,14 @@ impl Encoder {
pathseg_alloc: pathseg_alloc as u32,
anno_alloc: anno_alloc as u32,
trans_alloc: trans_alloc as u32,
bbox_alloc: bbox_alloc as u32,
path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
draw_bbox_alloc: draw_bbox_alloc as u32,
drawinfo_alloc: drawinfo_alloc as u32,
n_trans: n_trans as u32,
n_path: self.n_path,
n_clip: self.n_clip,
@ -246,15 +250,18 @@ impl Encoder {
linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32,
pathseg_offset: pathseg_offset as u32,
drawtag_offset: drawtag_offset as u32,
drawdata_offset: drawdata_offset as u32,
..Default::default()
};
(config, alloc)
}
pub fn write_scene(&self, buf: &mut BufWrite) {
buf.extend_slice(&self.drawobj_stream);
let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE;
buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWOBJ_SIZE);
buf.extend_slice(&self.drawtag_stream);
let n_drawobj = self.drawtag_stream.len();
buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE);
buf.extend_slice(&self.drawdata_stream);
buf.extend_slice(&self.transform_stream);
let n_trans = self.transform_stream.len();
buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
@ -265,9 +272,9 @@ impl Encoder {
buf.extend_slice(&self.pathseg_stream);
}
/// The number of elements in the draw object stream.
/// The number of draw objects in the draw object stream.
pub(crate) fn n_drawobj(&self) -> usize {
self.drawobj_stream.len() / DRAWOBJ_SIZE
self.drawtag_stream.len()
}
/// The number of paths.
@ -296,7 +303,8 @@ impl Encoder {
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
self.tag_stream.extend(&glyph.tag_stream);
self.pathseg_stream.extend(&glyph.pathseg_stream);
self.drawobj_stream.extend(&glyph.drawobj_stream);
self.drawtag_stream.extend(&glyph.drawtag_stream);
self.drawdata_stream.extend(&glyph.drawdata_stream);
self.n_path += glyph.n_path;
self.n_pathseg += glyph.n_pathseg;
}
@ -325,15 +333,14 @@ impl GlyphEncoder {
///
/// This should be encoded after a path.
pub(crate) fn fill_color(&mut self, rgba_color: u32) {
self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
let element = FillColor {
tag: ELEMENT_FILLCOLOR,
rgba_color,
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
self.drawdata_stream.extend(bytemuck::bytes_of(&element));
}
pub(crate) fn is_color(&self) -> bool {
!self.drawobj_stream.is_empty()
!self.drawtag_stream.is_empty()
}
}

View file

@ -92,7 +92,7 @@ pub struct Renderer {
clip_binding: ClipBinding,
tile_pipeline: Pipeline,
tile_ds: DescriptorSet,
tile_ds: Vec<DescriptorSet>,
path_pipeline: Pipeline,
path_ds: DescriptorSet,
@ -105,7 +105,7 @@ pub struct Renderer {
bin_ds: DescriptorSet,
coarse_pipeline: Pipeline,
coarse_ds: DescriptorSet,
coarse_ds: Vec<DescriptorSet>,
k4_pipeline: Pipeline,
k4_ds: DescriptorSet,
@ -176,10 +176,8 @@ impl Renderer {
};
let image_dev = session.create_image2d(width as u32, height as u32, image_format)?;
// Note: this must be updated when the config struct size changes.
const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::<Config>() as u64;
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
// TODO: separate staging buffer (if needed)
let config_bufs = (0..n_bufs)
.map(|_| {
session
@ -197,7 +195,7 @@ impl Renderer {
let element_stage = ElementStage::new(session, &element_code);
let element_bindings = scene_bufs
.iter()
.map(|scene_buf| {
.map(|scene_buf|
element_stage.bind(
session,
&element_code,
@ -205,7 +203,7 @@ impl Renderer {
scene_buf,
&memory_buf_dev,
)
})
)
.collect();
let clip_code = ClipCode::new(session);
@ -214,8 +212,15 @@ impl Renderer {
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
let tile_pipeline = session
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?;
let tile_ds = session
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
let tile_ds = scene_bufs
.iter()
.map(|scene_buf| {
session.create_simple_descriptor_set(
&tile_pipeline,
&[&memory_buf_dev, &config_buf, scene_buf],
)
})
.collect::<Result<Vec<_>, _>>()?;
let path_alloc_code = include_shader!(session, "../shader/gen/path_coarse");
let path_pipeline = session
@ -243,10 +248,23 @@ impl Renderer {
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
let coarse_code = include_shader!(session, "../shader/gen/coarse");
let coarse_pipeline = session
.create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::BufReadOnly])?;
let coarse_ds = session
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
let coarse_pipeline = session.create_compute_pipeline(
coarse_code,
&[
BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
],
)?;
let coarse_ds = scene_bufs
.iter()
.map(|scene_buf| {
session.create_simple_descriptor_set(
&coarse_pipeline,
&[&memory_buf_dev, &config_buf, scene_buf],
)
})
.collect::<Result<Vec<_>, _>>()?;
let bg_image = Self::make_test_bg_image(&session);
@ -430,7 +448,7 @@ impl Renderer {
cmd_buf.begin_debug_label("Tile allocation");
cmd_buf.dispatch(
&self.tile_pipeline,
&self.tile_ds,
&self.tile_ds[buf_ix],
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
@ -462,7 +480,7 @@ impl Renderer {
cmd_buf.begin_debug_label("Coarse raster");
cmd_buf.dispatch(
&self.coarse_pipeline,
&self.coarse_ds,
&self.coarse_ds[buf_ix],
(
(self.width as u32 + 255) / 256,
(self.height as u32 + 255) / 256,

View file

@ -64,9 +64,6 @@ struct State {
}
struct ClipElement {
/// Byte offset of BeginClip element in element vec, for bbox fixup.
save_point: usize,
bbox: Option<Rect>,
blend: Option<Blend>,
}
@ -199,8 +196,6 @@ impl RenderContext for PietGpuRenderContext {
fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>, width: f64) {
self.encode_linewidth(width.abs() as f32);
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
// Note: the bbox contribution of stroke becomes more complicated with miter joins.
self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5));
let path = shape.path_elements(TOLERANCE);
self.encode_path(path, false);
self.encode_brush(&brush);
@ -217,9 +212,6 @@ impl RenderContext for PietGpuRenderContext {
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
// Note: we might get a good speedup from using an approximate bounding box.
// Perhaps that should be added to kurbo.
self.accumulate_bbox(|| shape.bounding_box());
let path = shape.path_elements(TOLERANCE);
self.encode_linewidth(-1.0);
self.encode_path(path, true);
@ -232,13 +224,11 @@ impl RenderContext for PietGpuRenderContext {
self.encode_linewidth(-1.0);
let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true);
let save_point = self.new_encoder.begin_clip(None);
self.new_encoder.begin_clip(None);
if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
}
self.clip_stack.push(ClipElement {
bbox: None,
save_point,
blend: None,
});
if let Some(tos) = self.state_stack.last_mut() {
@ -340,16 +330,13 @@ impl PietGpuRenderContext {
self.encode_linewidth(-1.0);
let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true);
let save_point = self.new_encoder.begin_clip(Some(blend));
self.new_encoder.begin_clip(Some(blend));
if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
}
self.clip_stack.push(ClipElement {
bbox: None,
save_point,
blend: Some(blend),
});
self.accumulate_bbox(|| shape.bounding_box());
if let Some(tos) = self.state_stack.last_mut() {
tos.n_clip += 1;
}
@ -406,37 +393,7 @@ impl PietGpuRenderContext {
fn pop_clip(&mut self) {
let tos = self.clip_stack.pop().unwrap();
let bbox = tos.bbox.unwrap_or_default();
let bbox_f32_4 = rect_to_f32_4(bbox);
self.new_encoder.end_clip(bbox_f32_4, tos.blend, tos.save_point);
if let Some(bbox) = tos.bbox {
self.union_bbox(bbox);
}
}
/// Accumulate a bbox.
///
/// The bbox is given lazily as a closure, relative to the current transform.
/// It's lazy because we don't need to compute it unless we're inside a clip.
fn accumulate_bbox(&mut self, f: impl FnOnce() -> Rect) {
if !self.clip_stack.is_empty() {
let bbox = f();
let bbox = self.cur_transform.transform_rect_bbox(bbox);
self.union_bbox(bbox);
}
}
/// Accumulate an absolute bbox.
///
/// The bbox is given already transformed into surface coordinates.
fn union_bbox(&mut self, bbox: Rect) {
if let Some(tos) = self.clip_stack.last_mut() {
tos.bbox = if let Some(old_bbox) = tos.bbox {
Some(old_bbox.union(bbox))
} else {
Some(bbox)
};
}
self.new_encoder.end_clip(tos.blend);
}
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {

View file

@ -47,12 +47,14 @@ pub struct Config {
pub pathseg_alloc: u32,
pub anno_alloc: u32,
pub trans_alloc: u32,
pub bbox_alloc: u32,
pub path_bbox_alloc: u32,
pub drawmonoid_alloc: u32,
pub clip_alloc: u32,
pub clip_bic_alloc: u32,
pub clip_stack_alloc: u32,
pub clip_bbox_alloc: u32,
pub draw_bbox_alloc: u32,
pub drawinfo_alloc: u32,
pub n_trans: u32,
pub n_path: u32,
pub n_clip: u32,
@ -60,6 +62,8 @@ pub struct Config {
pub linewidth_offset: u32,
pub pathtag_offset: u32,
pub pathseg_offset: u32,
pub drawtag_offset: u32,
pub drawdata_offset: u32,
}
// The "element" stage combines a number of stages for parts of the pipeline.

View file

@ -28,6 +28,8 @@ use piet_gpu_hal::{
pub struct DrawMonoid {
pub path_ix: u32,
pub clip_ix: u32,
pub scene_offset: u32,
pub info_offset: u32,
}
const DRAW_WG: u64 = 256;
@ -93,7 +95,7 @@ impl DrawStage {
pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage {
// We're limited to DRAW_PART_SIZE^2
// Also note: size here allows padding
let root_buf_size = DRAW_PART_SIZE * 8;
let root_buf_size = DRAW_PART_SIZE * 16;
let root_buf = session
.create_buffer(root_buf_size, BufferUsage::STORAGE)
.unwrap();

View file

@ -163,8 +163,7 @@ impl ClipData {
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
stages::Config {
clip_alloc: clip_alloc as u32,
// TODO: this wants to be renamed to path_bbox_alloc
bbox_alloc: path_bbox_alloc as u32,
path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
@ -194,7 +193,7 @@ impl ClipData {
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
let draw_start = 8 + n_clip * 4 + n_path * 24;
let draw_range = draw_start..(draw_start + n_clip * 8);
let draw_range = draw_start..(draw_start + n_clip * 16);
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
let mut bbox_stack = Vec::new();
let mut parent_stack = Vec::new();

View file

@ -17,20 +17,23 @@
//! Tests for the piet-gpu draw object stage.
use piet_gpu_hal::{BufWrite, BufferUsage};
use rand::Rng;
use rand::{Rng, seq::SliceRandom};
use crate::{Config, Runner, TestResult};
use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage};
const ELEMENT_SIZE: usize = 36;
const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40;
const ELEMENT_FILLCOLOR: u32 = 4;
const ELEMENT_FILLLINGRADIENT: u32 = 5;
const ELEMENT_FILLIMAGE: u32 = 6;
const ELEMENT_BEGINCLIP: u32 = 9;
const ELEMENT_ENDCLIP: u32 = 10;
// Tags for draw objects. See shader/drawtag.h for the authoritative source.
const DRAWTAG_FILLCOLOR: u32 = 4;
const DRAWTAG_FILLLINGRADIENT: u32 = 20;
const DRAWTAG_FILLIMAGE: u32 = 8;
const DRAWTAG_BEGINCLIP: u32 = 5;
const DRAWTAG_ENDCLIP: u32 = 37;
const TAGS: &[u32] = &[DRAWTAG_FILLCOLOR, DRAWTAG_FILLLINGRADIENT, DRAWTAG_FILLIMAGE, DRAWTAG_BEGINCLIP, DRAWTAG_ENDCLIP];
struct DrawTestData {
tags: Vec<u32>,
@ -47,7 +50,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
.session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap();
let scene_size = n_tag * ELEMENT_SIZE as u64;
let scene_size = n_tag * DRAWTAG_SIZE as u64;
let scene_buf = runner
.session
.create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE)
@ -92,7 +95,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
impl DrawTestData {
fn new(n: u64) -> DrawTestData {
let mut rng = rand::thread_rng();
let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect();
let tags = (0..n).map(|_| *TAGS.choose(&mut rng).unwrap()).collect();
DrawTestData { tags }
}
@ -101,13 +104,14 @@ impl DrawTestData {
// Layout of memory
let drawmonoid_alloc = 0;
let anno_alloc = drawmonoid_alloc + 8 * n_tags;
let anno_alloc = drawmonoid_alloc + 16 * n_tags;
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
let stage_config = stages::Config {
n_elements: n_tags as u32,
anno_alloc: anno_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
drawtag_offset: 0,
..Default::default()
};
stage_config
@ -116,37 +120,35 @@ impl DrawTestData {
fn memory_size(&self) -> u64 {
// Note: this overallocates the clip buf a bit - only needed for the
// total number of begin_clip and end_clip tags.
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64
(8 + self.tags.len() * (16 + 4 + ANNOTATED_SIZE)) as u64
}
fn fill_scene(&self, buf: &mut BufWrite) {
let mut element = [0u32; ELEMENT_SIZE / 4];
for tag in &self.tags {
element[0] = *tag;
buf.push(element);
}
buf.extend_slice(&self.tags);
}
fn verify(&self, buf: &[u8]) -> Option<String> {
let size = self.tags.len() * 8;
let size = self.tags.len() * 16;
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// Verify exclusive prefix sum.
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
if *actual != expected {
println!("{:?} {:?}", actual, expected);
return Some(format!("draw mismatch at {}", i));
}
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
expected.scene_offset += tag & 28;
}
None
}
fn reduce_tag(tag: u32) -> (u32, u32) {
match tag {
ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0),
ELEMENT_BEGINCLIP | ELEMENT_ENDCLIP => (1, 1),
DRAWTAG_FILLCOLOR | DRAWTAG_FILLLINGRADIENT | DRAWTAG_FILLIMAGE => (1, 0),
DRAWTAG_BEGINCLIP | DRAWTAG_ENDCLIP => (1, 1),
// TODO: ENDCLIP will become (0, 1)
_ => (0, 0),
}

View file

@ -207,11 +207,11 @@ impl PathData {
// Layout of memory
let trans_alloc = 0;
let pathseg_alloc = trans_alloc + n_trans * 24;
let bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE;
let path_bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE;
let stage_config = stages::Config {
pathseg_alloc,
trans_alloc,
bbox_alloc,
path_bbox_alloc,
n_trans,
n_path: self.n_path,
pathtag_offset,