Merge pull request #159 from linebender/varenc

Variable size encoding of draw objects
This commit is contained in:
Raph Levien 2022-03-30 20:27:56 -07:00 committed by GitHub
commit 7a8d41455a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
92 changed files with 2060 additions and 4120 deletions

View file

@ -38,7 +38,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h"
#include "tile.h" #include "tile.h"
shared uint sh_row_count[BACKDROP_WG]; shared uint sh_row_count[BACKDROP_WG];
@ -48,23 +47,14 @@ shared uint sh_row_width[BACKDROP_WG];
void main() { void main() {
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element // Work assignment: 1 thread : 1 path element
uint row_count = 0; uint row_count = 0;
bool mem_ok = mem_error == NO_ERROR; bool mem_ok = mem_error == NO_ERROR;
if (gl_LocalInvocationID.y == 0) { if (gl_LocalInvocationID.y == 0) {
if (element_ix < conf.n_elements) { if (element_ix < conf.n_elements) {
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref); // Possible TODO: it's not necessary to process backdrops of stroked paths.
switch (tag.tag) { // We had logic for that but took it out because it used the Annotated struct.
case Annotated_Image:
case Annotated_LinGradient:
case Annotated_BeginClip:
case Annotated_Color:
if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
break;
}
// Fall through.
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
Path path = Path_read(conf.tile_alloc, path_ref); Path path = Path_read(conf.tile_alloc, path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
@ -81,7 +71,6 @@ void main() {
path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
}
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
} }

View file

@ -20,7 +20,7 @@ layout(binding = 1) readonly buffer ConfigBuf {
void main() { void main() {
uint ix = gl_GlobalInvocationID.x; uint ix = gl_GlobalInvocationID.x;
if (ix < conf.n_path) { if (ix < conf.n_path) {
uint out_ix = (conf.bbox_alloc.offset >> 2) + 6 * ix; uint out_ix = (conf.path_bbox_alloc.offset >> 2) + 6 * ix;
memory[out_ix] = 0xffff; memory[out_ix] = 0xffff;
memory[out_ix + 1] = 0xffff; memory[out_ix + 1] = 0xffff;
memory[out_ix + 2] = 0; memory[out_ix + 2] = 0;

View file

@ -18,7 +18,6 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h"
#include "bins.h" #include "bins.h"
#include "drawtag.h" #include "drawtag.h"
@ -37,10 +36,12 @@ shared Alloc sh_chunk_alloc[N_TILE];
shared bool sh_alloc_failed; shared bool sh_alloc_failed;
DrawMonoid load_draw_monoid(uint element_ix) { DrawMonoid load_draw_monoid(uint element_ix) {
uint base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix;
uint path_ix = memory[base]; uint path_ix = memory[base];
uint clip_ix = memory[base + 1]; uint clip_ix = memory[base + 1];
return DrawMonoid(path_ix, clip_ix); uint scene_offset = memory[base + 2];
uint info_offset = memory[base + 3];
return DrawMonoid(path_ix, clip_ix, scene_offset, info_offset);
} }
// Load bounding box computed by clip processing // Load bounding box computed by clip processing
@ -60,7 +61,7 @@ vec4 bbox_intersect(vec4 a, vec4 b) {
// Load path's bbox from bbox (as written by pathseg). // Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) { vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0; float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0;
@ -69,16 +70,15 @@ vec4 load_path_bbox(uint path_ix) {
return bbox; return bbox;
} }
void store_path_bbox(AnnotatedRef ref, vec4 bbox) { void store_draw_bbox(uint draw_ix, vec4 bbox) {
uint ix = ref.offset >> 2; uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
memory[ix + 1] = floatBitsToUint(bbox.x); memory[base] = floatBitsToUint(bbox.x);
memory[ix + 2] = floatBitsToUint(bbox.y); memory[base + 1] = floatBitsToUint(bbox.y);
memory[ix + 3] = floatBitsToUint(bbox.z); memory[base + 2] = floatBitsToUint(bbox.z);
memory[ix + 4] = floatBitsToUint(bbox.w); memory[base + 3] = floatBitsToUint(bbox.w);
} }
void main() { void main() {
uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
@ -91,18 +91,8 @@ void main() {
// Read inputs and determine coverage of bins // Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x; uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < my_n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0; int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) { if (element_ix < conf.n_elements) {
case Annotated_Color:
case Annotated_LinGradient:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
DrawMonoid draw_monoid = load_draw_monoid(element_ix); DrawMonoid draw_monoid = load_draw_monoid(element_ix);
uint path_ix = draw_monoid.path_ix; uint path_ix = draw_monoid.path_ix;
vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); vec4 clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
@ -119,12 +109,11 @@ void main() {
// Avoid negative-size bbox (is this necessary)? // Avoid negative-size bbox (is this necessary)?
bbox.zw = max(bbox.xy, bbox.zw); bbox.zw = max(bbox.xy, bbox.zw);
// Store clip-intersected bbox for tile_alloc. // Store clip-intersected bbox for tile_alloc.
store_path_bbox(ref, bbox); store_draw_bbox(element_ix, bbox);
x0 = int(floor(bbox.x * SX)); x0 = int(floor(bbox.x * SX));
y0 = int(floor(bbox.y * SY)); y0 = int(floor(bbox.y * SY));
x1 = int(ceil(bbox.z * SX)); x1 = int(ceil(bbox.z * SX));
y1 = int(ceil(bbox.w * SY)); y1 = int(ceil(bbox.w * SY));
break;
} }
// At this point, we run an iterator over the coverage area, // At this point, we run an iterator over the coverage area,

View file

@ -22,43 +22,43 @@ rule dxil
rule msl rule msl
command = $spirv_cross --msl $in --output $out $msl_flags command = $spirv_cross --msl $in --output $out $msl_flags
build gen/binning.spv: glsl binning.comp | annotated.h bins.h drawtag.h setup.h mem.h build gen/binning.spv: glsl binning.comp | bins.h drawtag.h setup.h mem.h
build gen/binning.hlsl: hlsl gen/binning.spv build gen/binning.hlsl: hlsl gen/binning.spv
build gen/binning.dxil: dxil gen/binning.hlsl build gen/binning.dxil: dxil gen/binning.hlsl
build gen/binning.msl: msl gen/binning.spv build gen/binning.msl: msl gen/binning.spv
build gen/tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h build gen/tile_alloc.spv: glsl tile_alloc.comp | drawtag.h tile.h setup.h mem.h
build gen/tile_alloc.hlsl: hlsl gen/tile_alloc.spv build gen/tile_alloc.hlsl: hlsl gen/tile_alloc.spv
build gen/tile_alloc.dxil: dxil gen/tile_alloc.hlsl build gen/tile_alloc.dxil: dxil gen/tile_alloc.hlsl
build gen/tile_alloc.msl: msl gen/tile_alloc.spv build gen/tile_alloc.msl: msl gen/tile_alloc.spv
build gen/path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h build gen/path_coarse.spv: glsl path_coarse.comp | pathseg.h tile.h setup.h mem.h
build gen/path_coarse.hlsl: hlsl gen/path_coarse.spv build gen/path_coarse.hlsl: hlsl gen/path_coarse.spv
build gen/path_coarse.dxil: dxil gen/path_coarse.hlsl build gen/path_coarse.dxil: dxil gen/path_coarse.hlsl
build gen/path_coarse.msl: msl gen/path_coarse.spv build gen/path_coarse.msl: msl gen/path_coarse.spv
build gen/backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h build gen/backdrop.spv: glsl backdrop.comp | tile.h setup.h mem.h
build gen/backdrop.hlsl: hlsl gen/backdrop.spv build gen/backdrop.hlsl: hlsl gen/backdrop.spv
build gen/backdrop.dxil: dxil gen/backdrop.hlsl build gen/backdrop.dxil: dxil gen/backdrop.hlsl
build gen/backdrop.msl: msl gen/backdrop.spv build gen/backdrop.msl: msl gen/backdrop.spv
build gen/backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h build gen/backdrop_lg.spv: glsl backdrop.comp | tile.h setup.h mem.h
flags = -DBACKDROP_DIST_FACTOR=4 flags = -DBACKDROP_DIST_FACTOR=4
build gen/backdrop_lg.hlsl: hlsl gen/backdrop_lg.spv build gen/backdrop_lg.hlsl: hlsl gen/backdrop_lg.spv
build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl build gen/backdrop_lg.dxil: dxil gen/backdrop_lg.hlsl
build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv build gen/backdrop_lg.msl: msl gen/backdrop_lg.spv
build gen/coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h build gen/coarse.spv: glsl coarse.comp | drawtag.h bins.h ptcl.h blend.h setup.h mem.h
build gen/coarse.hlsl: hlsl gen/coarse.spv build gen/coarse.hlsl: hlsl gen/coarse.spv
build gen/coarse.dxil: dxil gen/coarse.hlsl build gen/coarse.dxil: dxil gen/coarse.hlsl
build gen/coarse.msl: msl gen/coarse.spv build gen/coarse.msl: msl gen/coarse.spv
build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h mem.h
build gen/kernel4.hlsl: hlsl gen/kernel4.spv build gen/kernel4.hlsl: hlsl gen/kernel4.spv
build gen/kernel4.dxil: dxil gen/kernel4.hlsl build gen/kernel4.dxil: dxil gen/kernel4.hlsl
build gen/kernel4.msl: msl gen/kernel4.spv build gen/kernel4.msl: msl gen/kernel4.spv
build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h mem.h
flags = -DGRAY flags = -DGRAY
build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv
build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl
@ -114,17 +114,17 @@ build gen/draw_root.hlsl: hlsl gen/draw_root.spv
build gen/draw_root.dxil: dxil gen/draw_root.hlsl build gen/draw_root.dxil: dxil gen/draw_root.hlsl
build gen/draw_root.msl: msl gen/draw_root.spv build gen/draw_root.msl: msl gen/draw_root.spv
build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h annotated.h setup.h mem.h build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h setup.h mem.h
build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv build gen/draw_leaf.msl: msl gen/draw_leaf.spv
build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h annotated.h build gen/clip_reduce.spv: glsl clip_reduce.comp | mem.h setup.h
build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv build gen/clip_reduce.hlsl: hlsl gen/clip_reduce.spv
build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl build gen/clip_reduce.dxil: dxil gen/clip_reduce.hlsl
build gen/clip_reduce.msl: msl gen/clip_reduce.spv build gen/clip_reduce.msl: msl gen/clip_reduce.spv
build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h annotated.h build gen/clip_leaf.spv: glsl clip_leaf.comp | mem.h setup.h
build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv build gen/clip_leaf.hlsl: hlsl gen/clip_leaf.spv
build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl build gen/clip_leaf.dxil: dxil gen/clip_leaf.hlsl
build gen/clip_leaf.msl: msl gen/clip_leaf.spv build gen/clip_leaf.msl: msl gen/clip_leaf.spv

View file

@ -18,8 +18,6 @@ layout(binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h"
// Some of this is cut'n'paste duplication with the reduce pass, and // Some of this is cut'n'paste duplication with the reduce pass, and
// arguably should be moved to a common .h file. // arguably should be moved to a common .h file.
// The bicyclic monoid // The bicyclic monoid
@ -43,7 +41,7 @@ Bic bic_combine(Bic x, Bic y) {
// Load path's bbox from bbox (as written by pathseg). // Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) { vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0; float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0;
@ -271,7 +269,7 @@ void main() {
// and is in the ClipEl for cross-partition. // and is in the ClipEl for cross-partition.
// If not, can probably get rid of it in the stack intermediate buf. // If not, can probably get rid of it in the stack intermediate buf.
path_ix = load_path_ix(parent); path_ix = load_path_ix(parent);
uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * ~inp; uint drawmonoid_out_base = (conf.drawmonoid_alloc.offset >> 2) + 4 * ~inp;
// Fix up drawmonoid so path_ix at EndClip matches BeginClip // Fix up drawmonoid so path_ix at EndClip matches BeginClip
memory[drawmonoid_out_base] = path_ix; memory[drawmonoid_out_base] = path_ix;

View file

@ -31,8 +31,6 @@ layout(binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h"
// The intermediate state for clip processing. // The intermediate state for clip processing.
struct ClipEl { struct ClipEl {
// index of parent node // index of parent node
@ -59,7 +57,7 @@ shared vec4 sh_bbox[WG_SIZE];
// Load path's bbox from bbox (as written by pathseg). // Load path's bbox from bbox (as written by pathseg).
vec4 load_path_bbox(uint path_ix) { vec4 load_path_bbox(uint path_ix) {
uint base = (conf.bbox_alloc.offset >> 2) + 6 * path_ix; uint base = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
float bbox_l = float(memory[base]) - 32768.0; float bbox_l = float(memory[base]) - 32768.0;
float bbox_t = float(memory[base + 1]) - 32768.0; float bbox_t = float(memory[base + 1]) - 32768.0;
float bbox_r = float(memory[base + 2]) - 32768.0; float bbox_r = float(memory[base + 2]) - 32768.0;

View file

@ -19,14 +19,19 @@
layout(local_size_x = N_TILE, local_size_y = 1) in; layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf { layout(binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h" layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "drawtag.h"
#include "bins.h" #include "bins.h"
#include "tile.h" #include "tile.h"
#include "ptcl.h" #include "ptcl.h"
#include "blend.h"
#define LG_N_PART_READ (7 + LG_WG_FACTOR) #define LG_N_PART_READ (7 + LG_WG_FACTOR)
#define N_PART_READ (1 << LG_N_PART_READ) #define N_PART_READ (1 << LG_N_PART_READ)
@ -92,8 +97,8 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true; return true;
} }
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) { void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) {
if (fill_mode_from_flags(flags) == MODE_NONZERO) { if (linewidth < 0.0) {
if (tile.tile.offset != 0) { if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
Cmd_Fill_write(alloc, cmd_ref, cmd_fill); Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
@ -146,6 +151,10 @@ void main() {
uint part_start_ix = 0; uint part_start_ix = 0;
uint ready_ix = 0; uint ready_ix = 0;
uint drawmonoid_start = conf.drawmonoid_alloc.offset >> 2;
uint drawtag_start = conf.drawtag_offset >> 2;
uint drawdata_start = conf.drawdata_offset >> 2;
uint drawinfo_start = conf.drawinfo_alloc.offset >> 2;
bool mem_ok = mem_error == NO_ERROR; bool mem_ok = mem_error == NO_ERROR;
while (true) { while (true) {
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
@ -207,24 +216,22 @@ void main() {
// We've done the merge and filled the buffer. // We've done the merge and filled the buffer.
// Read one element, compute coverage. // Read one element, compute coverage.
uint tag = Annotated_Nop; uint tag = Drawtag_Nop;
uint element_ix; uint element_ix;
AnnotatedRef ref;
if (th_ix + rd_ix < wr_ix) { if (th_ix + rd_ix < wr_ix) {
element_ix = sh_elements[th_ix]; element_ix = sh_elements[th_ix];
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); tag = scene[drawtag_start + element_ix];
tag = Annotated_tag(conf.anno_alloc, ref).tag;
} }
// Bounding box of element in pixel coordinates. // Bounding box of element in pixel coordinates.
uint tile_count; uint tile_count;
switch (tag) { switch (tag) {
case Annotated_Color: case Drawtag_FillColor:
case Annotated_Image: case Drawtag_FillImage:
case Annotated_LinGradient: case Drawtag_FillLinGradient:
case Annotated_BeginClip: case Drawtag_BeginClip:
case Annotated_EndClip: case Drawtag_EndClip:
uint drawmonoid_base = (conf.drawmonoid_alloc.offset >> 2) + 2 * element_ix; uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
uint path_ix = memory[drawmonoid_base]; uint path_ix = memory[drawmonoid_base];
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x; uint stride = path.bbox.z - path.bbox.x;
@ -272,9 +279,8 @@ void main() {
el_ix = probe; el_ix = probe;
} }
} }
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size); uint element_ix = sh_elements[el_ix];
AnnotatedTag anno_tag = Annotated_tag(conf.anno_alloc, ref); uint tag = scene[drawtag_start + element_ix];
uint tag = anno_tag.tag;
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
uint width = sh_tile_width[el_ix]; uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + seq_ix % width; uint x = sh_tile_x0[el_ix] + seq_ix % width;
@ -283,15 +289,23 @@ void main() {
if (mem_ok) { if (mem_ok) {
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
bool is_clip = tag == Annotated_BeginClip || tag == Annotated_EndClip; bool is_clip = (tag & 1) != 0;
// Always include the tile if it contains a path segment. // Always include the tile if it contains a path segment.
// For draws, include the tile if it is solid. // For draws, include the tile if it is solid.
// For clips, include the tile if it is empty - this way, logic // For clips, include the tile if it is empty - this way, logic
// below will suppress the drawing of inner elements. // below will suppress the drawing of inner elements.
// For blends, include the tile if // For blends, include the tile if
// (blend_mode, composition_mode) != (Normal, SrcOver) // (blend_mode, composition_mode) != (Normal, SrcOver)
bool is_blend = false;
if (is_clip) {
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
uint scene_offset = memory[drawmonoid_base + 2];
uint dd = drawdata_start + (scene_offset >> 2);
uint blend = scene[dd];
is_blend = (blend != BlendComp_default);
}
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
|| (is_clip && (anno_tag.flags & 0x2) != 0); || is_blend;
} }
if (include_tile) { if (include_tile) {
uint el_slice = el_ix / 32; uint el_slice = el_ix / 32;
@ -302,8 +316,8 @@ void main() {
barrier(); barrier();
// Output non-segment elements for this tile. The thread does a sequential walk // Output draw objects for this tile. The thread does a sequential walk
// through the non-segment elements. // through the draw objects.
uint slice_ix = 0; uint slice_ix = 0;
uint bitmap = sh_bitmaps[0][th_ix]; uint bitmap = sh_bitmaps[0][th_ix];
while (mem_ok) { while (mem_ok) {
@ -323,59 +337,55 @@ void main() {
// Clear LSB // Clear LSB
bitmap &= bitmap - 1; bitmap &= bitmap - 1;
// At this point, we read the element again from global memory. uint drawtag = scene[drawtag_start + element_ix];
// If that turns out to be expensive, maybe we can pack it into
// shared memory (or perhaps just the tag).
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
if (clip_zero_depth == 0) { if (clip_zero_depth == 0) {
switch (tag.tag) {
case Annotated_Color:
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] + TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref); uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
uint scene_offset = memory[drawmonoid_base + 2];
uint info_offset = memory[drawmonoid_base + 3];
uint dd = drawdata_start + (scene_offset >> 2);
uint di = drawinfo_start + (info_offset >> 2);
switch (drawtag) {
case Drawtag_FillColor:
float linewidth = uintBitsToFloat(memory[di]);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break; break;
} }
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color)); uint rgba = scene[dd];
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba));
cmd_ref.offset += 4 + CmdColor_size; cmd_ref.offset += 4 + CmdColor_size;
break; break;
case Annotated_LinGradient: case Drawtag_FillLinGradient:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoLinGradient lin = Annotated_LinGradient_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break; break;
} }
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth); linewidth = uintBitsToFloat(memory[di]);
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
CmdLinGrad cmd_lin; CmdLinGrad cmd_lin;
cmd_lin.index = lin.index; cmd_lin.index = scene[dd];
cmd_lin.line_x = lin.line_x; cmd_lin.line_x = uintBitsToFloat(memory[di + 1]);
cmd_lin.line_y = lin.line_y; cmd_lin.line_y = uintBitsToFloat(memory[di + 2]);
cmd_lin.line_c = lin.line_c; cmd_lin.line_c = uintBitsToFloat(memory[di + 3]);
Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin); Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin);
cmd_ref.offset += 4 + CmdLinGrad_size; cmd_ref.offset += 4 + CmdLinGrad_size;
break; break;
case Annotated_Image: case Drawtag_FillImage:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), linewidth = uintBitsToFloat(memory[di]);
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break; break;
} }
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset)); uint index = scene[dd];
uint raw1 = scene[dd + 1];
ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset));
cmd_ref.offset += 4 + CmdImage_size; cmd_ref.offset += 4 + CmdImage_size;
break; break;
case Annotated_BeginClip: case Drawtag_BeginClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) { if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1; clip_zero_depth = clip_depth + 1;
} else { } else {
@ -387,27 +397,24 @@ void main() {
} }
clip_depth++; clip_depth++;
break; break;
case Annotated_EndClip: case Drawtag_EndClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoEndClip end_clip = Annotated_EndClip_read(conf.anno_alloc, ref);
clip_depth--; clip_depth--;
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break; break;
} }
write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0); write_fill(cmd_alloc, cmd_ref, tile, -1.0);
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(end_clip.blend)); uint blend = scene[dd];
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
cmd_ref.offset += 4 + CmdEndClip_size; cmd_ref.offset += 4 + CmdEndClip_size;
break; break;
} }
} else { } else {
// In "clip zero" state, suppress all drawing // In "clip zero" state, suppress all drawing
switch (tag.tag) { switch (drawtag) {
case Annotated_BeginClip: case Drawtag_BeginClip:
clip_depth++; clip_depth++;
break; break;
case Annotated_EndClip: case Drawtag_EndClip:
if (clip_depth == clip_zero_depth) { if (clip_depth == clip_zero_depth) {
clip_zero_depth = 0; clip_zero_depth = 0;
} }

View file

@ -27,7 +27,6 @@ layout(binding = 2) readonly buffer SceneBuf {
#include "scene.h" #include "scene.h"
#include "tile.h" #include "tile.h"
#include "drawtag.h" #include "drawtag.h"
#include "annotated.h"
#include "blend.h" #include "blend.h"
#define Monoid DrawMonoid #define Monoid DrawMonoid
@ -42,14 +41,14 @@ void main() {
Monoid local[N_ROWS]; Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS; uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size); uint drawtag_base = conf.drawtag_offset >> 2;
uint tag_word = Element_tag(ref).tag; uint tag_word = scene[drawtag_base + ix];
Monoid agg = map_tag(tag_word); Monoid agg = map_tag(tag_word);
local[0] = agg; local[0] = agg;
for (uint i = 1; i < N_ROWS; i++) { for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag; tag_word = scene[drawtag_base + ix + i];
agg = combine_tag_monoid(agg, map_tag(tag_word)); agg = combine_draw_monoid(agg, map_tag(tag_word));
local[i] = agg; local[i] = agg;
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
@ -57,41 +56,47 @@ void main() {
barrier(); barrier();
if (gl_LocalInvocationID.x >= (1u << i)) { if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_tag_monoid(other, agg); agg = combine_draw_monoid(other, agg);
} }
barrier(); barrier();
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
barrier(); barrier();
Monoid row = tag_monoid_identity(); Monoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0) { if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1]; row = parent[gl_WorkGroupID.x - 1];
} }
if (gl_LocalInvocationID.x > 0) { if (gl_LocalInvocationID.x > 0) {
row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); row = combine_draw_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
} }
uint drawdata_base = conf.drawdata_offset >> 2;
uint drawinfo_base = conf.drawinfo_alloc.offset >> 2;
uint out_ix = gl_GlobalInvocationID.x * N_ROWS; uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2; uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 4;
uint clip_out_base = conf.clip_alloc.offset >> 2; uint clip_out_base = conf.clip_alloc.offset >> 2;
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
for (uint i = 0; i < N_ROWS; i++) { for (uint i = 0; i < N_ROWS; i++) {
Monoid m = row; Monoid m = row;
if (i > 0) { if (i > 0) {
m = combine_tag_monoid(m, local[i - 1]); m = combine_draw_monoid(m, local[i - 1]);
} }
// m now holds exclusive scan of draw monoid // m now holds exclusive scan of draw monoid
memory[out_base + i * 2] = m.path_ix; memory[out_base + i * 4] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix; memory[out_base + i * 4 + 1] = m.clip_ix;
memory[out_base + i * 4 + 2] = m.scene_offset;
memory[out_base + i * 4 + 3] = m.info_offset;
// u32 offset of drawobj data
uint dd = drawdata_base + (m.scene_offset >> 2);
uint di = drawinfo_base + (m.info_offset >> 2);
// For compatibility, we'll generate an Annotated object, same as old // For compatibility, we'll generate an Annotated object, same as old
// pipeline. However, going forward we'll get rid of that, and have // pipeline. However, going forward we'll get rid of that, and have
// later stages read scene + bbox etc. // later stages read scene + bbox etc.
ElementRef this_ref = Element_index(ref, i); tag_word = scene[drawtag_base + ix + i];
tag_word = Element_tag(this_ref).tag; if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillImage ||
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage || tag_word == Drawtag_BeginClip) {
tag_word == Element_BeginClip) { uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix;
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_l = float(memory[bbox_offset]) - 32768.0;
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0; float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
@ -101,11 +106,11 @@ void main() {
uint fill_mode = uint(linewidth >= 0.0); uint fill_mode = uint(linewidth >= 0.0);
vec4 mat; vec4 mat;
vec2 translate; vec2 translate;
if (linewidth >= 0.0 || tag_word == Element_FillLinGradient) { if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient) {
uint trans_ix = memory[bbox_offset + 5]; uint trans_ix = memory[bbox_offset + 5];
uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix;
mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
if (tag_word == Element_FillLinGradient) { if (tag_word == Drawtag_FillLinGradient) {
translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
} }
} }
@ -113,69 +118,38 @@ void main() {
// TODO: need to deal with anisotropic case // TODO: need to deal with anisotropic case
linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
} }
linewidth = max(linewidth, 0.0);
switch (tag_word) { switch (tag_word) {
case Element_FillColor: case Drawtag_FillColor:
FillColor fill = Element_FillColor_read(this_ref); case Drawtag_FillImage:
AnnoColor anno_fill; memory[di] = floatBitsToUint(linewidth);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
break; break;
case Element_FillLinGradient: case Drawtag_FillLinGradient:
FillLinGradient lin = Element_FillLinGradient_read(this_ref); memory[di] = floatBitsToUint(linewidth);
AnnoLinGradient anno_lin; uint index = scene[dd];
anno_lin.bbox = bbox; vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
anno_lin.linewidth = linewidth; vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
anno_lin.index = lin.index; p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate; p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate;
vec2 dxy = p1 - p0; vec2 dxy = p1 - p0;
float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y); float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
float line_x = dxy.x * scale; float line_x = dxy.x * scale;
float line_y = dxy.y * scale; float line_y = dxy.y * scale;
anno_lin.line_x = line_x; float line_c = -(p0.x * line_x + p0.y * line_y);
anno_lin.line_y = line_y; memory[di + 1] = floatBitsToUint(line_x);
anno_lin.line_c = -(p0.x * line_x + p0.y * line_y); memory[di + 2] = floatBitsToUint(line_y);
Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin); memory[di + 3] = floatBitsToUint(line_c);
break; break;
case Element_FillImage: case Drawtag_BeginClip:
FillImage fill_img = Element_FillImage_read(this_ref);
AnnoImage anno_img;
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
break;
case Element_BeginClip:
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoBeginClip anno_begin_clip;
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != BlendComp_default) << 1;
Annotated_BeginClip_write(conf.anno_alloc, out_ref, flags, anno_begin_clip);
break; break;
} }
} else if (tag_word == Element_EndClip) {
Clip end_clip = Element_BeginClip_read(this_ref);
AnnoEndClip anno_end_clip;
// The actual bbox will be reconstructed from clip stream output.
anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
anno_end_clip.blend = end_clip.blend;
uint flags = uint(end_clip.blend != BlendComp_default) << 1;
Annotated_EndClip_write(conf.anno_alloc, out_ref, flags, anno_end_clip);
} }
// Generate clip stream. // Generate clip stream.
if (tag_word == Element_BeginClip || tag_word == Element_EndClip) { if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) {
uint path_ix = ~(out_ix + i); uint path_ix = ~(out_ix + i);
if (tag_word == Element_BeginClip) { if (tag_word == Drawtag_BeginClip) {
path_ix = m.path_ix; path_ix = m.path_ix;
} }
memory[clip_out_base + m.clip_ix] = path_ix; memory[clip_out_base + m.clip_ix] = path_ix;
} }
out_ref.offset += Annotated_size;
} }
} }

View file

@ -36,13 +36,13 @@ shared Monoid sh_scratch[WG_SIZE];
void main() { void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS; uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size); uint drawtag_base = conf.drawtag_offset >> 2;
uint tag_word = Element_tag(ref).tag; uint tag_word = scene[drawtag_base + ix];
Monoid agg = map_tag(tag_word); Monoid agg = map_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) { for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag; uint tag_word = scene[drawtag_base + ix + i];
agg = combine_tag_monoid(agg, map_tag(tag_word)); agg = combine_draw_monoid(agg, map_tag(tag_word));
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) { for (uint i = 0; i < LG_WG_SIZE; i++) {
@ -50,7 +50,7 @@ void main() {
// We could make this predicate tighter, but would it help? // We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other); agg = combine_draw_monoid(agg, other);
} }
barrier(); barrier();
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;

View file

@ -16,8 +16,8 @@
layout(local_size_x = WG_SIZE, local_size_y = 1) in; layout(local_size_x = WG_SIZE, local_size_y = 1) in;
#define Monoid DrawMonoid #define Monoid DrawMonoid
#define combine_monoid combine_tag_monoid #define combine_monoid combine_draw_monoid
#define monoid_identity tag_monoid_identity #define monoid_identity draw_monoid_identity
layout(binding = 0) buffer DataBuf { layout(binding = 0) buffer DataBuf {
Monoid[] data; Monoid[] data;

View file

@ -2,36 +2,39 @@
// Common data structures and functions for the draw tag stream. // Common data structures and functions for the draw tag stream.
// Design of draw tag: & 0x1c gives scene size in bytes
// & 1 gives clip
// (tag >> 4) & 0x1c is info size in bytes
#define Drawtag_Nop 0
#define Drawtag_FillColor 0x44
#define Drawtag_FillLinGradient 0x114
#define Drawtag_FillImage 0x48
#define Drawtag_BeginClip 0x05
#define Drawtag_EndClip 0x25
struct DrawMonoid { struct DrawMonoid {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
DrawMonoid tag_monoid_identity() { DrawMonoid draw_monoid_identity() {
return DrawMonoid(0, 0); return DrawMonoid(0, 0, 0, 0);
} }
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
#ifdef Element_size
DrawMonoid map_tag(uint tag_word) { DrawMonoid map_tag(uint tag_word) {
switch (tag_word) { // TODO: at some point, EndClip should not generate a path
case Element_FillColor: uint has_path = uint(tag_word != Drawtag_Nop);
case Element_FillLinGradient: return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x1c);
case Element_FillImage:
return DrawMonoid(1, 0);
case Element_BeginClip:
// TODO: endclip should be (0, 1), ie not generate a path. But for now
// we generate a dummy path.
case Element_EndClip:
return DrawMonoid(1, 1);
default:
return DrawMonoid(0, 0);
} }
}
#endif

Binary file not shown.

View file

@ -3,17 +3,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -42,12 +31,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -55,12 +46,14 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _79 : register(u0, space0); RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _186 : register(t1, space0); ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _79.Load(offset * 4 + 8); uint v = _67.Load(offset * 4 + 8);
return v; return v;
} }
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _121;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref) Path Path_read(Alloc a, PathRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _165 = { raw2 }; TileRef _134 = { raw2 };
s.tiles = _165; s.tiles = _134;
return s; return s;
} }
@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_79.Store(offset * 4 + 8, val); _67.Store(offset * 4 + 8, val);
} }
void comp_main() void comp_main()
{ {
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _194;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = _79.Load(4) == 0u; bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _186.Load(0)) if (element_ix < _166.Load(0))
{ {
Alloc _217; PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
_217.offset = _186.Load(32); PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param; Alloc param;
param.offset = _217.offset; param.offset = _185.offset;
AnnotatedRef param_1 = ref; PathRef param_1 = path_ref;
AnnotatedTag tag = Annotated_tag(param, param_1); Path path = Path_read(param, param_1);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef _243 = { _186.Load(16) + (element_ix * 12u) };
PathRef path_ref = _243;
Alloc _247;
_247.offset = _186.Load(16);
Alloc param_3;
param_3.offset = _247.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u; bool _210 = row_count == 1u;
bool _278; bool _216;
if (_272) if (_210)
{ {
_278 = path.bbox.y > 0u; _216 = path.bbox.y > 0u;
} }
else else
{ {
_278 = _272; _216 = _210;
} }
if (_278) if (_216)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_5 = path.tiles.offset; uint param_2 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok; bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7); Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
break;
}
}
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
} }
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _325 = gl_LocalInvocationID.y == 0u; bool _262 = gl_LocalInvocationID.y == 0u;
bool _332; bool _269;
if (_325) if (_262)
{ {
_332 = th_ix >= (1u << i); _269 = th_ix >= (1u << i);
} }
else else
{ {
_332 = _325; _269 = _262;
} }
if (_332) if (_269)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -234,7 +190,7 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _411; uint _348;
for (uint row = th_ix; row < total_rows; row += 256u) for (uint row = th_ix; row < total_rows; row += 256u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -252,27 +208,27 @@ void comp_main()
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_411 = sh_row_count[el_ix - 1u]; _348 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_411 = 0u; _348 = 0u;
} }
uint seq_ix = row - _411; uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc; Alloc param_5 = tiles_alloc;
uint param_9 = tile_el_ix; uint param_6 = tile_el_ix;
uint sum = read_mem(param_8, param_9); uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_10 = tiles_alloc; Alloc param_7 = tiles_alloc;
uint param_11 = tile_el_ix; uint param_8 = tile_el_ix;
sum += read_mem(param_10, param_11); sum += read_mem(param_7, param_8);
Alloc param_12 = tiles_alloc; Alloc param_9 = tiles_alloc;
uint param_13 = tile_el_ix; uint param_10 = tile_el_ix;
uint param_14 = sum; uint param_11 = sum;
write_mem(param_12, param_13, param_14); write_mem(param_9, param_10, param_11);
} }
} }
} }

View file

@ -10,17 +10,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -61,12 +50,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -74,6 +65,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_79.memory[offset]; uint v = v_67.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_79);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_79); uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_79); uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_79); uint raw2 = read_mem(param_4, param_5, v_67);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 }; s.tiles = TileRef{ raw2 };
@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_79.memory[offset] = val; v_67.memory[offset] = val;
} }
kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint sh_row_width[256]; threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256]; threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256]; threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) };
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = v_79.mem_error == 0u; bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _186.conf.n_elements) if (element_ix < _166.conf.n_elements)
{ {
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param; Alloc param;
param.offset = _186.conf.anno_alloc.offset; param.offset = _166.conf.tile_alloc.offset;
AnnotatedRef param_1 = ref; PathRef param_1 = path_ref;
AnnotatedTag tag = Annotated_tag(param, param_1, v_79); Path path = Path_read(param, param_1, v_67);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_3;
param_3.offset = _186.conf.tile_alloc.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4, v_79);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u; bool _210 = row_count == 1u;
bool _278; bool _216;
if (_272) if (_210)
{ {
_278 = path.bbox.y > 0u; _216 = path.bbox.y > 0u;
} }
else else
{ {
_278 = _272; _216 = _210;
} }
if (_278) if (_216)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_5 = path.tiles.offset; uint param_2 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok; bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7); Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
break;
}
}
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
} }
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _325 = gl_LocalInvocationID.y == 0u; bool _262 = gl_LocalInvocationID.y == 0u;
bool _332; bool _269;
if (_325) if (_262)
{ {
_332 = th_ix >= (1u << i); _269 = th_ix >= (1u << i);
} }
else else
{ {
_332 = _325; _269 = _262;
} }
if (_332) if (_269)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _411; uint _348;
for (uint row = th_ix; row < total_rows; row += 256u) for (uint row = th_ix; row < total_rows; row += 256u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_411 = sh_row_count[el_ix - 1u]; _348 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_411 = 0u; _348 = 0u;
} }
uint seq_ix = row - _411; uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc; Alloc param_5 = tiles_alloc;
uint param_9 = tile_el_ix; uint param_6 = tile_el_ix;
uint sum = read_mem(param_8, param_9, v_79); uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_10 = tiles_alloc; Alloc param_7 = tiles_alloc;
uint param_11 = tile_el_ix; uint param_8 = tile_el_ix;
sum += read_mem(param_10, param_11, v_79); sum += read_mem(param_7, param_8, v_67);
Alloc param_12 = tiles_alloc; Alloc param_9 = tiles_alloc;
uint param_13 = tile_el_ix; uint param_10 = tile_el_ix;
uint param_14 = sum; uint param_11 = sum;
write_mem(param_12, param_13, param_14, v_79); write_mem(param_9, param_10, param_11, v_67);
} }
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -3,17 +3,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -42,12 +31,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -55,12 +46,14 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _79 : register(u0, space0); RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _186 : register(t1, space0); ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -89,24 +82,10 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _79.Load(offset * 4 + 8); uint v = _67.Load(offset * 4 + 8);
return v; return v;
} }
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _121 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _121;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref) Path Path_read(Alloc a, PathRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
@ -121,8 +100,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _165 = { raw2 }; TileRef _134 = { raw2 };
s.tiles = _165; s.tiles = _134;
return s; return s;
} }
@ -141,88 +120,65 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_79.Store(offset * 4 + 8, val); _67.Store(offset * 4 + 8, val);
} }
void comp_main() void comp_main()
{ {
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef _194 = { _186.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _194;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = _79.Load(4) == 0u; bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _186.Load(0)) if (element_ix < _166.Load(0))
{ {
Alloc _217; PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
_217.offset = _186.Load(32); PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param; Alloc param;
param.offset = _217.offset; param.offset = _185.offset;
AnnotatedRef param_1 = ref; PathRef param_1 = path_ref;
AnnotatedTag tag = Annotated_tag(param, param_1); Path path = Path_read(param, param_1);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef _243 = { _186.Load(16) + (element_ix * 12u) };
PathRef path_ref = _243;
Alloc _247;
_247.offset = _186.Load(16);
Alloc param_3;
param_3.offset = _247.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u; bool _210 = row_count == 1u;
bool _278; bool _216;
if (_272) if (_210)
{ {
_278 = path.bbox.y > 0u; _216 = path.bbox.y > 0u;
} }
else else
{ {
_278 = _272; _216 = _210;
} }
if (_278) if (_216)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_5 = path.tiles.offset; uint param_2 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok; bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7); Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
break;
}
}
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
} }
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _325 = gl_LocalInvocationID.y == 0u; bool _262 = gl_LocalInvocationID.y == 0u;
bool _332; bool _269;
if (_325) if (_262)
{ {
_332 = th_ix >= (1u << i); _269 = th_ix >= (1u << i);
} }
else else
{ {
_332 = _325; _269 = _262;
} }
if (_332) if (_269)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -234,7 +190,7 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _411; uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u) for (uint row = th_ix; row < total_rows; row += 1024u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -252,27 +208,27 @@ void comp_main()
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_411 = sh_row_count[el_ix - 1u]; _348 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_411 = 0u; _348 = 0u;
} }
uint seq_ix = row - _411; uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc; Alloc param_5 = tiles_alloc;
uint param_9 = tile_el_ix; uint param_6 = tile_el_ix;
uint sum = read_mem(param_8, param_9); uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_10 = tiles_alloc; Alloc param_7 = tiles_alloc;
uint param_11 = tile_el_ix; uint param_8 = tile_el_ix;
sum += read_mem(param_10, param_11); sum += read_mem(param_7, param_8);
Alloc param_12 = tiles_alloc; Alloc param_9 = tiles_alloc;
uint param_13 = tile_el_ix; uint param_10 = tile_el_ix;
uint param_14 = sum; uint param_11 = sum;
write_mem(param_12, param_13, param_14); write_mem(param_9, param_10, param_11);
} }
} }
} }

View file

@ -10,17 +10,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -61,12 +50,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -74,6 +65,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -90,7 +83,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_79) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -98,38 +91,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_79.memory[offset]; uint v = v_67.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_79) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_79);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_79)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_79); uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_79); uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_79); uint raw2 = read_mem(param_4, param_5, v_67);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 }; s.tiles = TileRef{ raw2 };
@ -145,7 +123,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_79) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -153,85 +131,65 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_79.memory[offset] = val; v_67.memory[offset] = val;
} }
kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _186 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint sh_row_width[256]; threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256]; threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256]; threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ _186.conf.anno_alloc.offset + (element_ix * 40u) };
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = v_79.mem_error == 0u; bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _186.conf.n_elements) if (element_ix < _166.conf.n_elements)
{ {
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param; Alloc param;
param.offset = _186.conf.anno_alloc.offset; param.offset = _166.conf.tile_alloc.offset;
AnnotatedRef param_1 = ref; PathRef param_1 = path_ref;
AnnotatedTag tag = Annotated_tag(param, param_1, v_79); Path path = Path_read(param, param_1, v_67);
switch (tag.tag)
{
case 3u:
case 2u:
case 4u:
case 1u:
{
uint param_2 = tag.flags;
if (fill_mode_from_flags(param_2) != 0u)
{
break;
}
PathRef path_ref = PathRef{ _186.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_3;
param_3.offset = _186.conf.tile_alloc.offset;
PathRef param_4 = path_ref;
Path path = Path_read(param_3, param_4, v_79);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _272 = row_count == 1u; bool _210 = row_count == 1u;
bool _278; bool _216;
if (_272) if (_210)
{ {
_278 = path.bbox.y > 0u; _216 = path.bbox.y > 0u;
} }
else else
{ {
_278 = _272; _216 = _210;
} }
if (_278) if (_216)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_5 = path.tiles.offset; uint param_2 = path.tiles.offset;
uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_7 = mem_ok; bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_5, param_6, param_7); Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
break;
}
}
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
} }
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _325 = gl_LocalInvocationID.y == 0u; bool _262 = gl_LocalInvocationID.y == 0u;
bool _332; bool _269;
if (_325) if (_262)
{ {
_332 = th_ix >= (1u << i); _269 = th_ix >= (1u << i);
} }
else else
{ {
_332 = _325; _269 = _262;
} }
if (_332) if (_269)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -243,7 +201,7 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _411; uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u) for (uint row = th_ix; row < total_rows; row += 1024u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -261,27 +219,27 @@ kernel void main0(device Memory& v_79 [[buffer(0)]], const device ConfigBuf& _18
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_411 = sh_row_count[el_ix - 1u]; _348 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_411 = 0u; _348 = 0u;
} }
uint seq_ix = row - _411; uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_8 = tiles_alloc; Alloc param_5 = tiles_alloc;
uint param_9 = tile_el_ix; uint param_6 = tile_el_ix;
uint sum = read_mem(param_8, param_9, v_79); uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_10 = tiles_alloc; Alloc param_7 = tiles_alloc;
uint param_11 = tile_el_ix; uint param_8 = tile_el_ix;
sum += read_mem(param_10, param_11, v_79); sum += read_mem(param_7, param_8, v_67);
Alloc param_12 = tiles_alloc; Alloc param_9 = tiles_alloc;
uint param_13 = tile_el_ix; uint param_10 = tile_el_ix;
uint param_14 = sum; uint param_11 = sum;
write_mem(param_12, param_13, param_14, v_79); write_mem(param_9, param_10, param_11, v_67);
} }
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -15,12 +15,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -28,6 +30,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
@ -44,7 +48,7 @@ struct SPIRV_Cross_Input
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x; uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(68)) if (ix < _21.Load(76))
{ {
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u); _45.Store(out_ix * 4 + 8, 65535u);

View file

@ -20,12 +20,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -33,6 +35,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -54,7 +58,7 @@ kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21
uint ix = gl_GlobalInvocationID.x; uint ix = gl_GlobalInvocationID.x;
if (ix < _21.conf.n_path) if (ix < _21.conf.n_path)
{ {
uint out_ix = (_21.conf.bbox_alloc.offset >> uint(2)) + (6u * ix); uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix);
_45.memory[out_ix] = 65535u; _45.memory[out_ix] = 65535u;
_45.memory[out_ix + 1u] = 65535u; _45.memory[out_ix + 1u] = 65535u;
_45.memory[out_ix + 2u] = 0u; _45.memory[out_ix + 2u] = 0u;

Binary file not shown.

Binary file not shown.

View file

@ -9,17 +9,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct BinInstanceRef struct BinInstanceRef
{ {
uint offset; uint offset;
@ -34,6 +23,8 @@ struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct Config struct Config
@ -48,12 +39,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -61,12 +54,14 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _94 : register(u0, space0); RWByteAddressBuffer _81 : register(u0, space0);
ByteAddressBuffer _202 : register(t1, space0); ByteAddressBuffer _156 : register(t1, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -81,59 +76,35 @@ groupshared bool sh_alloc_failed;
groupshared uint count[8][256]; groupshared uint count[8][256];
groupshared Alloc sh_chunk_alloc[256]; groupshared Alloc sh_chunk_alloc[256];
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _94.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _181 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _181;
}
DrawMonoid load_draw_monoid(uint element_ix) DrawMonoid load_draw_monoid(uint element_ix)
{ {
uint base = (_202.Load(44) >> uint(2)) + (2u * element_ix); uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix);
uint path_ix = _94.Load(base * 4 + 8); uint path_ix = _81.Load(base * 4 + 8);
uint clip_ix = _94.Load((base + 1u) * 4 + 8); uint clip_ix = _81.Load((base + 1u) * 4 + 8);
DrawMonoid _222 = { path_ix, clip_ix }; uint scene_offset = _81.Load((base + 2u) * 4 + 8);
return _222; uint info_offset = _81.Load((base + 3u) * 4 + 8);
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset };
return _190;
} }
float4 load_clip_bbox(uint clip_ix) float4 load_clip_bbox(uint clip_ix)
{ {
uint base = (_202.Load(60) >> uint(2)) + (4u * clip_ix); uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_94.Load(base * 4 + 8)); float x0 = asfloat(_81.Load(base * 4 + 8));
float y0 = asfloat(_94.Load((base + 1u) * 4 + 8)); float y0 = asfloat(_81.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_94.Load((base + 2u) * 4 + 8)); float x1 = asfloat(_81.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_94.Load((base + 3u) * 4 + 8)); float y1 = asfloat(_81.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
float4 load_path_bbox(uint path_ix) float4 load_path_bbox(uint path_ix)
{ {
uint base = (_202.Load(40) >> uint(2)) + (6u * path_ix); uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_94.Load(base * 4 + 8)) - 32768.0f; float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_94.Load((base + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_94.Load((base + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_94.Load((base + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
@ -143,13 +114,13 @@ float4 bbox_intersect(float4 a, float4 b)
return float4(max(a.xy, b.xy), min(a.zw, b.zw)); return float4(max(a.xy, b.xy), min(a.zw, b.zw));
} }
void store_path_bbox(AnnotatedRef ref, float4 bbox) void store_draw_bbox(uint draw_ix, float4 bbox)
{ {
uint ix = ref.offset >> uint(2); uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix);
_94.Store((ix + 1u) * 4 + 8, asuint(bbox.x)); _81.Store(base * 4 + 8, asuint(bbox.x));
_94.Store((ix + 2u) * 4 + 8, asuint(bbox.y)); _81.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_94.Store((ix + 3u) * 4 + 8, asuint(bbox.z)); _81.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_94.Store((ix + 4u) * 4 + 8, asuint(bbox.w)); _81.Store((base + 3u) * 4 + 8, asuint(bbox.w));
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -161,27 +132,32 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size) MallocResult malloc(uint size)
{ {
uint _100; uint _87;
_94.InterlockedAdd(0, size, _100); _81.InterlockedAdd(0, size, _87);
uint offset = _100; uint offset = _87;
uint _107; uint _94;
_94.GetDimensions(_107); _81.GetDimensions(_94);
_107 = (_107 - 8) / 4; _94 = (_94 - 8) / 4;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int(_107) * 4); r.failed = (offset + size) > uint(int(_94) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _129; uint _116;
_94.InterlockedMax(4, 1u, _129); _81.InterlockedMax(4, 1u, _116);
return r; return r;
} }
return r; return r;
} }
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val) void write_mem(Alloc alloc, uint offset, uint val)
{ {
Alloc param = alloc; Alloc param = alloc;
@ -190,7 +166,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_94.Store(offset * 4 + 8, val); _81.Store(offset * 4 + 8, val);
} }
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
@ -204,7 +180,6 @@ void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
void comp_main() void comp_main()
{ {
uint my_n_elements = _202.Load(0);
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
@ -216,62 +191,42 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef _415 = { _202.Load(32) + (element_ix * 40u) };
AnnotatedRef ref = _415;
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc _425;
_425.offset = _202.Load(32);
Alloc param;
param.offset = _425.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag;
}
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
switch (tag) if (element_ix < _156.Load(0))
{ {
case 1u: uint param = element_ix;
case 2u: DrawMonoid draw_monoid = load_draw_monoid(param);
case 3u:
case 4u:
case 5u:
{
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2);
uint path_ix = draw_monoid.path_ix; uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
uint clip_ix = draw_monoid.clip_ix; uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u) if (clip_ix > 0u)
{ {
uint param_3 = clip_ix - 1u; uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3); clip_bbox = load_clip_bbox(param_1);
} }
uint param_4 = path_ix; uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_4); float4 path_bbox = load_path_bbox(param_2);
float4 param_5 = path_bbox; float4 param_3 = path_bbox;
float4 param_6 = clip_bbox; float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6); float4 bbox = bbox_intersect(param_3, param_4);
float4 _473 = bbox; float4 _417 = bbox;
float4 _475 = bbox; float4 _419 = bbox;
float2 _477 = max(_473.xy, _475.zw); float2 _421 = max(_417.xy, _419.zw);
bbox.z = _477.x; bbox.z = _421.x;
bbox.w = _477.y; bbox.w = _421.y;
AnnotatedRef param_7 = ref; uint param_5 = element_ix;
float4 param_8 = bbox; float4 param_6 = bbox;
store_path_bbox(param_7, param_8); store_draw_bbox(param_5, param_6);
x0 = int(floor(bbox.x * 0.00390625f)); x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f)); y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f)); x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f)); y1 = int(ceil(bbox.w * 0.00390625f));
break;
} }
} uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u;
uint width_in_bins = ((_202.Load(8) + 16u) - 1u) / 16u; uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u;
uint height_in_bins = ((_202.Load(12) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -286,8 +241,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _581; uint _523;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _581); InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -302,15 +257,15 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_9 = 0u; uint param_7 = 0u;
uint param_10 = 0u; uint param_8 = 0u;
bool param_11 = true; bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_12 = element_count * 4u; uint param_10 = element_count * 4u;
MallocResult _631 = malloc(param_12); MallocResult _573 = malloc(param_10);
MallocResult chunk = _631; MallocResult chunk = _573;
chunk_alloc = chunk.alloc; chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) if (chunk.failed)
@ -318,32 +273,32 @@ void comp_main()
sh_alloc_failed = true; sh_alloc_failed = true;
} }
} }
uint out_ix = (_202.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _660; Alloc _603;
_660.offset = _202.Load(20); _603.offset = _156.Load(20);
Alloc param_13; Alloc param_11;
param_13.offset = _660.offset; param_11.offset = _603.offset;
uint param_14 = out_ix; uint param_12 = out_ix;
uint param_15 = element_count; uint param_13 = element_count;
write_mem(param_13, param_14, param_15); write_mem(param_11, param_12, param_13);
Alloc _672; Alloc _615;
_672.offset = _202.Load(20); _615.offset = _156.Load(20);
Alloc param_16; Alloc param_14;
param_16.offset = _672.offset; param_14.offset = _615.offset;
uint param_17 = out_ix + 1u; uint param_15 = out_ix + 1u;
uint param_18 = chunk_alloc.offset; uint param_16 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18); write_mem(param_14, param_15, param_16);
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _687; bool _630;
if (!sh_alloc_failed) if (!sh_alloc_failed)
{ {
_687 = _94.Load(4) != 0u; _630 = _81.Load(4) != 0u;
} }
else else
{ {
_687 = sh_alloc_failed; _630 = sh_alloc_failed;
} }
if (_687) if (_630)
{ {
return; return;
} }
@ -362,12 +317,12 @@ void comp_main()
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _749 = { out_offset }; BinInstanceRef _692 = { out_offset };
BinInstance _751 = { element_ix }; BinInstance _694 = { element_ix };
Alloc param_19 = out_alloc; Alloc param_17 = out_alloc;
BinInstanceRef param_20 = _749; BinInstanceRef param_18 = _692;
BinInstance param_21 = _751; BinInstance param_19 = _694;
BinInstance_write(param_19, param_20, param_21); BinInstance_write(param_17, param_18, param_19);
} }
x++; x++;
if (x == x1) if (x == x1)

View file

@ -18,17 +18,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct BinInstanceRef struct BinInstanceRef
{ {
uint offset; uint offset;
@ -43,6 +32,8 @@ struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct Memory struct Memory
@ -69,12 +60,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -82,6 +75,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -92,62 +87,36 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset) DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{ {
return true; uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_81.memory[base];
uint clip_ix = v_81.memory[base + 1u];
uint scene_offset = v_81.memory[base + 2u];
uint info_offset = v_81.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_94, constant uint& v_94BufferSize) float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{ {
Alloc param = alloc; uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
uint param_1 = offset; float x0 = as_type<float>(v_81.memory[base]);
if (!touch_mem(param, param_1)) float y0 = as_type<float>(v_81.memory[base + 1u]);
{ float x1 = as_type<float>(v_81.memory[base + 2u]);
return 0u; float y1 = as_type<float>(v_81.memory[base + 3u]);
}
uint v = v_94.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_94, constant uint& v_94BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_94, v_94BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix);
uint path_ix = v_94.memory[base];
uint clip_ix = v_94.memory[base + 1u];
return DrawMonoid{ path_ix, clip_ix };
}
static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202)
{
uint base = (v_202.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_94.memory[base]);
float y0 = as_type<float>(v_94.memory[base + 1u]);
float x1 = as_type<float>(v_94.memory[base + 2u]);
float y1 = as_type<float>(v_94.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_94, constant uint& v_94BufferSize, const device ConfigBuf& v_202) float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{ {
uint base = (v_202.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_94.memory[base]) - 32768.0; float bbox_l = float(v_81.memory[base]) - 32768.0;
float bbox_t = float(v_94.memory[base + 1u]) - 32768.0; float bbox_t = float(v_81.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_94.memory[base + 2u]) - 32768.0; float bbox_r = float(v_81.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_94.memory[base + 3u]) - 32768.0; float bbox_b = float(v_81.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
@ -159,13 +128,13 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void store_path_bbox(thread const AnnotatedRef& ref, thread const float4& bbox, device Memory& v_94, constant uint& v_94BufferSize) void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{ {
uint ix = ref.offset >> uint(2); uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_94.memory[ix + 1u] = as_type<uint>(bbox.x); v_81.memory[base] = as_type<uint>(bbox.x);
v_94.memory[ix + 2u] = as_type<uint>(bbox.y); v_81.memory[base + 1u] = as_type<uint>(bbox.y);
v_94.memory[ix + 3u] = as_type<uint>(bbox.z); v_81.memory[base + 2u] = as_type<uint>(bbox.z);
v_94.memory[ix + 4u] = as_type<uint>(bbox.w); v_81.memory[base + 3u] = as_type<uint>(bbox.w);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -177,26 +146,32 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_94, constant uint& v_94BufferSize) MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
{ {
uint _100 = atomic_fetch_add_explicit((device atomic_uint*)&v_94.mem_offset, size, memory_order_relaxed); uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
uint offset = _100; uint offset = _87;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int((v_94BufferSize - 8) / 4) * 4); r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _129 = atomic_fetch_max_explicit((device atomic_uint*)&v_94.mem_error, 1u, memory_order_relaxed); uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed);
return r; return r;
} }
return r; return r;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_94, constant uint& v_94BufferSize) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -204,27 +179,26 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_94.memory[offset] = val; v_81.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_94, constant uint& v_94BufferSize) void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = s.element_ix; uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_94, v_94BufferSize); write_mem(param, param_1, param_2, v_81, v_81BufferSize);
} }
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint bitmaps[8][256]; threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed; threadgroup short sh_alloc_failed;
threadgroup uint count[8][256]; threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256]; threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_94BufferSize = spvBufferSizeConstants[0]; constant uint& v_81BufferSize = spvBufferSizeConstants[0];
uint my_n_elements = v_202.conf.n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
@ -236,59 +210,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef{ v_202.conf.anno_alloc.offset + (element_ix * 40u) };
uint tag = 0u;
if (element_ix < my_n_elements)
{
Alloc param;
param.offset = v_202.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_94, v_94BufferSize).tag;
}
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
switch (tag) if (element_ix < v_156.conf.n_elements)
{ {
case 1u: uint param = element_ix;
case 2u: DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156);
case 3u:
case 4u:
case 5u:
{
uint param_2 = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param_2, v_94, v_94BufferSize, v_202);
uint path_ix = draw_monoid.path_ix; uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix; uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u) if (clip_ix > 0u)
{ {
uint param_3 = clip_ix - 1u; uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_3, v_94, v_94BufferSize, v_202); clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156);
} }
uint param_4 = path_ix; uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_4, v_94, v_94BufferSize, v_202); float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156);
float4 param_5 = path_bbox; float4 param_3 = path_bbox;
float4 param_6 = clip_bbox; float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_5, param_6); float4 bbox = bbox_intersect(param_3, param_4);
float4 _473 = bbox; float4 _417 = bbox;
float4 _475 = bbox; float4 _419 = bbox;
float2 _477 = fast::max(_473.xy, _475.zw); float2 _421 = fast::max(_417.xy, _419.zw);
bbox.z = _477.x; bbox.z = _421.x;
bbox.w = _477.y; bbox.w = _421.y;
AnnotatedRef param_7 = ref; uint param_5 = element_ix;
float4 param_8 = bbox; float4 param_6 = bbox;
store_path_bbox(param_7, param_8, v_94, v_94BufferSize); store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156);
x0 = int(floor(bbox.x * 0.00390625)); x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625)); y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625)); x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625)); y1 = int(ceil(bbox.w * 0.00390625));
break;
} }
} uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u;
uint width_in_bins = ((v_202.conf.width_in_tiles + 16u) - 1u) / 16u; uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_202.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -303,7 +260,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _581 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -318,15 +275,15 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_9 = 0u; uint param_7 = 0u;
uint param_10 = 0u; uint param_8 = 0u;
bool param_11 = true; bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_9, param_10, param_11); Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_12 = element_count * 4u; uint param_10 = element_count * 4u;
MallocResult _631 = malloc(param_12, v_94, v_94BufferSize); MallocResult _573 = malloc(param_10, v_81, v_81BufferSize);
MallocResult chunk = _631; MallocResult chunk = _573;
chunk_alloc = chunk.alloc; chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) if (chunk.failed)
@ -334,28 +291,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
sh_alloc_failed = short(true); sh_alloc_failed = short(true);
} }
} }
uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_13; Alloc param_11;
param_13.offset = v_202.conf.bin_alloc.offset; param_11.offset = v_156.conf.bin_alloc.offset;
uint param_14 = out_ix; uint param_12 = out_ix;
uint param_15 = element_count; uint param_13 = element_count;
write_mem(param_13, param_14, param_15, v_94, v_94BufferSize); write_mem(param_11, param_12, param_13, v_81, v_81BufferSize);
Alloc param_16; Alloc param_14;
param_16.offset = v_202.conf.bin_alloc.offset; param_14.offset = v_156.conf.bin_alloc.offset;
uint param_17 = out_ix + 1u; uint param_15 = out_ix + 1u;
uint param_18 = chunk_alloc.offset; uint param_16 = chunk_alloc.offset;
write_mem(param_16, param_17, param_18, v_94, v_94BufferSize); write_mem(param_14, param_15, param_16, v_81, v_81BufferSize);
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _687; bool _630;
if (!bool(sh_alloc_failed)) if (!bool(sh_alloc_failed))
{ {
_687 = v_94.mem_error != 0u; _630 = v_81.mem_error != 0u;
} }
else else
{ {
_687 = bool(sh_alloc_failed); _630 = bool(sh_alloc_failed);
} }
if (_687) if (_630)
{ {
return; return;
} }
@ -374,10 +331,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_19 = out_alloc; Alloc param_17 = out_alloc;
BinInstanceRef param_20 = BinInstanceRef{ out_offset }; BinInstanceRef param_18 = BinInstanceRef{ out_offset };
BinInstance param_21 = BinInstance{ element_ix }; BinInstance param_19 = BinInstance{ element_ix };
BinInstance_write(param_19, param_20, param_21, v_94, v_94BufferSize); BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
} }
x++; x++;
if (x == x1) if (x == x1)

Binary file not shown.

Binary file not shown.

View file

@ -27,12 +27,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -40,6 +42,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -99,7 +103,7 @@ float4 bbox_intersect(float4 a, float4 b)
uint load_path_ix(uint ix) uint load_path_ix(uint ix)
{ {
if (ix < _80.Load(72)) if (ix < _80.Load(80))
{ {
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8); return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
} }
@ -324,7 +328,7 @@ void comp_main()
bool _725; bool _725;
if (_717) if (_717)
{ {
_725 = gl_GlobalInvocationID.x < _80.Load(72); _725 = gl_GlobalInvocationID.x < _80.Load(80);
} }
else else
{ {
@ -334,7 +338,7 @@ void comp_main()
{ {
uint param_15 = parent; uint param_15 = parent;
path_ix = load_path_ix(param_15); path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (2u * (~inp)); uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix); _96.Store(drawmonoid_out_base * 4 + 8, path_ix);
if (int(grandparent) >= 0) if (int(grandparent) >= 0)
{ {

View file

@ -34,12 +34,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -47,6 +49,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -112,7 +116,7 @@ uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Me
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96) float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{ {
uint base = (v_80.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0; float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0; float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0; float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
@ -341,7 +345,7 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
{ {
uint param_15 = parent; uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96); path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * (~inp)); uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix; v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0) if (int(grandparent) >= 0)
{ {

Binary file not shown.

View file

@ -27,12 +27,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -40,6 +42,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);

View file

@ -34,12 +34,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -47,6 +49,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -81,7 +85,7 @@ void store_bic(thread const uint& ix, thread const Bic& bic, const device Config
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80) float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{ {
uint base = (v_64.conf.bbox_alloc.offset >> uint(2)) + (6u * path_ix); uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0; float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0; float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0; float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,133 +1,12 @@
struct Alloc
{
uint offset;
};
struct ElementRef
{
uint offset;
};
struct FillColorRef
{
uint offset;
};
struct FillColor
{
uint rgba_color;
};
struct FillLinGradientRef
{
uint offset;
};
struct FillLinGradient
{
uint index;
float2 p0;
float2 p1;
};
struct FillImageRef
{
uint offset;
};
struct FillImage
{
uint index;
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
uint blend;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct AnnoImageRef struct Alloc
{
uint offset;
};
struct AnnoImage
{
float4 bbox;
float linewidth;
uint index;
int2 offset;
};
struct AnnoColorRef
{
uint offset;
};
struct AnnoColor
{
float4 bbox;
float linewidth;
uint rgba_color;
};
struct AnnoLinGradientRef
{
uint offset;
};
struct AnnoLinGradient
{
float4 bbox;
float linewidth;
uint index;
float line_x;
float line_y;
float line_c;
};
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
uint blend;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{ {
uint offset; uint offset;
}; };
@ -144,12 +23,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -157,18 +38,18 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _413 = { 0u, 0u }; static const DrawMonoid _23 = { 0u, 0u, 0u, 0u };
static const DrawMonoid _437 = { 1u, 0u };
static const DrawMonoid _439 = { 1u, 1u };
RWByteAddressBuffer _199 : register(u0, space0); ByteAddressBuffer _92 : register(t1, space0);
ByteAddressBuffer _223 : register(t2, space0); ByteAddressBuffer _102 : register(t2, space0);
ByteAddressBuffer _1020 : register(t3, space0); ByteAddressBuffer _202 : register(t3, space0);
ByteAddressBuffer _1054 : register(t1, space0); RWByteAddressBuffer _284 : register(u0, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -182,389 +63,44 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256]; groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _223.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _378 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _378;
}
DrawMonoid map_tag(uint tag_word) DrawMonoid map_tag(uint tag_word)
{ {
switch (tag_word) uint has_path = uint(tag_word != 0u);
{ DrawMonoid _75 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
case 4u: return _75;
case 5u:
case 6u:
{
return _437;
}
case 9u:
case 10u:
{
return _439;
}
default:
{
return _413;
}
}
} }
ElementRef Element_index(ElementRef ref, uint index) DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
ElementRef _212 = { ref.offset + (index * 36u) };
return _212;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
DrawMonoid tag_monoid_identity() DrawMonoid draw_monoid_identity()
{ {
return _413; return _23;
}
FillColor FillColor_read(FillColorRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
FillColor s;
s.rgba_color = raw0;
return s;
}
FillColor Element_FillColor_read(ElementRef ref)
{
FillColorRef _384 = { ref.offset + 4u };
FillColorRef param = _384;
return FillColor_read(param);
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_199.Store(offset * 4 + 8, val);
}
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17);
}
void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2);
AnnoColorRef _818 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoColorRef param_4 = _818;
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5);
}
FillLinGradient FillLinGradient_read(FillLinGradientRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
uint raw2 = _223.Load((ix + 2u) * 4 + 0);
uint raw3 = _223.Load((ix + 3u) * 4 + 0);
uint raw4 = _223.Load((ix + 4u) * 4 + 0);
FillLinGradient s;
s.index = raw0;
s.p0 = float2(asfloat(raw1), asfloat(raw2));
s.p1 = float2(asfloat(raw3), asfloat(raw4));
return s;
}
FillLinGradient Element_FillLinGradient_read(ElementRef ref)
{
FillLinGradientRef _392 = { ref.offset + 4u };
FillLinGradientRef param = _392;
return FillLinGradient_read(param);
}
void AnnoLinGradient_write(Alloc a, AnnoLinGradientRef ref, AnnoLinGradient s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = asuint(s.line_x);
write_mem(param_18, param_19, param_20);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = asuint(s.line_y);
write_mem(param_21, param_22, param_23);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = asuint(s.line_c);
write_mem(param_24, param_25, param_26);
}
void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinGradient s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2);
AnnoLinGradientRef _839 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoLinGradientRef param_4 = _839;
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5);
}
FillImage FillImage_read(FillImageRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
return s;
}
FillImage Element_FillImage_read(ElementRef ref)
{
FillImageRef _400 = { ref.offset + 4u };
FillImageRef param = _400;
return FillImage_read(param);
}
void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20);
}
void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2);
AnnoImageRef _860 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoImageRef param_4 = _860;
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5);
}
Clip Clip_read(ClipRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _223.Load((ix + 0u) * 4 + 0);
uint raw1 = _223.Load((ix + 1u) * 4 + 0);
uint raw2 = _223.Load((ix + 2u) * 4 + 0);
uint raw3 = _223.Load((ix + 3u) * 4 + 0);
Clip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.blend = _223.Load((ix + 4u) * 4 + 0);
return s;
}
Clip Element_BeginClip_read(ElementRef ref)
{
ClipRef _408 = { ref.offset + 4u };
ClipRef param = _408;
return Clip_read(param);
}
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.linewidth);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.blend;
write_mem(param_15, param_16, param_17);
}
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2);
AnnoBeginClipRef _881 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoBeginClipRef param_4 = _881;
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5);
}
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.bbox.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.bbox.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.bbox.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.bbox.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = s.blend;
write_mem(param_12, param_13, param_14);
}
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 5u;
write_mem(param, param_1, param_2);
AnnoEndClipRef _902 = { ref.offset + 4u };
Alloc param_3 = a;
AnnoEndClipRef param_4 = _902;
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5);
} }
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _920 = { ix * 36u }; uint drawtag_base = _92.Load(100) >> uint(2);
ElementRef ref = _920; uint tag_word = _102.Load((drawtag_base + ix) * 4 + 0);
ElementRef param = ref; uint param = tag_word;
uint tag_word = Element_tag(param).tag; DrawMonoid agg = map_tag(param);
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
DrawMonoid local[8]; DrawMonoid local[8];
local[0] = agg; local[0] = agg;
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
ElementRef param_2 = ref; tag_word = _102.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_3 = i; uint param_1 = tag_word;
ElementRef param_4 = Element_index(param_2, param_3); DrawMonoid param_2 = agg;
tag_word = Element_tag(param_4).tag; DrawMonoid param_3 = map_tag(param_1);
uint param_5 = tag_word; agg = combine_draw_monoid(param_2, param_3);
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg; local[i] = agg;
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
@ -574,194 +110,121 @@ void comp_main()
if (gl_LocalInvocationID.x >= (1u << i_1)) if (gl_LocalInvocationID.x >= (1u << i_1))
{ {
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other; DrawMonoid param_4 = other;
DrawMonoid param_9 = agg; DrawMonoid param_5 = agg;
agg = combine_tag_monoid(param_8, param_9); agg = combine_draw_monoid(param_4, param_5);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
DrawMonoid _1026; DrawMonoid _208;
_1026.path_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 0); _208.path_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 0);
_1026.clip_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 4); _208.clip_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 4);
row.path_ix = _1026.path_ix; _208.scene_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 8);
row.clip_ix = _1026.clip_ix; _208.info_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 12);
row.path_ix = _208.path_ix;
row.clip_ix = _208.clip_ix;
row.scene_offset = _208.scene_offset;
row.info_offset = _208.info_offset;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
DrawMonoid param_10 = row; DrawMonoid param_6 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11); row = combine_draw_monoid(param_6, param_7);
} }
uint drawdata_base = _92.Load(104) >> uint(2);
uint drawinfo_base = _92.Load(68) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1054.Load(44) >> uint(2)) + (out_ix * 2u); uint out_base = (_92.Load(44) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _1054.Load(48) >> uint(2); uint clip_out_base = _92.Load(48) >> uint(2);
AnnotatedRef _1075 = { _1054.Load(32) + (out_ix * 40u) };
AnnotatedRef out_ref = _1075;
float4 mat; float4 mat;
float2 translate; float2 translate;
AnnoColor anno_fill;
Alloc param_18;
AnnoLinGradient anno_lin;
Alloc param_23;
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
AnnoEndClip anno_end_clip;
Alloc param_38;
for (uint i_2 = 0u; i_2 < 8u; i_2++) for (uint i_2 = 0u; i_2 < 8u; i_2++)
{ {
DrawMonoid m = row; DrawMonoid m = row;
if (i_2 > 0u) if (i_2 > 0u)
{ {
DrawMonoid param_12 = m; DrawMonoid param_8 = m;
DrawMonoid param_13 = local[i_2 - 1u]; DrawMonoid param_9 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13); m = combine_draw_monoid(param_8, param_9);
} }
_199.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); _284.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix);
_199.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); _284.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix);
ElementRef param_14 = ref; _284.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset);
uint param_15 = i_2; _284.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset);
ElementRef this_ref = Element_index(param_14, param_15); uint dd = drawdata_base + (m.scene_offset >> uint(2));
ElementRef param_16 = this_ref; uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = Element_tag(param_16).tag; tag_word = _102.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u))
{ {
uint bbox_offset = (_1054.Load(40) >> uint(2)) + (6u * m.path_ix); uint bbox_offset = (_92.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_199.Load(bbox_offset * 4 + 8)) - 32768.0f; float bbox_l = float(_284.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_199.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_284.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_199.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_284.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_199.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_284.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_199.Load((bbox_offset + 4u) * 4 + 8)); float linewidth = asfloat(_284.Load((bbox_offset + 4u) * 4 + 8));
uint fill_mode = uint(linewidth >= 0.0f); uint fill_mode = uint(linewidth >= 0.0f);
if ((linewidth >= 0.0f) || (tag_word == 5u)) if ((linewidth >= 0.0f) || (tag_word == 276u))
{ {
uint trans_ix = _199.Load((bbox_offset + 5u) * 4 + 8); uint trans_ix = _284.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_1054.Load(36) >> uint(2)) + (6u * trans_ix); uint t = (_92.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_199.Load(t * 4 + 8), _199.Load((t + 1u) * 4 + 8), _199.Load((t + 2u) * 4 + 8), _199.Load((t + 3u) * 4 + 8))); mat = asfloat(uint4(_284.Load(t * 4 + 8), _284.Load((t + 1u) * 4 + 8), _284.Load((t + 2u) * 4 + 8), _284.Load((t + 3u) * 4 + 8)));
if (tag_word == 5u) if (tag_word == 276u)
{ {
translate = asfloat(uint2(_199.Load((t + 4u) * 4 + 8), _199.Load((t + 5u) * 4 + 8))); translate = asfloat(uint2(_284.Load((t + 4u) * 4 + 8), _284.Load((t + 5u) * 4 + 8)));
} }
} }
if (linewidth >= 0.0f) if (linewidth >= 0.0f)
{ {
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
} }
linewidth = max(linewidth, 0.0f);
switch (tag_word) switch (tag_word)
{ {
case 4u: case 68u:
case 72u:
{ {
ElementRef param_17 = this_ref; _284.Store(di * 4 + 8, asuint(linewidth));
FillColor fill = Element_FillColor_read(param_17);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Alloc _1288;
_1288.offset = _1054.Load(32);
param_18.offset = _1288.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21);
break; break;
} }
case 5u: case 276u:
{ {
ElementRef param_22 = this_ref; _284.Store(di * 4 + 8, asuint(linewidth));
FillLinGradient lin = Element_FillLinGradient_read(param_22); uint index = _102.Load(dd * 4 + 0);
anno_lin.bbox = bbox; float2 p0 = asfloat(uint2(_102.Load((dd + 1u) * 4 + 0), _102.Load((dd + 2u) * 4 + 0)));
anno_lin.linewidth = linewidth; float2 p1 = asfloat(uint2(_102.Load((dd + 3u) * 4 + 0), _102.Load((dd + 4u) * 4 + 0)));
anno_lin.index = lin.index; p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate;
float2 dxy = p1 - p0; float2 dxy = p1 - p0;
float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale; float line_x = dxy.x * scale;
float line_y = dxy.y * scale; float line_y = dxy.y * scale;
anno_lin.line_x = line_x; float line_c = -((p0.x * line_x) + (p0.y * line_y));
anno_lin.line_y = line_y; _284.Store((di + 1u) * 4 + 8, asuint(line_x));
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); _284.Store((di + 2u) * 4 + 8, asuint(line_y));
Alloc _1384; _284.Store((di + 3u) * 4 + 8, asuint(line_c));
_1384.offset = _1054.Load(32);
param_23.offset = _1384.offset;
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26);
break; break;
} }
case 6u: case 5u:
{ {
ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27);
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Alloc _1412;
_1412.offset = _1054.Load(32);
param_28.offset = _1412.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31);
break;
}
case 9u:
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32);
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0f;
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != 3u) << uint(1);
Alloc _1442;
_1442.offset = _1054.Load(32);
param_33.offset = _1442.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = flags;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36);
break; break;
} }
} }
} }
else if ((tag_word == 5u) || (tag_word == 37u))
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_BeginClip_read(param_37);
anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
anno_end_clip.blend = end_clip.blend;
uint flags_1 = uint(end_clip.blend != 3u) << uint(1);
Alloc _1480;
_1480.offset = _1054.Load(32);
param_38.offset = _1480.offset;
AnnotatedRef param_39 = out_ref;
uint param_40 = flags_1;
AnnoEndClip param_41 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, param_41);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{ {
uint path_ix = ~(out_ix + i_2); uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u) if (tag_word == 5u)
{ {
path_ix = m.path_ix; path_ix = m.path_ix;
} }
_199.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); _284.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
} }
out_ref.offset += 40u;
} }
} }

View file

@ -44,145 +44,53 @@ struct spvUnsafeArray
} }
}; };
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc struct Alloc
{ {
uint offset; uint offset;
}; };
struct ElementRef struct Config
{ {
uint offset; uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct FillColorRef struct ConfigBuf
{ {
uint offset; Config conf;
};
struct FillColor
{
uint rgba_color;
};
struct FillLinGradientRef
{
uint offset;
};
struct FillLinGradient
{
uint index;
float2 p0;
float2 p1;
};
struct FillImageRef
{
uint offset;
};
struct FillImage
{
uint index;
int2 offset;
};
struct ClipRef
{
uint offset;
};
struct Clip
{
float4 bbox;
uint blend;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct AnnoImageRef
{
uint offset;
};
struct AnnoImage
{
float4 bbox;
float linewidth;
uint index;
int2 offset;
};
struct AnnoColorRef
{
uint offset;
};
struct AnnoColor
{
float4 bbox;
float linewidth;
uint rgba_color;
};
struct AnnoLinGradientRef
{
uint offset;
};
struct AnnoLinGradient
{
float4 bbox;
float linewidth;
uint index;
float line_x;
float line_y;
float line_c;
};
struct AnnoBeginClipRef
{
uint offset;
};
struct AnnoBeginClip
{
float4 bbox;
float linewidth;
uint blend;
};
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
}; };
struct SceneBuf struct SceneBuf
@ -194,6 +102,8 @@ struct DrawMonoid_1
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct ParentBuf struct ParentBuf
@ -201,442 +111,56 @@ struct ParentBuf
DrawMonoid_1 parent[1]; DrawMonoid_1 parent[1];
}; };
struct Alloc_1 struct Memory
{ {
uint offset; uint mem_offset;
}; uint mem_error;
uint memory[1];
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
}; };
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_223)
{
uint tag_and_flags = v_223.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word) DrawMonoid map_tag(thread const uint& tag_word)
{ {
switch (tag_word) uint has_path = uint(tag_word != 0u);
{ return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
case 10u:
{
return DrawMonoid{ 1u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity() DrawMonoid draw_monoid_identity()
{ {
return DrawMonoid{ 0u, 0u }; return DrawMonoid{ 0u, 0u, 0u, 0u };
} }
static inline __attribute__((always_inline)) kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 [[buffer(1)]], const device SceneBuf& _102 [[buffer(2)]], const device ParentBuf& _202 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
FillColor s;
s.rgba_color = raw0;
return s;
}
static inline __attribute__((always_inline))
FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillColorRef param = FillColorRef{ ref.offset + 4u };
return FillColor_read(param, v_223);
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_199)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_199.memory[offset] = val;
}
static inline __attribute__((always_inline))
void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.rgba_color;
write_mem(param_15, param_16, param_17, v_199);
}
static inline __attribute__((always_inline))
void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u };
AnnoColor param_5 = s;
AnnoColor_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
uint raw2 = v_223.scene[ix + 2u];
uint raw3 = v_223.scene[ix + 3u];
uint raw4 = v_223.scene[ix + 4u];
FillLinGradient s;
s.index = raw0;
s.p0 = float2(as_type<float>(raw1), as_type<float>(raw2));
s.p1 = float2(as_type<float>(raw3), as_type<float>(raw4));
return s;
}
static inline __attribute__((always_inline))
FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u };
return FillLinGradient_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_199);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.line_x);
write_mem(param_18, param_19, param_20, v_199);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.line_y);
write_mem(param_21, param_22, param_23, v_199);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = as_type<uint>(s.line_c);
write_mem(param_24, param_25, param_26, v_199);
}
static inline __attribute__((always_inline))
void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 2u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u };
AnnoLinGradient param_5 = s;
AnnoLinGradient_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
FillImage s;
s.index = raw0;
s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
return s;
}
static inline __attribute__((always_inline))
FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
FillImageRef param = FillImageRef{ ref.offset + 4u };
return FillImage_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.index;
write_mem(param_15, param_16, param_17, v_199);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_18, param_19, param_20, v_199);
}
static inline __attribute__((always_inline))
void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 3u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u };
AnnoImage param_5 = s;
AnnoImage_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_223)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_223.scene[ix + 0u];
uint raw1 = v_223.scene[ix + 1u];
uint raw2 = v_223.scene[ix + 2u];
uint raw3 = v_223.scene[ix + 3u];
Clip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.blend = v_223.scene[ix + 4u];
return s;
}
static inline __attribute__((always_inline))
Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_223)
{
ClipRef param = ClipRef{ ref.offset + 4u };
return Clip_read(param, v_223);
}
static inline __attribute__((always_inline))
void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.linewidth);
write_mem(param_12, param_13, param_14, v_199);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.blend;
write_mem(param_15, param_16, param_17, v_199);
}
static inline __attribute__((always_inline))
void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 4u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u };
AnnoBeginClip param_5 = s;
AnnoBeginClip_write(param_3, param_4, param_5, v_199);
}
static inline __attribute__((always_inline))
void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_199)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.bbox.x);
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.bbox.y);
write_mem(param_3, param_4, param_5, v_199);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.bbox.z);
write_mem(param_6, param_7, param_8, v_199);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.bbox.w);
write_mem(param_9, param_10, param_11, v_199);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = s.blend;
write_mem(param_12, param_13, param_14, v_199);
}
static inline __attribute__((always_inline))
void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoEndClip& s, device Memory& v_199)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 5u;
write_mem(param, param_1, param_2, v_199);
Alloc param_3 = a;
AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u };
AnnoEndClip param_5 = s;
AnnoEndClip_write(param_3, param_4, param_5, v_199);
}
kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1054 [[buffer(1)]], const device SceneBuf& v_223 [[buffer(2)]], const device ParentBuf& _1020 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup DrawMonoid sh_scratch[256]; threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u }; uint drawtag_base = _92.conf.drawtag_offset >> uint(2);
ElementRef param = ref; uint tag_word = _102.scene[drawtag_base + ix];
uint tag_word = Element_tag(param, v_223).tag; uint param = tag_word;
uint param_1 = tag_word; DrawMonoid agg = map_tag(param);
DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local; spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg; local[0] = agg;
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
ElementRef param_2 = ref; tag_word = _102.scene[(drawtag_base + ix) + i];
uint param_3 = i; uint param_1 = tag_word;
ElementRef param_4 = Element_index(param_2, param_3); DrawMonoid param_2 = agg;
tag_word = Element_tag(param_4, v_223).tag; DrawMonoid param_3 = map_tag(param_1);
uint param_5 = tag_word; agg = combine_draw_monoid(param_2, param_3);
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg; local[i] = agg;
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
@ -646,181 +170,117 @@ kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1
if (gl_LocalInvocationID.x >= (1u << i_1)) if (gl_LocalInvocationID.x >= (1u << i_1))
{ {
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other; DrawMonoid param_4 = other;
DrawMonoid param_9 = agg; DrawMonoid param_5 = agg;
agg = combine_tag_monoid(param_8, param_9); agg = combine_draw_monoid(param_4, param_5);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
uint _1023 = gl_WorkGroupID.x - 1u; uint _205 = gl_WorkGroupID.x - 1u;
row.path_ix = _1020.parent[_1023].path_ix; row.path_ix = _202.parent[_205].path_ix;
row.clip_ix = _1020.parent[_1023].clip_ix; row.clip_ix = _202.parent[_205].clip_ix;
row.scene_offset = _202.parent[_205].scene_offset;
row.info_offset = _202.parent[_205].info_offset;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
DrawMonoid param_10 = row; DrawMonoid param_6 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11); row = combine_draw_monoid(param_6, param_7);
} }
uint drawdata_base = _92.conf.drawdata_offset >> uint(2);
uint drawinfo_base = _92.conf.drawinfo_alloc.offset >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_1054.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); uint out_base = (_92.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _1054.conf.clip_alloc.offset >> uint(2); uint clip_out_base = _92.conf.clip_alloc.offset >> uint(2);
AnnotatedRef out_ref = AnnotatedRef{ _1054.conf.anno_alloc.offset + (out_ix * 40u) };
float4 mat; float4 mat;
float2 translate; float2 translate;
AnnoColor anno_fill;
Alloc param_18;
AnnoLinGradient anno_lin;
Alloc param_23;
AnnoImage anno_img;
Alloc param_28;
AnnoBeginClip anno_begin_clip;
Alloc param_33;
AnnoEndClip anno_end_clip;
Alloc param_38;
for (uint i_2 = 0u; i_2 < 8u; i_2++) for (uint i_2 = 0u; i_2 < 8u; i_2++)
{ {
DrawMonoid m = row; DrawMonoid m = row;
if (i_2 > 0u) if (i_2 > 0u)
{ {
DrawMonoid param_12 = m; DrawMonoid param_8 = m;
DrawMonoid param_13 = local[i_2 - 1u]; DrawMonoid param_9 = local[i_2 - 1u];
m = combine_tag_monoid(param_12, param_13); m = combine_draw_monoid(param_8, param_9);
} }
v_199.memory[out_base + (i_2 * 2u)] = m.path_ix; _284.memory[out_base + (i_2 * 4u)] = m.path_ix;
v_199.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; _284.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix;
ElementRef param_14 = ref; _284.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset;
uint param_15 = i_2; _284.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset;
ElementRef this_ref = Element_index(param_14, param_15); uint dd = drawdata_base + (m.scene_offset >> uint(2));
ElementRef param_16 = this_ref; uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = Element_tag(param_16, v_223).tag; tag_word = _102.scene[(drawtag_base + ix) + i_2];
if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u))
{ {
uint bbox_offset = (_1054.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); uint bbox_offset = (_92.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(v_199.memory[bbox_offset]) - 32768.0; float bbox_l = float(_284.memory[bbox_offset]) - 32768.0;
float bbox_t = float(v_199.memory[bbox_offset + 1u]) - 32768.0; float bbox_t = float(_284.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(v_199.memory[bbox_offset + 2u]) - 32768.0; float bbox_r = float(_284.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(v_199.memory[bbox_offset + 3u]) - 32768.0; float bbox_b = float(_284.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(v_199.memory[bbox_offset + 4u]); float linewidth = as_type<float>(_284.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0); uint fill_mode = uint(linewidth >= 0.0);
if ((linewidth >= 0.0) || (tag_word == 5u)) if ((linewidth >= 0.0) || (tag_word == 276u))
{ {
uint trans_ix = v_199.memory[bbox_offset + 5u]; uint trans_ix = _284.memory[bbox_offset + 5u];
uint t = (_1054.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); uint t = (_92.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(v_199.memory[t], v_199.memory[t + 1u], v_199.memory[t + 2u], v_199.memory[t + 3u])); mat = as_type<float4>(uint4(_284.memory[t], _284.memory[t + 1u], _284.memory[t + 2u], _284.memory[t + 3u]));
if (tag_word == 5u) if (tag_word == 276u)
{ {
translate = as_type<float2>(uint2(v_199.memory[t + 4u], v_199.memory[t + 5u])); translate = as_type<float2>(uint2(_284.memory[t + 4u], _284.memory[t + 5u]));
} }
} }
if (linewidth >= 0.0) if (linewidth >= 0.0)
{ {
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
} }
linewidth = fast::max(linewidth, 0.0);
switch (tag_word) switch (tag_word)
{ {
case 4u: case 68u:
case 72u:
{ {
ElementRef param_17 = this_ref; _284.memory[di] = as_type<uint>(linewidth);
FillColor fill = Element_FillColor_read(param_17, v_223);
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
param_18.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_19 = out_ref;
uint param_20 = fill_mode;
AnnoColor param_21 = anno_fill;
Annotated_Color_write(param_18, param_19, param_20, param_21, v_199);
break; break;
} }
case 5u: case 276u:
{ {
ElementRef param_22 = this_ref; _284.memory[di] = as_type<uint>(linewidth);
FillLinGradient lin = Element_FillLinGradient_read(param_22, v_223); uint index = _102.scene[dd];
anno_lin.bbox = bbox; float2 p0 = as_type<float2>(uint2(_102.scene[dd + 1u], _102.scene[dd + 2u]));
anno_lin.linewidth = linewidth; float2 p1 = as_type<float2>(uint2(_102.scene[dd + 3u], _102.scene[dd + 4u]));
anno_lin.index = lin.index; p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate;
float2 dxy = p1 - p0; float2 dxy = p1 - p0;
float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale; float line_x = dxy.x * scale;
float line_y = dxy.y * scale; float line_y = dxy.y * scale;
anno_lin.line_x = line_x; float line_c = -((p0.x * line_x) + (p0.y * line_y));
anno_lin.line_y = line_y; _284.memory[di + 1u] = as_type<uint>(line_x);
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); _284.memory[di + 2u] = as_type<uint>(line_y);
param_23.offset = _1054.conf.anno_alloc.offset; _284.memory[di + 3u] = as_type<uint>(line_c);
AnnotatedRef param_24 = out_ref;
uint param_25 = fill_mode;
AnnoLinGradient param_26 = anno_lin;
Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_199);
break; break;
} }
case 6u: case 5u:
{ {
ElementRef param_27 = this_ref;
FillImage fill_img = Element_FillImage_read(param_27, v_223);
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
param_28.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_29 = out_ref;
uint param_30 = fill_mode;
AnnoImage param_31 = anno_img;
Annotated_Image_write(param_28, param_29, param_30, param_31, v_199);
break;
}
case 9u:
{
ElementRef param_32 = this_ref;
Clip begin_clip = Element_BeginClip_read(param_32, v_223);
anno_begin_clip.bbox = bbox;
anno_begin_clip.linewidth = 0.0;
anno_begin_clip.blend = begin_clip.blend;
uint flags = uint(begin_clip.blend != 3u) << uint(1);
param_33.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_34 = out_ref;
uint param_35 = flags;
AnnoBeginClip param_36 = anno_begin_clip;
Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_199);
break; break;
} }
} }
} }
else if ((tag_word == 5u) || (tag_word == 37u))
{
if (tag_word == 10u)
{
ElementRef param_37 = this_ref;
Clip end_clip = Element_BeginClip_read(param_37, v_223);
anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
anno_end_clip.blend = end_clip.blend;
uint flags_1 = uint(end_clip.blend != 3u) << uint(1);
param_38.offset = _1054.conf.anno_alloc.offset;
AnnotatedRef param_39 = out_ref;
uint param_40 = flags_1;
AnnoEndClip param_41 = anno_end_clip;
Annotated_EndClip_write(param_38, param_39, param_40, param_41, v_199);
}
}
if ((tag_word == 9u) || (tag_word == 10u))
{ {
uint path_ix = ~(out_ix + i_2); uint path_ix = ~(out_ix + i_2);
if (tag_word == 9u) if (tag_word == 5u)
{ {
path_ix = m.path_ix; path_ix = m.path_ix;
} }
v_199.memory[clip_out_base + m.clip_ix] = path_ix; _284.memory[clip_out_base + m.clip_ix] = path_ix;
} }
out_ref.offset += 40u;
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -1,22 +1,11 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
struct Alloc struct Alloc
{ {
uint offset; uint offset;
@ -34,12 +23,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -47,16 +38,16 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const DrawMonoid _87 = { 1u, 0u }; static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _89 = { 1u, 1u };
static const DrawMonoid _91 = { 0u, 0u };
ByteAddressBuffer _46 : register(t2, space0); ByteAddressBuffer _86 : register(t1, space0);
RWByteAddressBuffer _199 : register(u3, space0); ByteAddressBuffer _96 : register(t2, space0);
RWByteAddressBuffer _213 : register(u0, space0); RWByteAddressBuffer _187 : register(u3, space0);
ByteAddressBuffer _219 : register(t1, space0); RWByteAddressBuffer _205 : register(u0, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -70,68 +61,37 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256]; groupshared DrawMonoid sh_scratch[256];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _60;
}
DrawMonoid map_tag(uint tag_word) DrawMonoid map_tag(uint tag_word)
{ {
switch (tag_word) uint has_path = uint(tag_word != 0u);
{ DrawMonoid _69 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
case 4u: return _69;
case 5u:
case 6u:
{
return _87;
}
case 9u:
case 10u:
{
return _89;
}
default:
{
return _91;
}
}
} }
ElementRef Element_index(ElementRef ref, uint index) DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
ElementRef _39 = { ref.offset + (index * 36u) };
return _39;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _107 = { ix * 36u }; uint drawtag_base = _86.Load(100) >> uint(2);
ElementRef ref = _107; uint tag_word = _96.Load((drawtag_base + ix) * 4 + 0);
ElementRef param = ref; uint param = tag_word;
uint tag_word = Element_tag(param).tag; DrawMonoid agg = map_tag(param);
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
ElementRef param_2 = ref; uint tag_word_1 = _96.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_3 = i; uint param_1 = tag_word_1;
ElementRef param_4 = Element_index(param_2, param_3); DrawMonoid param_2 = agg;
tag_word = Element_tag(param_4).tag; DrawMonoid param_3 = map_tag(param_1);
uint param_5 = tag_word; agg = combine_draw_monoid(param_2, param_3);
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
@ -140,17 +100,19 @@ void comp_main()
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{ {
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg; DrawMonoid param_4 = agg;
DrawMonoid param_9 = other; DrawMonoid param_5 = other;
agg = combine_tag_monoid(param_8, param_9); agg = combine_draw_monoid(param_4, param_5);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
if (gl_LocalInvocationID.x == 0u) if (gl_LocalInvocationID.x == 0u)
{ {
_199.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix); _187.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix);
_199.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix); _187.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix);
_187.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset);
_187.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset);
} }
} }

View file

@ -5,48 +5,14 @@
using namespace metal; using namespace metal;
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
struct Alloc struct Alloc
{ {
uint offset; uint offset;
@ -64,12 +30,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -77,6 +45,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -84,70 +54,66 @@ struct ConfigBuf
Config conf; Config conf;
}; };
static inline __attribute__((always_inline)) struct SceneBuf
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46)
{ {
uint tag_and_flags = v_46.scene[ref.offset >> uint(2)]; uint scene[1];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; };
}
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word) DrawMonoid map_tag(thread const uint& tag_word)
{ {
switch (tag_word) uint has_path = uint(tag_word != 0u);
{ return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u };
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
case 10u:
{
return DrawMonoid{ 1u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneBuf& _96 [[buffer(2)]], device OutBuf& _187 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup DrawMonoid sh_scratch[256]; threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u }; uint drawtag_base = _86.conf.drawtag_offset >> uint(2);
ElementRef param = ref; uint tag_word = _96.scene[drawtag_base + ix];
uint tag_word = Element_tag(param, v_46).tag; uint param = tag_word;
uint param_1 = tag_word; DrawMonoid agg = map_tag(param);
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
ElementRef param_2 = ref; uint tag_word_1 = _96.scene[(drawtag_base + ix) + i];
uint param_3 = i; uint param_1 = tag_word_1;
ElementRef param_4 = Element_index(param_2, param_3); DrawMonoid param_2 = agg;
tag_word = Element_tag(param_4, v_46).tag; DrawMonoid param_3 = map_tag(param_1);
uint param_5 = tag_word; agg = combine_draw_monoid(param_2, param_3);
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
} }
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
@ -156,17 +122,19 @@ kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{ {
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg; DrawMonoid param_4 = agg;
DrawMonoid param_9 = other; DrawMonoid param_5 = other;
agg = combine_tag_monoid(param_8, param_9); agg = combine_draw_monoid(param_4, param_5);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
if (gl_LocalInvocationID.x == 0u) if (gl_LocalInvocationID.x == 0u)
{ {
_199.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; _187.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_199.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; _187.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
_187.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset;
_187.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset;
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -2,13 +2,15 @@ struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u }; static const DrawMonoid _18 = { 0u, 0u, 0u, 0u };
RWByteAddressBuffer _57 : register(u0, space0); RWByteAddressBuffer _71 : register(u0, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -20,15 +22,17 @@ struct SPIRV_Cross_Input
groupshared DrawMonoid sh_scratch[256]; groupshared DrawMonoid sh_scratch[256];
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
DrawMonoid tag_monoid_identity() DrawMonoid draw_monoid_identity()
{ {
return _18; return _18;
} }
@ -36,22 +40,30 @@ DrawMonoid tag_monoid_identity()
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _61; DrawMonoid _75;
_61.path_ix = _57.Load(ix * 8 + 0); _75.path_ix = _71.Load(ix * 16 + 0);
_61.clip_ix = _57.Load(ix * 8 + 4); _75.clip_ix = _71.Load(ix * 16 + 4);
_75.scene_offset = _71.Load(ix * 16 + 8);
_75.info_offset = _71.Load(ix * 16 + 12);
DrawMonoid local[8]; DrawMonoid local[8];
local[0].path_ix = _61.path_ix; local[0].path_ix = _75.path_ix;
local[0].clip_ix = _61.clip_ix; local[0].clip_ix = _75.clip_ix;
local[0].scene_offset = _75.scene_offset;
local[0].info_offset = _75.info_offset;
DrawMonoid param_1; DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
DrawMonoid param = local[i - 1u]; DrawMonoid param = local[i - 1u];
DrawMonoid _88; DrawMonoid _106;
_88.path_ix = _57.Load((ix + i) * 8 + 0); _106.path_ix = _71.Load((ix + i) * 16 + 0);
_88.clip_ix = _57.Load((ix + i) * 8 + 4); _106.clip_ix = _71.Load((ix + i) * 16 + 4);
param_1.path_ix = _88.path_ix; _106.scene_offset = _71.Load((ix + i) * 16 + 8);
param_1.clip_ix = _88.clip_ix; _106.info_offset = _71.Load((ix + i) * 16 + 12);
local[i] = combine_tag_monoid(param, param_1); param_1.path_ix = _106.path_ix;
param_1.clip_ix = _106.clip_ix;
param_1.scene_offset = _106.scene_offset;
param_1.info_offset = _106.info_offset;
local[i] = combine_draw_monoid(param, param_1);
} }
DrawMonoid agg = local[7]; DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
@ -63,13 +75,13 @@ void comp_main()
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other; DrawMonoid param_2 = other;
DrawMonoid param_3 = agg; DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3); agg = combine_draw_monoid(param_2, param_3);
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
row = sh_scratch[gl_LocalInvocationID.x - 1u]; row = sh_scratch[gl_LocalInvocationID.x - 1u];
@ -78,10 +90,12 @@ void comp_main()
{ {
DrawMonoid param_4 = row; DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2]; DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5); DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _177 = ix + i_2; uint _199 = ix + i_2;
_57.Store(_177 * 8 + 0, m.path_ix); _71.Store(_199 * 16 + 0, m.path_ix);
_57.Store(_177 * 8 + 4, m.clip_ix); _71.Store(_199 * 16 + 4, m.clip_ix);
_71.Store(_199 * 16 + 8, m.scene_offset);
_71.Store(_199 * 16 + 12, m.info_offset);
} }
} }

View file

@ -48,12 +48,16 @@ struct DrawMonoid
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct DrawMonoid_1 struct DrawMonoid_1
{ {
uint path_ix; uint path_ix;
uint clip_ix; uint clip_ix;
uint scene_offset;
uint info_offset;
}; };
struct DataBuf struct DataBuf
@ -64,35 +68,41 @@ struct DataBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{ {
DrawMonoid c; DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c; return c;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity() DrawMonoid draw_monoid_identity()
{ {
return DrawMonoid{ 0u, 0u }; return DrawMonoid{ 0u, 0u, 0u, 0u };
} }
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup DrawMonoid sh_scratch[256]; threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local; spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _57.data[ix].path_ix; local[0].path_ix = _71.data[ix].path_ix;
local[0].clip_ix = _57.data[ix].clip_ix; local[0].clip_ix = _71.data[ix].clip_ix;
local[0].scene_offset = _71.data[ix].scene_offset;
local[0].info_offset = _71.data[ix].info_offset;
DrawMonoid param_1; DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
{ {
uint _82 = ix + i; uint _100 = ix + i;
DrawMonoid param = local[i - 1u]; DrawMonoid param = local[i - 1u];
param_1.path_ix = _57.data[_82].path_ix; param_1.path_ix = _71.data[_100].path_ix;
param_1.clip_ix = _57.data[_82].clip_ix; param_1.clip_ix = _71.data[_100].clip_ix;
local[i] = combine_tag_monoid(param, param_1); param_1.scene_offset = _71.data[_100].scene_offset;
param_1.info_offset = _71.data[_100].info_offset;
local[i] = combine_draw_monoid(param, param_1);
} }
DrawMonoid agg = local[7]; DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
@ -104,13 +114,13 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other; DrawMonoid param_2 = other;
DrawMonoid param_3 = agg; DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3); agg = combine_draw_monoid(param_2, param_3);
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg; sh_scratch[gl_LocalInvocationID.x] = agg;
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity(); DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
row = sh_scratch[gl_LocalInvocationID.x - 1u]; row = sh_scratch[gl_LocalInvocationID.x - 1u];
@ -119,10 +129,12 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
{ {
DrawMonoid param_4 = row; DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2]; DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5); DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _177 = ix + i_2; uint _199 = ix + i_2;
_57.data[_177].path_ix = m.path_ix; _71.data[_199].path_ix = m.path_ix;
_57.data[_177].clip_ix = m.clip_ix; _71.data[_199].clip_ix = m.clip_ix;
_71.data[_199].scene_offset = m.scene_offset;
_71.data[_199].info_offset = m.info_offset;
} }
} }

Binary file not shown.

View file

@ -125,12 +125,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -138,6 +140,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);

View file

@ -183,12 +183,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -196,6 +198,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

View file

@ -125,12 +125,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -138,6 +140,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);

View file

@ -183,12 +183,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -196,6 +198,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

View file

@ -84,12 +84,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -97,6 +99,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);

View file

@ -144,12 +144,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -157,6 +159,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -62,12 +62,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -75,6 +77,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -361,7 +365,7 @@ uint round_up(float x)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(84) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid local_tm = reduce_tag(param); TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm; sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -400,8 +404,8 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4); tm = combine_tag_monoid(param_3, param_4);
} }
uint ps_ix = (_639.Load(88) >> uint(2)) + tm.pathseg_offset; uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(80) >> uint(2)) + tm.linewidth_ix; uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix; uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix; uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) }; TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };

View file

@ -127,12 +127,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -140,6 +142,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -635,7 +639,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = save_path_ix; uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.conf.bbox_alloc.offset >> uint(2)) + (path_ix * 6u); uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity(); Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {

Binary file not shown.

Binary file not shown.

View file

@ -24,12 +24,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -37,6 +39,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
@ -88,7 +92,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 2u; uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(84) >> uint(2)) + ix; uint scene_ix = (_139.Load(92) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0); uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);

View file

@ -31,12 +31,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -44,6 +46,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -9,28 +9,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -59,12 +37,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -72,12 +52,15 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _92 : register(u0, space0); RWByteAddressBuffer _70 : register(u0, space0);
ByteAddressBuffer _314 : register(t1, space0); ByteAddressBuffer _181 : register(t1, space0);
ByteAddressBuffer _257 : register(t2, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -90,62 +73,15 @@ struct SPIRV_Cross_Input
groupshared uint sh_tile_count[256]; groupshared uint sh_tile_count[256];
groupshared MallocResult sh_tile_alloc; groupshared MallocResult sh_tile_alloc;
bool touch_mem(Alloc alloc, uint offset) float4 load_draw_bbox(uint draw_ix)
{ {
return true; uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix);
} float x0 = asfloat(_70.Load(base * 4 + 8));
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8));
uint read_mem(Alloc alloc, uint offset) float x1 = asfloat(_70.Load((base + 2u) * 4 + 8));
{ float y1 = asfloat(_70.Load((base + 3u) * 4 + 8));
Alloc param = alloc; float4 bbox = float4(x0, y0, x1, y1);
uint param_1 = offset; return bbox;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _92.Load(offset * 4 + 8);
return v;
}
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
AnnotatedTag _246 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _246;
}
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
AnnoEndClip s;
s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.blend = raw4;
return s;
}
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref)
{
AnnoEndClipRef _252 = { ref.offset + 4u };
Alloc param = a;
AnnoEndClipRef param_1 = _252;
return AnnoEndClip_read(param, param_1);
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) Alloc new_alloc(uint offset, uint size, bool mem_ok)
@ -157,22 +93,22 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
MallocResult malloc(uint size) MallocResult malloc(uint size)
{ {
uint _98; uint _76;
_92.InterlockedAdd(0, size, _98); _70.InterlockedAdd(0, size, _76);
uint offset = _98; uint offset = _76;
uint _105; uint _83;
_92.GetDimensions(_105); _70.GetDimensions(_83);
_105 = (_105 - 8) / 4; _83 = (_83 - 8) / 4;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int(_105) * 4); r.failed = (offset + size) > uint(int(_83) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _127; uint _105;
_92.InterlockedMax(4, 1u, _127); _70.InterlockedMax(4, 1u, _105);
return r; return r;
} }
return r; return r;
@ -180,8 +116,13 @@ MallocResult malloc(uint size)
Alloc slice_mem(Alloc a, uint offset, uint size) Alloc slice_mem(Alloc a, uint offset, uint size)
{ {
Alloc _169 = { a.offset + offset }; Alloc _131 = { a.offset + offset };
return _169; return _131;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
} }
void write_mem(Alloc alloc, uint offset, uint val) void write_mem(Alloc alloc, uint offset, uint val)
@ -192,7 +133,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_92.Store(offset * 4 + 8, val); _70.Store(offset * 4 + 8, val);
} }
void Path_write(Alloc a, PathRef ref, Path s) void Path_write(Alloc a, PathRef ref, Path s)
@ -216,56 +157,34 @@ void comp_main()
{ {
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathRef _321 = { _314.Load(16) + (element_ix * 12u) }; PathRef _241 = { _181.Load(16) + (element_ix * 12u) };
PathRef path_ref = _321; PathRef path_ref = _241;
AnnotatedRef _330 = { _314.Load(32) + (element_ix * 40u) }; uint drawtag_base = _181.Load(100) >> uint(2);
AnnotatedRef ref = _330; uint drawtag = 0u;
uint tag = 0u; if (element_ix < _181.Load(0))
if (element_ix < _314.Load(0))
{ {
Alloc _341; drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0);
_341.offset = _314.Load(32);
Alloc param;
param.offset = _341.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1).tag;
} }
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
switch (tag) if ((drawtag != 0u) && (drawtag != 37u))
{ {
case 1u: uint param = element_ix;
case 2u: float4 bbox = load_draw_bbox(param);
case 3u: x0 = int(floor(bbox.x * 0.0625f));
case 4u: y0 = int(floor(bbox.y * 0.0625f));
case 5u: x1 = int(ceil(bbox.z * 0.0625f));
{ y1 = int(ceil(bbox.w * 0.0625f));
Alloc _359;
_359.offset = _314.Load(32);
Alloc param_2;
param_2.offset = _359.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3);
x0 = int(floor(clip.bbox.x * 0.0625f));
y0 = int(floor(clip.bbox.y * 0.0625f));
x1 = int(ceil(clip.bbox.z * 0.0625f));
y1 = int(ceil(clip.bbox.w * 0.0625f));
break;
} }
} x0 = clamp(x0, 0, int(_181.Load(8)));
x0 = clamp(x0, 0, int(_314.Load(8))); y0 = clamp(y0, 0, int(_181.Load(12)));
y0 = clamp(y0, 0, int(_314.Load(12))); x1 = clamp(x1, 0, int(_181.Load(8)));
x1 = clamp(x1, 0, int(_314.Load(8))); y1 = clamp(y1, 0, int(_181.Load(12)));
y1 = clamp(y1, 0, int(_314.Load(12)));
Path path; Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0)); uint tile_count = uint((x1 - x0) * (y1 - y0));
if (tag == 5u)
{
tile_count = 0u;
}
sh_tile_count[th_ix] = tile_count; sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count; uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
@ -280,59 +199,59 @@ void comp_main()
} }
if (th_ix == 255u) if (th_ix == 255u)
{ {
uint param_4 = total_tile_count * 8u; uint param_1 = total_tile_count * 8u;
MallocResult _485 = malloc(param_4); MallocResult _392 = malloc(param_1);
sh_tile_alloc = _485; sh_tile_alloc = _392;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
MallocResult alloc_start = sh_tile_alloc; MallocResult alloc_start = sh_tile_alloc;
bool _496; bool _403;
if (!alloc_start.failed) if (!alloc_start.failed)
{ {
_496 = _92.Load(4) != 0u; _403 = _70.Load(4) != 0u;
} }
else else
{ {
_496 = alloc_start.failed; _403 = alloc_start.failed;
} }
if (_496) if (_403)
{ {
return; return;
} }
if (element_ix < _314.Load(0)) if (element_ix < _181.Load(0))
{ {
uint _509; uint _416;
if (th_ix > 0u) if (th_ix > 0u)
{ {
_509 = sh_tile_count[th_ix - 1u]; _416 = sh_tile_count[th_ix - 1u];
} }
else else
{ {
_509 = 0u; _416 = 0u;
} }
uint tile_subix = _509; uint tile_subix = _416;
Alloc param_5 = alloc_start.alloc; Alloc param_2 = alloc_start.alloc;
uint param_6 = 8u * tile_subix; uint param_3 = 8u * tile_subix;
uint param_7 = 8u * tile_count; uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
TileRef _531 = { tiles_alloc.offset }; TileRef _438 = { tiles_alloc.offset };
path.tiles = _531; path.tiles = _438;
Alloc _536; Alloc _444;
_536.offset = _314.Load(16); _444.offset = _181.Load(16);
Alloc param_8; Alloc param_5;
param_8.offset = _536.offset; param_5.offset = _444.offset;
PathRef param_9 = path_ref; PathRef param_6 = path_ref;
Path param_10 = path; Path param_7 = path;
Path_write(param_8, param_9, param_10); Path_write(param_5, param_6, param_7);
} }
uint total_count = sh_tile_count[255] * 2u; uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2); uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{ {
Alloc param_11 = alloc_start.alloc; Alloc param_8 = alloc_start.alloc;
uint param_12 = start_ix + i_1; uint param_9 = start_ix + i_1;
uint param_13 = 0u; uint param_10 = 0u;
write_mem(param_11, param_12, param_13); write_mem(param_8, param_9, param_10);
} }
} }

View file

@ -18,28 +18,6 @@ struct MallocResult
bool failed; bool failed;
}; };
struct AnnoEndClipRef
{
uint offset;
};
struct AnnoEndClip
{
float4 bbox;
uint blend;
};
struct AnnotatedRef
{
uint offset;
};
struct AnnotatedTag
{
uint tag;
uint flags;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -80,12 +58,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -93,6 +73,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf
@ -100,67 +82,23 @@ struct ConfigBuf
Config conf; Config conf;
}; };
struct SceneBuf
{
uint scene[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset) float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181)
{ {
return true; uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
} float x0 = as_type<float>(v_70.memory[base]);
float y0 = as_type<float>(v_70.memory[base + 1u]);
static inline __attribute__((always_inline)) float x1 = as_type<float>(v_70.memory[base + 2u]);
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_92, constant uint& v_92BufferSize) float y1 = as_type<float>(v_70.memory[base + 3u]);
{ float4 bbox = float4(x0, y0, x1, y1);
Alloc param = alloc; return bbox;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_92.memory[offset];
return v;
}
static inline __attribute__((always_inline))
AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_92, v_92BufferSize);
return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_92, v_92BufferSize);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_92, v_92BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_92, v_92BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_92, v_92BufferSize);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_92, v_92BufferSize);
AnnoEndClip s;
s.bbox = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.blend = raw4;
return s;
}
static inline __attribute__((always_inline))
AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_92, constant uint& v_92BufferSize)
{
Alloc param = a;
AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u };
return AnnoEndClip_read(param, param_1, v_92, v_92BufferSize);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -172,19 +110,19 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_92, constant uint& v_92BufferSize) MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
{ {
uint _98 = atomic_fetch_add_explicit((device atomic_uint*)&v_92.mem_offset, size, memory_order_relaxed); uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
uint offset = _98; uint offset = _76;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int((v_92BufferSize - 8) / 4) * 4); r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _127 = atomic_fetch_max_explicit((device atomic_uint*)&v_92.mem_error, 1u, memory_order_relaxed); uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed);
return r; return r;
} }
return r; return r;
@ -197,7 +135,13 @@ Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const u
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_92, constant uint& v_92BufferSize) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -205,78 +149,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_92.memory[offset] = val; v_70.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_92, constant uint& v_92BufferSize) void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_92, v_92BufferSize); write_mem(param, param_1, param_2, v_70, v_70BufferSize);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_92, v_92BufferSize); write_mem(param_3, param_4, param_5, v_70, v_70BufferSize);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset; uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_92, v_92BufferSize); write_mem(param_6, param_7, param_8, v_70, v_70BufferSize);
} }
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_92 [[buffer(0)]], const device ConfigBuf& _314 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{ {
threadgroup uint sh_tile_count[256]; threadgroup uint sh_tile_count[256];
threadgroup MallocResult sh_tile_alloc; threadgroup MallocResult sh_tile_alloc;
constant uint& v_92BufferSize = spvBufferSizeConstants[0]; constant uint& v_70BufferSize = spvBufferSizeConstants[0];
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ _314.conf.tile_alloc.offset + (element_ix * 12u) }; PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) };
AnnotatedRef ref = AnnotatedRef{ _314.conf.anno_alloc.offset + (element_ix * 40u) }; uint drawtag_base = v_181.conf.drawtag_offset >> uint(2);
uint tag = 0u; uint drawtag = 0u;
if (element_ix < _314.conf.n_elements) if (element_ix < v_181.conf.n_elements)
{ {
Alloc param; drawtag = _257.scene[drawtag_base + element_ix];
param.offset = _314.conf.anno_alloc.offset;
AnnotatedRef param_1 = ref;
tag = Annotated_tag(param, param_1, v_92, v_92BufferSize).tag;
} }
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
switch (tag) if ((drawtag != 0u) && (drawtag != 37u))
{ {
case 1u: uint param = element_ix;
case 2u: float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181);
case 3u: x0 = int(floor(bbox.x * 0.0625));
case 4u: y0 = int(floor(bbox.y * 0.0625));
case 5u: x1 = int(ceil(bbox.z * 0.0625));
{ y1 = int(ceil(bbox.w * 0.0625));
Alloc param_2;
param_2.offset = _314.conf.anno_alloc.offset;
AnnotatedRef param_3 = ref;
AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_92, v_92BufferSize);
x0 = int(floor(clip.bbox.x * 0.0625));
y0 = int(floor(clip.bbox.y * 0.0625));
x1 = int(ceil(clip.bbox.z * 0.0625));
y1 = int(ceil(clip.bbox.w * 0.0625));
break;
} }
} x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles));
x0 = clamp(x0, 0, int(_314.conf.width_in_tiles)); y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles));
y0 = clamp(y0, 0, int(_314.conf.height_in_tiles)); x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles));
x1 = clamp(x1, 0, int(_314.conf.width_in_tiles)); y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles));
y1 = clamp(y1, 0, int(_314.conf.height_in_tiles));
Path path; Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0)); uint tile_count = uint((x1 - x0) * (y1 - y0));
if (tag == 5u)
{
tile_count = 0u;
}
sh_tile_count[th_ix] = tile_count; sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count; uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
@ -291,56 +218,56 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
if (th_ix == 255u) if (th_ix == 255u)
{ {
uint param_4 = total_tile_count * 8u; uint param_1 = total_tile_count * 8u;
MallocResult _485 = malloc(param_4, v_92, v_92BufferSize); MallocResult _392 = malloc(param_1, v_70, v_70BufferSize);
sh_tile_alloc = _485; sh_tile_alloc = _392;
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
MallocResult alloc_start = sh_tile_alloc; MallocResult alloc_start = sh_tile_alloc;
bool _496; bool _403;
if (!alloc_start.failed) if (!alloc_start.failed)
{ {
_496 = v_92.mem_error != 0u; _403 = v_70.mem_error != 0u;
} }
else else
{ {
_496 = alloc_start.failed; _403 = alloc_start.failed;
} }
if (_496) if (_403)
{ {
return; return;
} }
if (element_ix < _314.conf.n_elements) if (element_ix < v_181.conf.n_elements)
{ {
uint _509; uint _416;
if (th_ix > 0u) if (th_ix > 0u)
{ {
_509 = sh_tile_count[th_ix - 1u]; _416 = sh_tile_count[th_ix - 1u];
} }
else else
{ {
_509 = 0u; _416 = 0u;
} }
uint tile_subix = _509; uint tile_subix = _416;
Alloc param_5 = alloc_start.alloc; Alloc param_2 = alloc_start.alloc;
uint param_6 = 8u * tile_subix; uint param_3 = 8u * tile_subix;
uint param_7 = 8u * tile_count; uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
path.tiles = TileRef{ tiles_alloc.offset }; path.tiles = TileRef{ tiles_alloc.offset };
Alloc param_8; Alloc param_5;
param_8.offset = _314.conf.tile_alloc.offset; param_5.offset = v_181.conf.tile_alloc.offset;
PathRef param_9 = path_ref; PathRef param_6 = path_ref;
Path param_10 = path; Path param_7 = path;
Path_write(param_8, param_9, param_10, v_92, v_92BufferSize); Path_write(param_5, param_6, param_7, v_70, v_70BufferSize);
} }
uint total_count = sh_tile_count[255] * 2u; uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2); uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{ {
Alloc param_11 = alloc_start.alloc; Alloc param_8 = alloc_start.alloc;
uint param_12 = start_ix + i_1; uint param_9 = start_ix + i_1;
uint param_13 = 0u; uint param_10 = 0u;
write_mem(param_11, param_12, param_13, v_92, v_92BufferSize); write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -37,12 +37,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -50,6 +52,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -155,7 +159,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(76) + (ix * 24u) }; TransformRef _285 = { _278.Load(84) + (ix * 24u) };
TransformRef ref = _285; TransformRef ref = _285;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -100,12 +100,14 @@ struct Config
Alloc_1 pathseg_alloc; Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc; Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc; Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc; Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc; Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc; Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -113,6 +115,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

Binary file not shown.

View file

@ -26,12 +26,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -39,6 +41,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
@ -92,7 +96,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(76) + (ix * 24u) }; TransformRef _168 = { _161.Load(84) + (ix * 24u) };
TransformRef ref = _168; TransformRef ref = _168;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -38,12 +38,14 @@ struct Config
Alloc pathseg_alloc; Alloc pathseg_alloc;
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc path_bbox_alloc;
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
Alloc clip_alloc; Alloc clip_alloc;
Alloc clip_bic_alloc; Alloc clip_bic_alloc;
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans; uint n_trans;
uint n_path; uint n_path;
uint n_clip; uint n_clip;
@ -51,6 +53,8 @@ struct Config
uint linewidth_offset; uint linewidth_offset;
uint pathtag_offset; uint pathtag_offset;
uint pathseg_offset; uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
}; };
struct ConfigBuf struct ConfigBuf

Binary file not shown.

View file

@ -248,7 +248,7 @@ void main() {
barrier(); barrier();
uint path_ix = save_path_ix; uint path_ix = save_path_ix;
uint bbox_out_ix = (conf.bbox_alloc.offset >> 2) + path_ix * 6; uint bbox_out_ix = (conf.path_bbox_alloc.offset >> 2) + path_ix * 6;
// Write bboxes to paths; do atomic min/max if partial // Write bboxes to paths; do atomic min/max if partial
Monoid row = monoid_identity(); Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0) { if (gl_LocalInvocationID.x > 0) {

View file

@ -42,7 +42,7 @@ struct Config {
// new element pipeline stuff follows // new element pipeline stuff follows
// Bounding boxes of paths, stored as int (so atomics work) // Bounding boxes of paths, stored as int (so atomics work)
Alloc bbox_alloc; Alloc path_bbox_alloc;
// Monoid for draw objects // Monoid for draw objects
Alloc drawmonoid_alloc; Alloc drawmonoid_alloc;
@ -54,6 +54,10 @@ struct Config {
Alloc clip_stack_alloc; Alloc clip_stack_alloc;
// Clip processing results (path_ix + bbox) // Clip processing results (path_ix + bbox)
Alloc clip_bbox_alloc; Alloc clip_bbox_alloc;
// Bounding box per draw object
Alloc draw_bbox_alloc;
// Info computed in draw stage, per draw object
Alloc drawinfo_alloc;
// Number of transforms in scene // Number of transforms in scene
// This is probably not needed. // This is probably not needed.
@ -63,6 +67,10 @@ struct Config {
uint n_path; uint n_path;
// Total number of BeginClip and EndClip draw objects. // Total number of BeginClip and EndClip draw objects.
uint n_clip; uint n_clip;
// Note: one of these offsets *could* be hardcoded to zero (as was the
// original element stream), but for now retain flexibility.
// Offset (in bytes) of transform stream in scene buffer // Offset (in bytes) of transform stream in scene buffer
uint trans_offset; uint trans_offset;
// Offset (in bytes) of linewidth stream in scene // Offset (in bytes) of linewidth stream in scene
@ -71,6 +79,10 @@ struct Config {
uint pathtag_offset; uint pathtag_offset;
// Offset (in bytes) of path segment stream in scene // Offset (in bytes) of path segment stream in scene
uint pathseg_offset; uint pathseg_offset;
// Offset (in bytes) of draw object tag stream in scene; see drawtag.h
uint drawtag_offset;
// Offset (in bytes) of draw payload stream in scene
uint drawdata_offset;
}; };
#endif #endif

View file

@ -17,7 +17,11 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
#include "annotated.h" layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "drawtag.h"
#include "tile.h" #include "tile.h"
// scale factors useful for converting coordinates to tiles // scale factors useful for converting coordinates to tiles
@ -27,31 +31,39 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
shared uint sh_tile_count[TILE_ALLOC_WG]; shared uint sh_tile_count[TILE_ALLOC_WG];
shared MallocResult sh_tile_alloc; shared MallocResult sh_tile_alloc;
vec4 load_draw_bbox(uint draw_ix) {
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
float x0 = uintBitsToFloat(memory[base]);
float y0 = uintBitsToFloat(memory[base + 1]);
float x1 = uintBitsToFloat(memory[base + 2]);
float y1 = uintBitsToFloat(memory[base + 3]);
vec4 bbox = vec4(x0, y0, x1, y1);
return bbox;
}
void main() { void main() {
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
// At the moment, element_ix == path_ix. The clip-intersected bounding boxes
// for elements (draw objects) are computed in the binning stage, but at some
// point we'll probably want to break that correspondence. Tiles should be
// allocated for paths, not draw objs. EndClip doesn't need an allocation.
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size); uint drawtag_base = conf.drawtag_offset >> 2;
uint tag = Annotated_Nop; uint drawtag = Drawtag_Nop;
if (element_ix < conf.n_elements) { if (element_ix < conf.n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag; drawtag = scene[drawtag_base + element_ix];
} }
int x0 = 0, y0 = 0, x1 = 0, y1 = 0; int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) { // Allocate an empty path for EndClip; at some point we'll change
case Annotated_Color: // this to be per path rather than per draw object.
case Annotated_LinGradient: if (drawtag != Drawtag_Nop && drawtag != Drawtag_EndClip) {
case Annotated_Image: vec4 bbox = load_draw_bbox(element_ix);
case Annotated_BeginClip: x0 = int(floor(bbox.x * SX));
case Annotated_EndClip: y0 = int(floor(bbox.y * SY));
// Note: we take advantage of the fact that fills, strokes, and x1 = int(ceil(bbox.z * SX));
// clips have compatible layout. y1 = int(ceil(bbox.w * SY));
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
break;
} }
x0 = clamp(x0, 0, int(conf.width_in_tiles)); x0 = clamp(x0, 0, int(conf.width_in_tiles));
y0 = clamp(y0, 0, int(conf.height_in_tiles)); y0 = clamp(y0, 0, int(conf.height_in_tiles));
@ -61,11 +73,6 @@ void main() {
Path path; Path path;
path.bbox = uvec4(x0, y0, x1, y1); path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0); uint tile_count = (x1 - x0) * (y1 - y0);
if (tag == Annotated_EndClip) {
// Don't actually allocate tiles for an end clip, but we do want
// the path structure (especially bbox) allocated for it.
tile_count = 0;
}
sh_tile_count[th_ix] = tile_count; sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count; uint total_tile_count = tile_count;

View file

@ -63,7 +63,10 @@ pub struct Blend {
impl Blend { impl Blend {
pub fn new(mode: BlendMode, composition_mode: CompositionMode) -> Self { pub fn new(mode: BlendMode, composition_mode: CompositionMode) -> Self {
Self { mode, composition_mode } Self {
mode,
composition_mode,
}
} }
pub(crate) fn pack(&self) -> u32 { pub(crate) fn pack(&self) -> u32 {

View file

@ -30,7 +30,8 @@ pub struct Encoder {
tag_stream: Vec<u8>, tag_stream: Vec<u8>,
pathseg_stream: Vec<u8>, pathseg_stream: Vec<u8>,
linewidth_stream: Vec<f32>, linewidth_stream: Vec<f32>,
drawobj_stream: Vec<u8>, drawtag_stream: Vec<u32>,
drawdata_stream: Vec<u8>,
n_path: u32, n_path: u32,
n_pathseg: u32, n_pathseg: u32,
n_clip: u32, n_clip: u32,
@ -43,53 +44,54 @@ pub struct Encoder {
pub struct GlyphEncoder { pub struct GlyphEncoder {
tag_stream: Vec<u8>, tag_stream: Vec<u8>,
pathseg_stream: Vec<u8>, pathseg_stream: Vec<u8>,
drawobj_stream: Vec<u8>, drawtag_stream: Vec<u32>,
drawdata_stream: Vec<u8>,
n_path: u32, n_path: u32,
n_pathseg: u32, n_pathseg: u32,
} }
// Currently same as Element, but may change - should become packed.
const DRAWOBJ_SIZE: usize = 36;
const TRANSFORM_SIZE: usize = 24; const TRANSFORM_SIZE: usize = 24;
const LINEWIDTH_SIZE: usize = 4; const LINEWIDTH_SIZE: usize = 4;
const PATHSEG_SIZE: usize = 52; const PATHSEG_SIZE: usize = 52;
const BBOX_SIZE: usize = 24; const PATH_BBOX_SIZE: usize = 24;
const DRAWMONOID_SIZE: usize = 8; const DRAWMONOID_SIZE: usize = 16;
const DRAW_BBOX_SIZE: usize = 16;
const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40; const ANNOTATED_SIZE: usize = 40;
// These are bytemuck versions of elements currently defined in the // Tags for draw objects. See shader/drawtag.h for the authoritative source.
// Element struct in piet-gpu-types; that's pretty much going away. const DRAWTAG_FILLCOLOR: u32 = 0x44;
const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
const ELEMENT_FILLCOLOR: u32 = 4; const DRAWTAG_BEGINCLIP: u32 = 0x05;
const ELEMENT_FILLLINGRADIENT: u32 = 5; const DRAWTAG_ENDCLIP: u32 = 0x25;
const ELEMENT_BEGINCLIP: u32 = 9;
const ELEMENT_ENDCLIP: u32 = 10;
#[repr(C)] #[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillColor { pub struct FillColor {
tag: u32,
rgba_color: u32, rgba_color: u32,
padding: [u32; 7],
} }
#[repr(C)] #[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillLinGradient { pub struct FillLinGradient {
tag: u32,
index: u32, index: u32,
p0: [f32; 2], p0: [f32; 2],
p1: [f32; 2], p1: [f32; 2],
padding: [u32; 3], }
#[allow(unused)]
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct FillImage {
index: u32,
// [i16; 2]
offset: u32,
} }
#[repr(C)] #[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
pub struct Clip { pub struct Clip {
tag: u32,
bbox: [f32; 4],
blend: u32, blend: u32,
padding: [u32; 3],
} }
impl Encoder { impl Encoder {
@ -99,7 +101,8 @@ impl Encoder {
tag_stream: Vec::new(), tag_stream: Vec::new(),
pathseg_stream: Vec::new(), pathseg_stream: Vec::new(),
linewidth_stream: vec![-1.0], linewidth_stream: vec![-1.0],
drawobj_stream: Vec::new(), drawtag_stream: Vec::new(),
drawdata_stream: Vec::new(),
n_path: 0, n_path: 0,
n_pathseg: 0, n_pathseg: 0,
n_clip: 0, n_clip: 0,
@ -130,51 +133,36 @@ impl Encoder {
/// ///
/// This should be encoded after a path. /// This should be encoded after a path.
pub fn fill_color(&mut self, rgba_color: u32) { pub fn fill_color(&mut self, rgba_color: u32) {
let element = FillColor { self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
tag: ELEMENT_FILLCOLOR, let element = FillColor { rgba_color };
rgba_color, self.drawdata_stream.extend(bytemuck::bytes_of(&element));
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
} }
/// Encode a fill linear gradient draw object. /// Encode a fill linear gradient draw object.
/// ///
/// This should be encoded after a path. /// This should be encoded after a path.
pub fn fill_lin_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2]) { pub fn fill_lin_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2]) {
let element = FillLinGradient { self.drawtag_stream.push(DRAWTAG_FILLLINGRADIENT);
tag: ELEMENT_FILLLINGRADIENT, let element = FillLinGradient { index, p0, p1 };
index, self.drawdata_stream.extend(bytemuck::bytes_of(&element));
p0,
p1,
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
} }
/// Start a clip and return a save point to be filled in later. /// Start a clip.
pub fn begin_clip(&mut self, blend: Option<Blend>) -> usize { pub fn begin_clip(&mut self, blend: Option<Blend>) {
let saved = self.drawobj_stream.len(); self.drawtag_stream.push(DRAWTAG_BEGINCLIP);
let element = Clip { let element = Clip {
tag: ELEMENT_BEGINCLIP,
blend: blend.unwrap_or(Blend::default()).pack(), blend: blend.unwrap_or(Blend::default()).pack(),
..Default::default()
}; };
self.drawobj_stream.extend(bytemuck::bytes_of(&element)); self.drawdata_stream.extend(bytemuck::bytes_of(&element));
self.n_clip += 1; self.n_clip += 1;
saved
} }
pub fn end_clip(&mut self, bbox: [f32; 4], blend: Option<Blend>, save_point: usize) { pub fn end_clip(&mut self, blend: Option<Blend>) {
self.drawtag_stream.push(DRAWTAG_ENDCLIP);
let element = Clip { let element = Clip {
tag: ELEMENT_ENDCLIP,
bbox,
blend: blend.unwrap_or(Blend::default()).pack(), blend: blend.unwrap_or(Blend::default()).pack(),
..Default::default()
}; };
self.drawobj_stream[save_point + 4..save_point + 20] self.drawdata_stream.extend(bytemuck::bytes_of(&element));
.clone_from_slice(bytemuck::bytes_of(&bbox));
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
// This is a dummy path, and will go away with the new clip impl. // This is a dummy path, and will go away with the new clip impl.
self.tag_stream.push(0x10); self.tag_stream.push(0x10);
self.n_path += 1; self.n_path += 1;
@ -187,9 +175,11 @@ impl Encoder {
/// beginning of free memory. /// beginning of free memory.
pub fn stage_config(&self) -> (Config, usize) { pub fn stage_config(&self) -> (Config, usize) {
// Layout of scene buffer // Layout of scene buffer
let drawtag_offset = 0;
let n_drawobj = self.n_drawobj(); let n_drawobj = self.n_drawobj();
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize); let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE; let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
let trans_offset = drawdata_offset + self.drawdata_stream.len();
let n_trans = self.transform_stream.len(); let n_trans = self.transform_stream.len();
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize); let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE; let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
@ -205,9 +195,9 @@ impl Encoder {
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE; alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
let pathseg_alloc = alloc; let pathseg_alloc = alloc;
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE; alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
let bbox_alloc = alloc; let path_bbox_alloc = alloc;
let n_path = self.n_path as usize; let n_path = self.n_path as usize;
alloc += bbox_alloc + n_path * BBOX_SIZE; alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
let drawmonoid_alloc = alloc; let drawmonoid_alloc = alloc;
alloc += n_drawobj_padded * DRAWMONOID_SIZE; alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc; let anno_alloc = alloc;
@ -226,6 +216,12 @@ impl Encoder {
let clip_bbox_alloc = alloc; let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16; const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE; alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let draw_bbox_alloc = alloc;
alloc += n_drawobj * DRAW_BBOX_SIZE;
let drawinfo_alloc = alloc;
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
const MAX_DRAWINFO_SIZE: usize = 16;
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
let config = Config { let config = Config {
n_elements: n_drawobj as u32, n_elements: n_drawobj as u32,
@ -233,12 +229,14 @@ impl Encoder {
pathseg_alloc: pathseg_alloc as u32, pathseg_alloc: pathseg_alloc as u32,
anno_alloc: anno_alloc as u32, anno_alloc: anno_alloc as u32,
trans_alloc: trans_alloc as u32, trans_alloc: trans_alloc as u32,
bbox_alloc: bbox_alloc as u32, path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32, clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32, clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32, clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32, clip_bbox_alloc: clip_bbox_alloc as u32,
draw_bbox_alloc: draw_bbox_alloc as u32,
drawinfo_alloc: drawinfo_alloc as u32,
n_trans: n_trans as u32, n_trans: n_trans as u32,
n_path: self.n_path, n_path: self.n_path,
n_clip: self.n_clip, n_clip: self.n_clip,
@ -246,15 +244,18 @@ impl Encoder {
linewidth_offset: linewidth_offset as u32, linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32, pathtag_offset: pathtag_offset as u32,
pathseg_offset: pathseg_offset as u32, pathseg_offset: pathseg_offset as u32,
drawtag_offset: drawtag_offset as u32,
drawdata_offset: drawdata_offset as u32,
..Default::default() ..Default::default()
}; };
(config, alloc) (config, alloc)
} }
pub fn write_scene(&self, buf: &mut BufWrite) { pub fn write_scene(&self, buf: &mut BufWrite) {
buf.extend_slice(&self.drawobj_stream); buf.extend_slice(&self.drawtag_stream);
let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE; let n_drawobj = self.drawtag_stream.len();
buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWOBJ_SIZE); buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE);
buf.extend_slice(&self.drawdata_stream);
buf.extend_slice(&self.transform_stream); buf.extend_slice(&self.transform_stream);
let n_trans = self.transform_stream.len(); let n_trans = self.transform_stream.len();
buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE); buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
@ -265,9 +266,9 @@ impl Encoder {
buf.extend_slice(&self.pathseg_stream); buf.extend_slice(&self.pathseg_stream);
} }
/// The number of elements in the draw object stream. /// The number of draw objects in the draw object stream.
pub(crate) fn n_drawobj(&self) -> usize { pub(crate) fn n_drawobj(&self) -> usize {
self.drawobj_stream.len() / DRAWOBJ_SIZE self.drawtag_stream.len()
} }
/// The number of paths. /// The number of paths.
@ -296,7 +297,8 @@ impl Encoder {
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
self.tag_stream.extend(&glyph.tag_stream); self.tag_stream.extend(&glyph.tag_stream);
self.pathseg_stream.extend(&glyph.pathseg_stream); self.pathseg_stream.extend(&glyph.pathseg_stream);
self.drawobj_stream.extend(&glyph.drawobj_stream); self.drawtag_stream.extend(&glyph.drawtag_stream);
self.drawdata_stream.extend(&glyph.drawdata_stream);
self.n_path += glyph.n_path; self.n_path += glyph.n_path;
self.n_pathseg += glyph.n_pathseg; self.n_pathseg += glyph.n_pathseg;
} }
@ -325,15 +327,12 @@ impl GlyphEncoder {
/// ///
/// This should be encoded after a path. /// This should be encoded after a path.
pub(crate) fn fill_color(&mut self, rgba_color: u32) { pub(crate) fn fill_color(&mut self, rgba_color: u32) {
let element = FillColor { self.drawtag_stream.push(DRAWTAG_FILLCOLOR);
tag: ELEMENT_FILLCOLOR, let element = FillColor { rgba_color };
rgba_color, self.drawdata_stream.extend(bytemuck::bytes_of(&element));
..Default::default()
};
self.drawobj_stream.extend(bytemuck::bytes_of(&element));
} }
pub(crate) fn is_color(&self) -> bool { pub(crate) fn is_color(&self) -> bool {
!self.drawobj_stream.is_empty() !self.drawtag_stream.is_empty()
} }
} }

View file

@ -92,7 +92,7 @@ pub struct Renderer {
clip_binding: ClipBinding, clip_binding: ClipBinding,
tile_pipeline: Pipeline, tile_pipeline: Pipeline,
tile_ds: DescriptorSet, tile_ds: Vec<DescriptorSet>,
path_pipeline: Pipeline, path_pipeline: Pipeline,
path_ds: DescriptorSet, path_ds: DescriptorSet,
@ -105,7 +105,7 @@ pub struct Renderer {
bin_ds: DescriptorSet, bin_ds: DescriptorSet,
coarse_pipeline: Pipeline, coarse_pipeline: Pipeline,
coarse_ds: DescriptorSet, coarse_ds: Vec<DescriptorSet>,
k4_pipeline: Pipeline, k4_pipeline: Pipeline,
k4_ds: DescriptorSet, k4_ds: DescriptorSet,
@ -176,10 +176,8 @@ impl Renderer {
}; };
let image_dev = session.create_image2d(width as u32, height as u32, image_format)?; let image_dev = session.create_image2d(width as u32, height as u32, image_format)?;
// Note: this must be updated when the config struct size changes.
const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::<Config>() as u64; const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::<Config>() as u64;
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap(); let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
// TODO: separate staging buffer (if needed)
let config_bufs = (0..n_bufs) let config_bufs = (0..n_bufs)
.map(|_| { .map(|_| {
session session
@ -212,10 +210,23 @@ impl Renderer {
let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev); let clip_binding = ClipBinding::new(session, &clip_code, &config_buf, &memory_buf_dev);
let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc"); let tile_alloc_code = include_shader!(session, "../shader/gen/tile_alloc");
let tile_pipeline = session let tile_pipeline = session.create_compute_pipeline(
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::BufReadOnly])?; tile_alloc_code,
let tile_ds = session &[
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?; BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
],
)?;
let tile_ds = scene_bufs
.iter()
.map(|scene_buf| {
session.create_simple_descriptor_set(
&tile_pipeline,
&[&memory_buf_dev, &config_buf, scene_buf],
)
})
.collect::<Result<Vec<_>, _>>()?;
let path_alloc_code = include_shader!(session, "../shader/gen/path_coarse"); let path_alloc_code = include_shader!(session, "../shader/gen/path_coarse");
let path_pipeline = session let path_pipeline = session
@ -243,11 +254,23 @@ impl Renderer {
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?; session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
let coarse_code = include_shader!(session, "../shader/gen/coarse"); let coarse_code = include_shader!(session, "../shader/gen/coarse");
let coarse_pipeline = session let coarse_pipeline = session.create_compute_pipeline(
.create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::BufReadOnly])?; coarse_code,
let coarse_ds = session &[
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?; BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
],
)?;
let coarse_ds = scene_bufs
.iter()
.map(|scene_buf| {
session.create_simple_descriptor_set(
&coarse_pipeline,
&[&memory_buf_dev, &config_buf, scene_buf],
)
})
.collect::<Result<Vec<_>, _>>()?;
let bg_image = Self::make_test_bg_image(&session); let bg_image = Self::make_test_bg_image(&session);
const GRADIENT_BUF_SIZE: usize = const GRADIENT_BUF_SIZE: usize =
@ -430,7 +453,7 @@ impl Renderer {
cmd_buf.begin_debug_label("Tile allocation"); cmd_buf.begin_debug_label("Tile allocation");
cmd_buf.dispatch( cmd_buf.dispatch(
&self.tile_pipeline, &self.tile_pipeline,
&self.tile_ds, &self.tile_ds[buf_ix],
(((self.n_paths + 255) / 256) as u32, 1, 1), (((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1), (256, 1, 1),
); );
@ -462,7 +485,7 @@ impl Renderer {
cmd_buf.begin_debug_label("Coarse raster"); cmd_buf.begin_debug_label("Coarse raster");
cmd_buf.dispatch( cmd_buf.dispatch(
&self.coarse_pipeline, &self.coarse_pipeline,
&self.coarse_ds, &self.coarse_ds[buf_ix],
( (
(self.width as u32 + 255) / 256, (self.width as u32 + 255) / 256,
(self.height as u32 + 255) / 256, (self.height as u32 + 255) / 256,

View file

@ -64,9 +64,6 @@ struct State {
} }
struct ClipElement { struct ClipElement {
/// Byte offset of BeginClip element in element vec, for bbox fixup.
save_point: usize,
bbox: Option<Rect>,
blend: Option<Blend>, blend: Option<Blend>,
} }
@ -199,8 +196,6 @@ impl RenderContext for PietGpuRenderContext {
fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>, width: f64) { fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>, width: f64) {
self.encode_linewidth(width.abs() as f32); self.encode_linewidth(width.abs() as f32);
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
// Note: the bbox contribution of stroke becomes more complicated with miter joins.
self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5));
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_path(path, false); self.encode_path(path, false);
self.encode_brush(&brush); self.encode_brush(&brush);
@ -217,9 +212,6 @@ impl RenderContext for PietGpuRenderContext {
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) { fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
// Note: we might get a good speedup from using an approximate bounding box.
// Perhaps that should be added to kurbo.
self.accumulate_bbox(|| shape.bounding_box());
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_linewidth(-1.0); self.encode_linewidth(-1.0);
self.encode_path(path, true); self.encode_path(path, true);
@ -232,15 +224,11 @@ impl RenderContext for PietGpuRenderContext {
self.encode_linewidth(-1.0); self.encode_linewidth(-1.0);
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true); self.encode_path(path, true);
let save_point = self.new_encoder.begin_clip(None); self.new_encoder.begin_clip(None);
if self.clip_stack.len() >= MAX_BLEND_STACK { if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
} }
self.clip_stack.push(ClipElement { self.clip_stack.push(ClipElement { blend: None });
bbox: None,
save_point,
blend: None,
});
if let Some(tos) = self.state_stack.last_mut() { if let Some(tos) = self.state_stack.last_mut() {
tos.n_clip += 1; tos.n_clip += 1;
} }
@ -340,16 +328,11 @@ impl PietGpuRenderContext {
self.encode_linewidth(-1.0); self.encode_linewidth(-1.0);
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true); self.encode_path(path, true);
let save_point = self.new_encoder.begin_clip(Some(blend)); self.new_encoder.begin_clip(Some(blend));
if self.clip_stack.len() >= MAX_BLEND_STACK { if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
} }
self.clip_stack.push(ClipElement { self.clip_stack.push(ClipElement { blend: Some(blend) });
bbox: None,
save_point,
blend: Some(blend),
});
self.accumulate_bbox(|| shape.bounding_box());
if let Some(tos) = self.state_stack.last_mut() { if let Some(tos) = self.state_stack.last_mut() {
tos.n_clip += 1; tos.n_clip += 1;
} }
@ -406,37 +389,7 @@ impl PietGpuRenderContext {
fn pop_clip(&mut self) { fn pop_clip(&mut self) {
let tos = self.clip_stack.pop().unwrap(); let tos = self.clip_stack.pop().unwrap();
let bbox = tos.bbox.unwrap_or_default(); self.new_encoder.end_clip(tos.blend);
let bbox_f32_4 = rect_to_f32_4(bbox);
self.new_encoder.end_clip(bbox_f32_4, tos.blend, tos.save_point);
if let Some(bbox) = tos.bbox {
self.union_bbox(bbox);
}
}
/// Accumulate a bbox.
///
/// The bbox is given lazily as a closure, relative to the current transform.
/// It's lazy because we don't need to compute it unless we're inside a clip.
fn accumulate_bbox(&mut self, f: impl FnOnce() -> Rect) {
if !self.clip_stack.is_empty() {
let bbox = f();
let bbox = self.cur_transform.transform_rect_bbox(bbox);
self.union_bbox(bbox);
}
}
/// Accumulate an absolute bbox.
///
/// The bbox is given already transformed into surface coordinates.
fn union_bbox(&mut self, bbox: Rect) {
if let Some(tos) = self.clip_stack.last_mut() {
tos.bbox = if let Some(old_bbox) = tos.bbox {
Some(old_bbox.union(bbox))
} else {
Some(bbox)
};
}
} }
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {

View file

@ -47,12 +47,14 @@ pub struct Config {
pub pathseg_alloc: u32, pub pathseg_alloc: u32,
pub anno_alloc: u32, pub anno_alloc: u32,
pub trans_alloc: u32, pub trans_alloc: u32,
pub bbox_alloc: u32, pub path_bbox_alloc: u32,
pub drawmonoid_alloc: u32, pub drawmonoid_alloc: u32,
pub clip_alloc: u32, pub clip_alloc: u32,
pub clip_bic_alloc: u32, pub clip_bic_alloc: u32,
pub clip_stack_alloc: u32, pub clip_stack_alloc: u32,
pub clip_bbox_alloc: u32, pub clip_bbox_alloc: u32,
pub draw_bbox_alloc: u32,
pub drawinfo_alloc: u32,
pub n_trans: u32, pub n_trans: u32,
pub n_path: u32, pub n_path: u32,
pub n_clip: u32, pub n_clip: u32,
@ -60,6 +62,8 @@ pub struct Config {
pub linewidth_offset: u32, pub linewidth_offset: u32,
pub pathtag_offset: u32, pub pathtag_offset: u32,
pub pathseg_offset: u32, pub pathseg_offset: u32,
pub drawtag_offset: u32,
pub drawdata_offset: u32,
} }
// The "element" stage combines a number of stages for parts of the pipeline. // The "element" stage combines a number of stages for parts of the pipeline.

View file

@ -28,6 +28,8 @@ use piet_gpu_hal::{
pub struct DrawMonoid { pub struct DrawMonoid {
pub path_ix: u32, pub path_ix: u32,
pub clip_ix: u32, pub clip_ix: u32,
pub scene_offset: u32,
pub info_offset: u32,
} }
const DRAW_WG: u64 = 256; const DRAW_WG: u64 = 256;
@ -93,7 +95,7 @@ impl DrawStage {
pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage { pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage {
// We're limited to DRAW_PART_SIZE^2 // We're limited to DRAW_PART_SIZE^2
// Also note: size here allows padding // Also note: size here allows padding
let root_buf_size = DRAW_PART_SIZE * 8; let root_buf_size = DRAW_PART_SIZE * 16;
let root_buf = session let root_buf = session
.create_buffer(root_buf_size, BufferUsage::STORAGE) .create_buffer(root_buf_size, BufferUsage::STORAGE)
.unwrap(); .unwrap();

View file

@ -2,7 +2,7 @@
use rand::{Rng, RngCore}; use rand::{Rng, RngCore};
use crate::{PietGpuRenderContext, Blend, BlendMode, CompositionMode}; use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext};
use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape}; use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape};
use piet::{ use piet::{
Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder,
@ -13,10 +13,7 @@ use crate::{PicoSvg, RenderContext, Vec2};
const N_CIRCLES: usize = 0; const N_CIRCLES: usize = 0;
pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) { pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) {
rc.fill( rc.fill(Rect::new(400., 400., 800., 800.), &Color::rgb8(0, 0, 200));
Rect::new(400., 400., 800., 800.),
&Color::rgb8(0, 0, 200),
);
rc.save().unwrap(); rc.save().unwrap();
rc.blend(Rect::new(0., 0., 1000., 1000.), blend); rc.blend(Rect::new(0., 0., 1000., 1000.), blend);
rc.transform(Affine::translate(Vec2::new(600., 600.)) * Affine::rotate(0.01 * i as f64)); rc.transform(Affine::translate(Vec2::new(600., 600.)) * Affine::rotate(0.01 * i as f64));

View file

@ -163,8 +163,7 @@ impl ClipData {
let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip; let clip_bbox_alloc = clip_stack_alloc + 20 * n_clip;
stages::Config { stages::Config {
clip_alloc: clip_alloc as u32, clip_alloc: clip_alloc as u32,
// TODO: this wants to be renamed to path_bbox_alloc path_bbox_alloc: path_bbox_alloc as u32,
bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32, clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32, clip_stack_alloc: clip_stack_alloc as u32,
@ -194,7 +193,7 @@ impl ClipData {
let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16); let clip_range = clip_bbox_start..(clip_bbox_start + n_clip * 16);
let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]); let clip_result = bytemuck::cast_slice::<u8, [f32; 4]>(&buf[clip_range]);
let draw_start = 8 + n_clip * 4 + n_path * 24; let draw_start = 8 + n_clip * 4 + n_path * 24;
let draw_range = draw_start..(draw_start + n_clip * 8); let draw_range = draw_start..(draw_start + n_clip * 16);
let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]); let draw_result = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[draw_range]);
let mut bbox_stack = Vec::new(); let mut bbox_stack = Vec::new();
let mut parent_stack = Vec::new(); let mut parent_stack = Vec::new();

View file

@ -17,20 +17,29 @@
//! Tests for the piet-gpu draw object stage. //! Tests for the piet-gpu draw object stage.
use piet_gpu_hal::{BufWrite, BufferUsage}; use piet_gpu_hal::{BufWrite, BufferUsage};
use rand::Rng; use rand::{seq::SliceRandom, Rng};
use crate::{Config, Runner, TestResult}; use crate::{Config, Runner, TestResult};
use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage}; use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage};
const ELEMENT_SIZE: usize = 36; const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40; const ANNOTATED_SIZE: usize = 40;
const ELEMENT_FILLCOLOR: u32 = 4; // Tags for draw objects. See shader/drawtag.h for the authoritative source.
const ELEMENT_FILLLINGRADIENT: u32 = 5; const DRAWTAG_FILLCOLOR: u32 = 4;
const ELEMENT_FILLIMAGE: u32 = 6; const DRAWTAG_FILLLINGRADIENT: u32 = 20;
const ELEMENT_BEGINCLIP: u32 = 9; const DRAWTAG_FILLIMAGE: u32 = 8;
const ELEMENT_ENDCLIP: u32 = 10; const DRAWTAG_BEGINCLIP: u32 = 5;
const DRAWTAG_ENDCLIP: u32 = 37;
const TAGS: &[u32] = &[
DRAWTAG_FILLCOLOR,
DRAWTAG_FILLLINGRADIENT,
DRAWTAG_FILLIMAGE,
DRAWTAG_BEGINCLIP,
DRAWTAG_ENDCLIP,
];
struct DrawTestData { struct DrawTestData {
tags: Vec<u32>, tags: Vec<u32>,
@ -47,7 +56,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
.session .session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap(); .unwrap();
let scene_size = n_tag * ELEMENT_SIZE as u64; let scene_size = n_tag * DRAWTAG_SIZE as u64;
let scene_buf = runner let scene_buf = runner
.session .session
.create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE) .create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE)
@ -92,7 +101,7 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
impl DrawTestData { impl DrawTestData {
fn new(n: u64) -> DrawTestData { fn new(n: u64) -> DrawTestData {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect(); let tags = (0..n).map(|_| *TAGS.choose(&mut rng).unwrap()).collect();
DrawTestData { tags } DrawTestData { tags }
} }
@ -101,13 +110,14 @@ impl DrawTestData {
// Layout of memory // Layout of memory
let drawmonoid_alloc = 0; let drawmonoid_alloc = 0;
let anno_alloc = drawmonoid_alloc + 8 * n_tags; let anno_alloc = drawmonoid_alloc + 16 * n_tags;
let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags; let clip_alloc = anno_alloc + ANNOTATED_SIZE * n_tags;
let stage_config = stages::Config { let stage_config = stages::Config {
n_elements: n_tags as u32, n_elements: n_tags as u32,
anno_alloc: anno_alloc as u32, anno_alloc: anno_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32, drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32, clip_alloc: clip_alloc as u32,
drawtag_offset: 0,
..Default::default() ..Default::default()
}; };
stage_config stage_config
@ -116,37 +126,35 @@ impl DrawTestData {
fn memory_size(&self) -> u64 { fn memory_size(&self) -> u64 {
// Note: this overallocates the clip buf a bit - only needed for the // Note: this overallocates the clip buf a bit - only needed for the
// total number of begin_clip and end_clip tags. // total number of begin_clip and end_clip tags.
(8 + self.tags.len() * (8 + 4 + ANNOTATED_SIZE)) as u64 (8 + self.tags.len() * (16 + 4 + ANNOTATED_SIZE)) as u64
} }
fn fill_scene(&self, buf: &mut BufWrite) { fn fill_scene(&self, buf: &mut BufWrite) {
let mut element = [0u32; ELEMENT_SIZE / 4]; buf.extend_slice(&self.tags);
for tag in &self.tags {
element[0] = *tag;
buf.push(element);
}
} }
fn verify(&self, buf: &[u8]) -> Option<String> { fn verify(&self, buf: &[u8]) -> Option<String> {
let size = self.tags.len() * 8; let size = self.tags.len() * 16;
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]); let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default(); let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// Verify exclusive prefix sum. // Verify exclusive prefix sum.
let (path_ix, clip_ix) = Self::reduce_tag(*tag); let (path_ix, clip_ix) = Self::reduce_tag(*tag);
if *actual != expected { if *actual != expected {
println!("{:?} {:?}", actual, expected);
return Some(format!("draw mismatch at {}", i)); return Some(format!("draw mismatch at {}", i));
} }
expected.path_ix += path_ix; expected.path_ix += path_ix;
expected.clip_ix += clip_ix; expected.clip_ix += clip_ix;
expected.scene_offset += tag & 28;
} }
None None
} }
fn reduce_tag(tag: u32) -> (u32, u32) { fn reduce_tag(tag: u32) -> (u32, u32) {
match tag { match tag {
ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0), DRAWTAG_FILLCOLOR | DRAWTAG_FILLLINGRADIENT | DRAWTAG_FILLIMAGE => (1, 0),
ELEMENT_BEGINCLIP | ELEMENT_ENDCLIP => (1, 1), DRAWTAG_BEGINCLIP | DRAWTAG_ENDCLIP => (1, 1),
// TODO: ENDCLIP will become (0, 1) // TODO: ENDCLIP will become (0, 1)
_ => (0, 0), _ => (0, 0),
} }

View file

@ -207,11 +207,11 @@ impl PathData {
// Layout of memory // Layout of memory
let trans_alloc = 0; let trans_alloc = 0;
let pathseg_alloc = trans_alloc + n_trans * 24; let pathseg_alloc = trans_alloc + n_trans * 24;
let bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE; let path_bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE;
let stage_config = stages::Config { let stage_config = stages::Config {
pathseg_alloc, pathseg_alloc,
trans_alloc, trans_alloc,
bbox_alloc, path_bbox_alloc,
n_trans, n_trans,
n_path: self.n_path, n_path: self.n_path,
pathtag_offset, pathtag_offset,