Merge pull request #137 from linebender/draw_element

Add draw object stage
This commit is contained in:
Raph Levien 2021-12-03 15:32:51 -08:00 committed by GitHub
commit 22b86072f2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
48 changed files with 1655 additions and 84 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -78,3 +78,19 @@ build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h me
build gen/pathseg.hlsl: hlsl gen/pathseg.spv build gen/pathseg.hlsl: hlsl gen/pathseg.spv
build gen/pathseg.dxil: dxil gen/pathseg.hlsl build gen/pathseg.dxil: dxil gen/pathseg.hlsl
build gen/pathseg.msl: msl gen/pathseg.spv build gen/pathseg.msl: msl gen/pathseg.spv
build gen/draw_reduce.spv: glsl draw_reduce.comp | scene.h drawtag.h setup.h mem.h
build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv
build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl
build gen/draw_reduce.msl: msl gen/draw_reduce.spv
build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h
flags = -DROOT
build gen/draw_root.hlsl: hlsl gen/draw_root.spv
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
build gen/draw_root.msl: msl gen/draw_root.spv
build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h
build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv

Binary file not shown.

View file

@ -0,0 +1,79 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The leaf scan pass for draw tag scan implemented as a tree reduction.
// This stage can be fused with its consumer but is separate now.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) readonly buffer ParentBuf {
Monoid[] parent;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
local[0] = agg;
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_tag_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
Monoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + gl_GlobalInvocationID.x * 2 * N_ROWS;
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_tag_monoid(row, local[i]);
memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix;
}
}

View file

@ -0,0 +1,61 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduction phase for draw scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "drawtag.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) buffer OutBuf {
Monoid[] outbuf;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
// We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0) {
outbuf[gl_WorkGroupID.x] = agg;
}
}

View file

@ -0,0 +1,74 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// A scan pass for draw tag scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "drawtag.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
#define Monoid DrawMonoid
#define combine_monoid combine_tag_monoid
#define monoid_identity tag_monoid_identity
layout(binding = 0) buffer DataBuf {
Monoid[] data;
};
#ifndef ROOT
layout(binding = 1) readonly buffer ParentBuf {
Monoid[] parent;
};
#endif
shared Monoid sh_scratch[WG_SIZE];
void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
local[0] = data[ix];
for (uint i = 1; i < N_ROWS; i++) {
local[i] = combine_monoid(local[i - 1], data[ix + i]);
}
Monoid agg = local[N_ROWS - 1];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
// This could be a semigroup instead of a monoid if we reworked the
// conditional logic, but that might impact performance.
Monoid row = monoid_identity();
#ifdef ROOT
if (gl_LocalInvocationID.x > 0) {
row = sh_scratch[gl_LocalInvocationID.x - 1];
}
#else
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
#endif
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_monoid(row, local[i]);
data[ix + i] = m;
}
}

36
piet-gpu/shader/drawtag.h Normal file
View file

@ -0,0 +1,36 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Common data structures and functions for the draw tag stream.
struct DrawMonoid {
uint path_ix;
uint clip_ix;
};
DrawMonoid tag_monoid_identity() {
return DrawMonoid(0, 0);
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) {
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
#ifdef Element_size
DrawMonoid map_tag(uint tag_word) {
switch (tag_word) {
case Element_FillColor:
case Element_FillLinGradient:
case Element_FillImage:
return DrawMonoid(1, 0);
case Element_BeginClip:
return DrawMonoid(1, 1);
case Element_EndClip:
return DrawMonoid(0, 1);
default:
return DrawMonoid(0, 0);
}
}
#endif

View file

@ -16,6 +16,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -21,6 +21,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,190 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
static const DrawMonoid _67 = { 0u, 0u };
static const DrawMonoid _94 = { 1u, 0u };
static const DrawMonoid _96 = { 1u, 1u };
static const DrawMonoid _98 = { 0u, 1u };
ByteAddressBuffer _49 : register(t2);
ByteAddressBuffer _218 : register(t3);
ByteAddressBuffer _248 : register(t1);
RWByteAddressBuffer _277 : register(u0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _49.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _63 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _63;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _94;
}
case 9u:
{
return _96;
}
case 10u:
{
return _98;
}
default:
{
return _67;
}
}
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _42 = { ref.offset + (index * 36u) };
return _42;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
DrawMonoid tag_monoid_identity()
{
return _67;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _115 = { ix * 36u };
ElementRef ref = _115;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
DrawMonoid local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _224;
_224.path_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_224.clip_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _224.path_ix;
row.clip_ix = _224.clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
}
uint out_base = (_248.Load(44) >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u);
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_277.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_277.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,235 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct ParentBuf
{
DrawMonoid_1 parent[1];
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49)
{
uint tag_and_flags = v_49.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
{
return DrawMonoid{ 1u, 1u };
}
case 10u:
{
return DrawMonoid{ 0u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
}
kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_49).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_49).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _221 = gl_WorkGroupID.x - 1u;
row.path_ix = _218.parent[_221].path_ix;
row.clip_ix = _218.parent[_221].clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
}
uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u);
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_277.memory[out_base + (i_2 * 2u)] = m.path_ix;
_277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,162 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
static const DrawMonoid _88 = { 1u, 0u };
static const DrawMonoid _90 = { 1u, 1u };
static const DrawMonoid _92 = { 0u, 1u };
static const DrawMonoid _94 = { 0u, 0u };
ByteAddressBuffer _46 : register(t2);
RWByteAddressBuffer _203 : register(u3);
RWByteAddressBuffer _217 : register(u0);
ByteAddressBuffer _223 : register(t1);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _60;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _88;
}
case 9u:
{
return _90;
}
case 10u:
{
return _92;
}
default:
{
return _94;
}
}
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _39 = { ref.offset + (index * 36u) };
return _39;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _110 = { ix * 36u };
ElementRef ref = _110;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_203.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix);
_203.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,169 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46)
{
uint tag_and_flags = v_46.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
{
return DrawMonoid{ 1u, 1u };
}
case 10u:
{
return DrawMonoid{ 0u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_46).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_46).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_203.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_203.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,94 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u };
RWByteAddressBuffer _57 : register(u0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
DrawMonoid tag_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _61;
_61.path_ix = _57.Load(ix * 8 + 0);
_61.clip_ix = _57.Load(ix * 8 + 4);
DrawMonoid local[8];
local[0].path_ix = _61.path_ix;
local[0].clip_ix = _61.clip_ix;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
DrawMonoid param = local[i - 1u];
DrawMonoid _88;
_88.path_ix = _57.Load((ix + i) * 8 + 0);
_88.clip_ix = _57.Load((ix + i) * 8 + 4);
param_1.path_ix = _88.path_ix;
param_1.clip_ix = _88.clip_ix;
local[i] = combine_tag_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _178 = ix + i_2;
_57.Store(_178 * 8 + 0, m.path_ix);
_57.Store(_178 * 8 + 4, m.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,128 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct DataBuf
{
DrawMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
}
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _57.data[ix].path_ix;
local[0].clip_ix = _57.data[ix].clip_ix;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _82 = ix + i;
DrawMonoid param = local[i - 1u];
param_1.path_ix = _57.data[_82].path_ix;
param_1.clip_ix = _57.data[_82].clip_ix;
local[i] = combine_tag_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _178 = ix + i_2;
_57.data[_178].path_ix = m.path_ix;
_57.data[_178].clip_ix = m.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -63,6 +63,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -354,7 +355,7 @@ uint round_up(float x)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(52) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint tag_word = _574.Load(((_639.Load(56) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid local_tm = reduce_tag(param); TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm; sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -393,13 +394,13 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4); tm = combine_tag_monoid(param_3, param_4);
} }
uint ps_ix = (_639.Load(60) >> uint(2)) + tm.pathseg_offset; uint ps_ix = (_639.Load(64) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(56) >> uint(2)) + tm.linewidth_ix; uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix; uint save_path_ix = tm.path_ix;
TransformSegRef _769 = { _639.Load(36) + (tm.trans_ix * 24u) }; TransformSegRef _768 = { _639.Load(36) + (tm.trans_ix * 24u) };
TransformSegRef trans_ref = _769; TransformSegRef trans_ref = _768;
PathSegRef _779 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; PathSegRef _778 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _779; PathSegRef ps_ref = _778;
float2 p0; float2 p0;
float2 p1; float2 p1;
float2 p2; float2 p2;
@ -449,9 +450,9 @@ void comp_main()
} }
} }
float linewidth = asfloat(_574.Load(lw_ix * 4 + 0)); float linewidth = asfloat(_574.Load(lw_ix * 4 + 0));
Alloc _865; Alloc _864;
_865.offset = _639.Load(36); _864.offset = _639.Load(36);
param_13.offset = _865.offset; param_13.offset = _864.offset;
TransformSegRef param_14 = trans_ref; TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14); TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -460,25 +461,25 @@ void comp_main()
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _935 = bbox; float4 _934 = bbox;
float2 _938 = min(_935.xy, p2); float2 _937 = min(_934.xy, p2);
bbox.x = _938.x; bbox.x = _937.x;
bbox.y = _938.y; bbox.y = _937.y;
float4 _943 = bbox; float4 _942 = bbox;
float2 _946 = max(_943.zw, p2); float2 _945 = max(_942.zw, p2);
bbox.z = _946.x; bbox.z = _945.x;
bbox.w = _946.y; bbox.w = _945.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _971 = bbox; float4 _970 = bbox;
float2 _974 = min(_971.xy, p3); float2 _973 = min(_970.xy, p3);
bbox.x = _974.x; bbox.x = _973.x;
bbox.y = _974.y; bbox.y = _973.y;
float4 _979 = bbox; float4 _978 = bbox;
float2 _982 = max(_979.zw, p3); float2 _981 = max(_978.zw, p3);
bbox.z = _982.x; bbox.z = _981.x;
bbox.w = _982.y; bbox.w = _981.y;
} }
else else
{ {
@ -509,9 +510,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke; cubic.stroke = stroke;
uint fill_mode = uint(linewidth >= 0.0f); uint fill_mode = uint(linewidth >= 0.0f);
Alloc _1071; Alloc _1070;
_1071.offset = _639.Load(28); _1070.offset = _639.Load(28);
param_15.offset = _1071.offset; param_15.offset = _1070.offset;
PathSegRef param_16 = ps_ref; PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode; uint param_17 = fill_mode;
PathCubic param_18 = cubic; PathCubic param_18 = cubic;
@ -567,17 +568,17 @@ void comp_main()
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1241 = i_4 == 3u; bool _1240 = i_4 == 3u;
bool _1248; bool _1247;
if (_1241) if (_1240)
{ {
_1248 = gl_LocalInvocationID.x == 511u; _1247 = gl_LocalInvocationID.x == 511u;
} }
else else
{ {
_1248 = _1241; _1247 = _1240;
} }
if (_1248) if (_1247)
{ {
do_atomic = true; do_atomic = true;
} }
@ -603,30 +604,30 @@ void comp_main()
} }
if (do_atomic) if (do_atomic)
{ {
bool _1300 = m.bbox.z > m.bbox.x; bool _1299 = m.bbox.z > m.bbox.x;
bool _1309; bool _1308;
if (!_1300) if (!_1299)
{ {
_1309 = m.bbox.w > m.bbox.y; _1308 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1309 = _1300; _1308 = _1299;
} }
if (_1309) if (_1308)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1318; uint _1317;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1318); _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1317);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1326; uint _1325;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1326); _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1325);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1334; uint _1333;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1334); _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1333);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1342; uint _1341;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1342); _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1341);
} }
bbox_out_ix += 4u; bbox_out_ix += 4u;
} }

View file

@ -128,6 +128,7 @@ struct Config
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -530,25 +531,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _935 = bbox; float4 _934 = bbox;
float2 _938 = fast::min(_935.xy, p2); float2 _937 = fast::min(_934.xy, p2);
bbox.x = _938.x; bbox.x = _937.x;
bbox.y = _938.y; bbox.y = _937.y;
float4 _943 = bbox; float4 _942 = bbox;
float2 _946 = fast::max(_943.zw, p2); float2 _945 = fast::max(_942.zw, p2);
bbox.z = _946.x; bbox.z = _945.x;
bbox.w = _946.y; bbox.w = _945.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _971 = bbox; float4 _970 = bbox;
float2 _974 = fast::min(_971.xy, p3); float2 _973 = fast::min(_970.xy, p3);
bbox.x = _974.x; bbox.x = _973.x;
bbox.y = _974.y; bbox.y = _973.y;
float4 _979 = bbox; float4 _978 = bbox;
float2 _982 = fast::max(_979.zw, p3); float2 _981 = fast::max(_978.zw, p3);
bbox.z = _982.x; bbox.z = _981.x;
bbox.w = _982.y; bbox.w = _981.y;
} }
else else
{ {
@ -635,17 +636,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1241 = i_4 == 3u; bool _1240 = i_4 == 3u;
bool _1248; bool _1247;
if (_1241) if (_1240)
{ {
_1248 = gl_LocalInvocationID.x == 511u; _1247 = gl_LocalInvocationID.x == 511u;
} }
else else
{ {
_1248 = _1241; _1247 = _1240;
} }
if (_1248) if (_1247)
{ {
do_atomic = true; do_atomic = true;
} }
@ -671,26 +672,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
} }
if (do_atomic) if (do_atomic)
{ {
bool _1300 = m.bbox.z > m.bbox.x; bool _1299 = m.bbox.z > m.bbox.x;
bool _1309; bool _1308;
if (!_1300) if (!_1299)
{ {
_1309 = m.bbox.w > m.bbox.y; _1308 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1309 = _1300; _1308 = _1299;
} }
if (_1309) if (_1308)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1318 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); uint _1317 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1326 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); uint _1325 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1334 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); uint _1333 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1342 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); uint _1341 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
} }
bbox_out_ix += 4u; bbox_out_ix += 4u;
} }

Binary file not shown.

View file

@ -25,6 +25,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -81,7 +82,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint scene_ix = (_139.Load(52) >> uint(2)) + ix; uint scene_ix = (_139.Load(56) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0); uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);

View file

@ -32,6 +32,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -38,6 +38,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -148,7 +149,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(48) + (ix * 24u) }; TransformRef _285 = { _278.Load(52) + (ix * 24u) };
TransformRef ref = _285; TransformRef ref = _285;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -101,6 +101,7 @@ struct Config
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -27,6 +27,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -85,7 +86,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(48) + (ix * 24u) }; TransformRef _168 = { _161.Load(52) + (ix * 24u) };
TransformRef ref = _168; TransformRef ref = _168;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -39,6 +39,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -42,6 +42,8 @@ struct Config {
// Bounding boxes of paths, stored as int (so atomics work) // Bounding boxes of paths, stored as int (so atomics work)
Alloc bbox_alloc; Alloc bbox_alloc;
// Monoid for draw objects
Alloc drawmonoid_alloc;
// Number of transforms in scene // Number of transforms in scene
// This is probably not needed. // This is probably not needed.

Binary file not shown.

View file

@ -16,11 +16,13 @@
//! Stages for new element pipeline, exposed for testing. //! Stages for new element pipeline, exposed for testing.
mod draw;
mod path; mod path;
mod transform; mod transform;
use bytemuck::{Pod, Zeroable}; use bytemuck::{Pod, Zeroable};
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage};
pub use path::{PathBinding, PathCode, PathEncoder, PathStage}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage};
pub use transform::{Transform, TransformBinding, TransformCode, TransformStage}; pub use transform::{Transform, TransformBinding, TransformCode, TransformStage};
@ -41,6 +43,7 @@ pub struct Config {
pub anno_alloc: u32, pub anno_alloc: u32,
pub trans_alloc: u32, pub trans_alloc: u32,
pub bbox_alloc: u32, pub bbox_alloc: u32,
pub drawmonoid_alloc: u32,
pub n_trans: u32, pub n_trans: u32,
pub trans_offset: u32, pub trans_offset: u32,
pub pathtag_offset: u32, pub pathtag_offset: u32,

163
piet-gpu/src/stages/draw.rs Normal file
View file

@ -0,0 +1,163 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! The draw object stage of the element processing pipeline.
use bytemuck::{Pod, Zeroable};
use piet_gpu_hal::{
include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session,
};
/// The output element of the draw object stage.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Zeroable, Pod)]
pub struct DrawMonoid {
pub path_ix: u32,
pub clip_ix: u32,
}
const DRAW_WG: u64 = 512;
const DRAW_N_ROWS: u64 = 8;
const DRAW_PART_SIZE: u64 = DRAW_WG * DRAW_N_ROWS;
pub struct DrawCode {
reduce_pipeline: Pipeline,
root_pipeline: Pipeline,
leaf_pipeline: Pipeline,
}
pub struct DrawStage {
// Right now we're limited to partition^2 (~16M) elements. This can be
// expanded but is tedious.
root_buf: Buffer,
root_ds: DescriptorSet,
}
pub struct DrawBinding {
reduce_ds: DescriptorSet,
leaf_ds: DescriptorSet,
}
impl DrawCode {
pub unsafe fn new(session: &Session) -> DrawCode {
let reduce_code = include_shader!(session, "../../shader/gen/draw_reduce");
let reduce_pipeline = session
.create_compute_pipeline(
reduce_code,
&[
BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)
.unwrap();
let root_code = include_shader!(session, "../../shader/gen/draw_root");
let root_pipeline = session
.create_compute_pipeline(root_code, &[BindType::Buffer])
.unwrap();
let leaf_code = include_shader!(session, "../../shader/gen/draw_leaf");
let leaf_pipeline = session
.create_compute_pipeline(
leaf_code,
&[
BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
],
)
.unwrap();
DrawCode {
reduce_pipeline,
root_pipeline,
leaf_pipeline,
}
}
}
impl DrawStage {
pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage {
// We're limited to DRAW_PART_SIZE^2
// Also note: size here allows padding
let root_buf_size = DRAW_PART_SIZE * 8;
let root_buf = session
.create_buffer(root_buf_size, BufferUsage::STORAGE)
.unwrap();
let root_ds = session
.create_simple_descriptor_set(&code.root_pipeline, &[&root_buf])
.unwrap();
DrawStage { root_buf, root_ds }
}
pub unsafe fn bind(
&self,
session: &Session,
code: &DrawCode,
config_buf: &Buffer,
scene_buf: &Buffer,
memory_buf: &Buffer,
) -> DrawBinding {
let reduce_ds = session
.create_simple_descriptor_set(
&code.reduce_pipeline,
&[memory_buf, config_buf, scene_buf, &self.root_buf],
)
.unwrap();
let leaf_ds = session
.create_simple_descriptor_set(
&code.leaf_pipeline,
&[memory_buf, config_buf, scene_buf, &self.root_buf],
)
.unwrap();
DrawBinding { reduce_ds, leaf_ds }
}
pub unsafe fn record(
&self,
cmd_buf: &mut CmdBuf,
code: &DrawCode,
binding: &DrawBinding,
size: u64,
) {
if size > DRAW_PART_SIZE.pow(2) {
panic!("very large scan not yet implemented");
}
let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE;
if n_workgroups > 1 {
cmd_buf.dispatch(
&code.reduce_pipeline,
&binding.reduce_ds,
(n_workgroups as u32, 1, 1),
(DRAW_WG as u32, 1, 1),
);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&code.root_pipeline,
&self.root_ds,
(1, 1, 1),
(DRAW_WG as u32, 1, 1),
);
cmd_buf.memory_barrier();
}
cmd_buf.dispatch(
&code.leaf_pipeline,
&binding.leaf_ds,
(n_workgroups as u32, 1, 1),
(DRAW_WG as u32, 1, 1),
);
}
}

147
tests/src/draw.rs Normal file
View file

@ -0,0 +1,147 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! Tests for the piet-gpu draw object stage.
use piet_gpu_hal::{BufWrite, BufferUsage};
use rand::Rng;
use crate::{Config, Runner, TestResult};
use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage};
const ELEMENT_SIZE: usize = 36;
const ELEMENT_FILLCOLOR: u32 = 4;
const ELEMENT_FILLLINGRADIENT: u32 = 5;
const ELEMENT_FILLIMAGE: u32 = 6;
const ELEMENT_BEGINCLIP: u32 = 9;
const ELEMENT_ENDCLIP: u32 = 10;
struct DrawTestData {
tags: Vec<u32>,
}
pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("draw");
let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
let data = DrawTestData::new(n_tag);
let stage_config = data.get_config();
let config_buf = runner
.session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap();
let scene_size = n_tag * ELEMENT_SIZE as u64;
let scene_buf = runner
.session
.create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE)
.unwrap();
let memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
let code = DrawCode::new(&runner.session);
let stage = DrawStage::new(&runner.session, &code);
let binding = stage.bind(
&runner.session,
&code,
&config_buf,
&scene_buf,
&memory.dev_buf,
);
let mut total_elapsed = 0.0;
let n_iter = config.n_iter;
for i in 0..n_iter {
let mut commands = runner.commands();
commands.write_timestamp(0);
stage.record(&mut commands.cmd_buf, &code, &binding, n_tag);
commands.write_timestamp(1);
if i == 0 || config.verify_all {
commands.cmd_buf.memory_barrier();
commands.download(&memory);
}
total_elapsed += runner.submit(commands);
if i == 0 || config.verify_all {
let dst = memory.map_read(..);
if let Some(failure) = data.verify(&dst) {
result.fail(failure);
}
}
}
let n_elements = n_tag;
result.timing(total_elapsed, n_elements * n_iter);
result
}
impl DrawTestData {
fn new(n: u64) -> DrawTestData {
let mut rng = rand::thread_rng();
let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect();
DrawTestData { tags }
}
fn get_config(&self) -> stages::Config {
let n_tags = self.tags.len();
// Layout of memory
let drawmonoid_alloc = 0;
let stage_config = stages::Config {
n_elements: n_tags as u32,
drawmonoid_alloc,
..Default::default()
};
stage_config
}
fn memory_size(&self) -> u64 {
8 + self.tags.len() as u64 * 8
}
fn fill_scene(&self, buf: &mut BufWrite) {
let mut element = [0u32; ELEMENT_SIZE / 4];
for tag in &self.tags {
element[0] = *tag;
buf.push(element);
}
}
fn verify(&self, buf: &[u8]) -> Option<String> {
let size = self.tags.len() * 8;
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// We compute an inclusive prefix sum, but for this application
// exclusive would be slightly better. We can adapt though.
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
if *actual != expected {
return Some(format!("draw mismatch at {}", i));
}
}
None
}
fn reduce_tag(tag: u32) -> (u32, u32) {
match tag {
ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0),
ELEMENT_BEGINCLIP => (1, 1),
ELEMENT_ENDCLIP => (0, 1),
_ => (0, 0),
}
}
}

View file

@ -18,6 +18,7 @@
mod clear; mod clear;
mod config; mod config;
mod draw;
mod linkedlist; mod linkedlist;
mod message_passing; mod message_passing;
mod prefix; mod prefix;
@ -137,6 +138,7 @@ fn main() {
if config.groups.matches("piet") { if config.groups.matches("piet") {
report(&transform::transform_test(&mut runner, &config)); report(&transform::transform_test(&mut runner, &config));
report(&path::path_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config));
report(&draw::draw_test(&mut runner, &config));
} }
} }
} }