Add draw object stage

This is one of the stages in the new element pipeline. It's a simple
one, just a prefix sum of a couple counts, and some of it will probably
get merged with a downstream stage, but we'll do it separately for now
for convenience.

This patch also contains an update to Vulkan tools 1.2.198, which
accounts for the large diff of translated shaders.
This commit is contained in:
Raph Levien 2021-12-02 08:41:41 -08:00
parent 50153a7797
commit 875c8badf4
48 changed files with 1655 additions and 84 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -78,3 +78,19 @@ build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h me
build gen/pathseg.hlsl: hlsl gen/pathseg.spv build gen/pathseg.hlsl: hlsl gen/pathseg.spv
build gen/pathseg.dxil: dxil gen/pathseg.hlsl build gen/pathseg.dxil: dxil gen/pathseg.hlsl
build gen/pathseg.msl: msl gen/pathseg.spv build gen/pathseg.msl: msl gen/pathseg.spv
build gen/draw_reduce.spv: glsl draw_reduce.comp | scene.h drawtag.h setup.h mem.h
build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv
build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl
build gen/draw_reduce.msl: msl gen/draw_reduce.spv
build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h
flags = -DROOT
build gen/draw_root.hlsl: hlsl gen/draw_root.spv
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
build gen/draw_root.msl: msl gen/draw_root.spv
build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h
build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
build gen/draw_leaf.msl: msl gen/draw_leaf.spv

Binary file not shown.

View file

@ -0,0 +1,79 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The leaf scan pass for draw tag scan implemented as a tree reduction.
// This stage can be fused with its consumer but is separate now.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) readonly buffer ParentBuf {
Monoid[] parent;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
local[0] = agg;
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_tag_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
Monoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + gl_GlobalInvocationID.x * 2 * N_ROWS;
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_tag_monoid(row, local[i]);
memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix;
}
}

View file

@ -0,0 +1,61 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduction phase for draw scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "drawtag.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) buffer OutBuf {
Monoid[] outbuf;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
// We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0) {
outbuf[gl_WorkGroupID.x] = agg;
}
}

View file

@ -0,0 +1,74 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// A scan pass for draw tag scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "drawtag.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
#define Monoid DrawMonoid
#define combine_monoid combine_tag_monoid
#define monoid_identity tag_monoid_identity
layout(binding = 0) buffer DataBuf {
Monoid[] data;
};
#ifndef ROOT
layout(binding = 1) readonly buffer ParentBuf {
Monoid[] parent;
};
#endif
shared Monoid sh_scratch[WG_SIZE];
void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
local[0] = data[ix];
for (uint i = 1; i < N_ROWS; i++) {
local[i] = combine_monoid(local[i - 1], data[ix + i]);
}
Monoid agg = local[N_ROWS - 1];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
// This could be a semigroup instead of a monoid if we reworked the
// conditional logic, but that might impact performance.
Monoid row = monoid_identity();
#ifdef ROOT
if (gl_LocalInvocationID.x > 0) {
row = sh_scratch[gl_LocalInvocationID.x - 1];
}
#else
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
#endif
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_monoid(row, local[i]);
data[ix + i] = m;
}
}

36
piet-gpu/shader/drawtag.h Normal file
View file

@ -0,0 +1,36 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Common data structures and functions for the draw tag stream.
struct DrawMonoid {
uint path_ix;
uint clip_ix;
};
DrawMonoid tag_monoid_identity() {
return DrawMonoid(0, 0);
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) {
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
#ifdef Element_size
DrawMonoid map_tag(uint tag_word) {
switch (tag_word) {
case Element_FillColor:
case Element_FillLinGradient:
case Element_FillImage:
return DrawMonoid(1, 0);
case Element_BeginClip:
return DrawMonoid(1, 1);
case Element_EndClip:
return DrawMonoid(0, 1);
default:
return DrawMonoid(0, 0);
}
}
#endif

View file

@ -16,6 +16,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -21,6 +21,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,190 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
static const DrawMonoid _67 = { 0u, 0u };
static const DrawMonoid _94 = { 1u, 0u };
static const DrawMonoid _96 = { 1u, 1u };
static const DrawMonoid _98 = { 0u, 1u };
ByteAddressBuffer _49 : register(t2);
ByteAddressBuffer _218 : register(t3);
ByteAddressBuffer _248 : register(t1);
RWByteAddressBuffer _277 : register(u0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _49.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _63 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _63;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _94;
}
case 9u:
{
return _96;
}
case 10u:
{
return _98;
}
default:
{
return _67;
}
}
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _42 = { ref.offset + (index * 36u) };
return _42;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
DrawMonoid tag_monoid_identity()
{
return _67;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _115 = { ix * 36u };
ElementRef ref = _115;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
DrawMonoid local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _224;
_224.path_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
_224.clip_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
row.path_ix = _224.path_ix;
row.clip_ix = _224.clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
}
uint out_base = (_248.Load(44) >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u);
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_277.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix);
_277.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,235 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct ParentBuf
{
DrawMonoid_1 parent[1];
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49)
{
uint tag_and_flags = v_49.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
{
return DrawMonoid{ 1u, 1u };
}
case 10u:
{
return DrawMonoid{ 0u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
}
kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_49).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_49).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _221 = gl_WorkGroupID.x - 1u;
row.path_ix = _218.parent[_221].path_ix;
row.clip_ix = _218.parent[_221].clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
}
uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u);
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_277.memory[out_base + (i_2 * 2u)] = m.path_ix;
_277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,162 @@
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
static const DrawMonoid _88 = { 1u, 0u };
static const DrawMonoid _90 = { 1u, 1u };
static const DrawMonoid _92 = { 0u, 1u };
static const DrawMonoid _94 = { 0u, 0u };
ByteAddressBuffer _46 : register(t2);
RWByteAddressBuffer _203 : register(u3);
RWByteAddressBuffer _217 : register(u0);
ByteAddressBuffer _223 : register(t1);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
ElementTag Element_tag(ElementRef ref)
{
uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0);
ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _60;
}
DrawMonoid map_tag(uint tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return _88;
}
case 9u:
{
return _90;
}
case 10u:
{
return _92;
}
default:
{
return _94;
}
}
}
ElementRef Element_index(ElementRef ref, uint index)
{
ElementRef _39 = { ref.offset + (index * 36u) };
return _39;
}
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef _110 = { ix * 36u };
ElementRef ref = _110;
ElementRef param = ref;
uint tag_word = Element_tag(param).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_203.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix);
_203.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,169 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46)
{
uint tag_and_flags = v_46.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
{
return DrawMonoid{ 1u, 1u };
}
case 10u:
{
return DrawMonoid{ 0u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_46).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_46).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_8 = agg;
DrawMonoid param_9 = other;
agg = combine_tag_monoid(param_8, param_9);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_203.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_203.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,94 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u };
RWByteAddressBuffer _57 : register(u0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[512];
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
DrawMonoid tag_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _61;
_61.path_ix = _57.Load(ix * 8 + 0);
_61.clip_ix = _57.Load(ix * 8 + 4);
DrawMonoid local[8];
local[0].path_ix = _61.path_ix;
local[0].clip_ix = _61.clip_ix;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
DrawMonoid param = local[i - 1u];
DrawMonoid _88;
_88.path_ix = _57.Load((ix + i) * 8 + 0);
_88.clip_ix = _57.Load((ix + i) * 8 + 4);
param_1.path_ix = _88.path_ix;
param_1.clip_ix = _88.clip_ix;
local[i] = combine_tag_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _178 = ix + i_2;
_57.Store(_178 * 8 + 0, m.path_ix);
_57.Store(_178 * 8 + 4, m.clip_ix);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,128 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct DataBuf
{
DrawMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
}
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _57.data[ix].path_ix;
local[0].clip_ix = _57.data[ix].clip_ix;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _82 = ix + i;
DrawMonoid param = local[i - 1u];
param_1.path_ix = _57.data[_82].path_ix;
param_1.clip_ix = _57.data[_82].clip_ix;
local[i] = combine_tag_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_4, param_5);
uint _178 = ix + i_2;
_57.data[_178].path_ix = m.path_ix;
_57.data[_178].clip_ix = m.clip_ix;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -63,6 +63,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -354,7 +355,7 @@ uint round_up(float x)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(52) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint tag_word = _574.Load(((_639.Load(56) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid local_tm = reduce_tag(param); TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm; sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -393,13 +394,13 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4); tm = combine_tag_monoid(param_3, param_4);
} }
uint ps_ix = (_639.Load(60) >> uint(2)) + tm.pathseg_offset; uint ps_ix = (_639.Load(64) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(56) >> uint(2)) + tm.linewidth_ix; uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix; uint save_path_ix = tm.path_ix;
TransformSegRef _769 = { _639.Load(36) + (tm.trans_ix * 24u) }; TransformSegRef _768 = { _639.Load(36) + (tm.trans_ix * 24u) };
TransformSegRef trans_ref = _769; TransformSegRef trans_ref = _768;
PathSegRef _779 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; PathSegRef _778 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _779; PathSegRef ps_ref = _778;
float2 p0; float2 p0;
float2 p1; float2 p1;
float2 p2; float2 p2;
@ -449,9 +450,9 @@ void comp_main()
} }
} }
float linewidth = asfloat(_574.Load(lw_ix * 4 + 0)); float linewidth = asfloat(_574.Load(lw_ix * 4 + 0));
Alloc _865; Alloc _864;
_865.offset = _639.Load(36); _864.offset = _639.Load(36);
param_13.offset = _865.offset; param_13.offset = _864.offset;
TransformSegRef param_14 = trans_ref; TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14); TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -460,25 +461,25 @@ void comp_main()
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _935 = bbox; float4 _934 = bbox;
float2 _938 = min(_935.xy, p2); float2 _937 = min(_934.xy, p2);
bbox.x = _938.x; bbox.x = _937.x;
bbox.y = _938.y; bbox.y = _937.y;
float4 _943 = bbox; float4 _942 = bbox;
float2 _946 = max(_943.zw, p2); float2 _945 = max(_942.zw, p2);
bbox.z = _946.x; bbox.z = _945.x;
bbox.w = _946.y; bbox.w = _945.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _971 = bbox; float4 _970 = bbox;
float2 _974 = min(_971.xy, p3); float2 _973 = min(_970.xy, p3);
bbox.x = _974.x; bbox.x = _973.x;
bbox.y = _974.y; bbox.y = _973.y;
float4 _979 = bbox; float4 _978 = bbox;
float2 _982 = max(_979.zw, p3); float2 _981 = max(_978.zw, p3);
bbox.z = _982.x; bbox.z = _981.x;
bbox.w = _982.y; bbox.w = _981.y;
} }
else else
{ {
@ -509,9 +510,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke; cubic.stroke = stroke;
uint fill_mode = uint(linewidth >= 0.0f); uint fill_mode = uint(linewidth >= 0.0f);
Alloc _1071; Alloc _1070;
_1071.offset = _639.Load(28); _1070.offset = _639.Load(28);
param_15.offset = _1071.offset; param_15.offset = _1070.offset;
PathSegRef param_16 = ps_ref; PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode; uint param_17 = fill_mode;
PathCubic param_18 = cubic; PathCubic param_18 = cubic;
@ -567,17 +568,17 @@ void comp_main()
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1241 = i_4 == 3u; bool _1240 = i_4 == 3u;
bool _1248; bool _1247;
if (_1241) if (_1240)
{ {
_1248 = gl_LocalInvocationID.x == 511u; _1247 = gl_LocalInvocationID.x == 511u;
} }
else else
{ {
_1248 = _1241; _1247 = _1240;
} }
if (_1248) if (_1247)
{ {
do_atomic = true; do_atomic = true;
} }
@ -603,30 +604,30 @@ void comp_main()
} }
if (do_atomic) if (do_atomic)
{ {
bool _1300 = m.bbox.z > m.bbox.x; bool _1299 = m.bbox.z > m.bbox.x;
bool _1309; bool _1308;
if (!_1300) if (!_1299)
{ {
_1309 = m.bbox.w > m.bbox.y; _1308 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1309 = _1300; _1308 = _1299;
} }
if (_1309) if (_1308)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1318; uint _1317;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1318); _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1317);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1326; uint _1325;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1326); _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1325);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1334; uint _1333;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1334); _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1333);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1342; uint _1341;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1342); _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1341);
} }
bbox_out_ix += 4u; bbox_out_ix += 4u;
} }

View file

@ -128,6 +128,7 @@ struct Config
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -530,25 +531,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _935 = bbox; float4 _934 = bbox;
float2 _938 = fast::min(_935.xy, p2); float2 _937 = fast::min(_934.xy, p2);
bbox.x = _938.x; bbox.x = _937.x;
bbox.y = _938.y; bbox.y = _937.y;
float4 _943 = bbox; float4 _942 = bbox;
float2 _946 = fast::max(_943.zw, p2); float2 _945 = fast::max(_942.zw, p2);
bbox.z = _946.x; bbox.z = _945.x;
bbox.w = _946.y; bbox.w = _945.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _971 = bbox; float4 _970 = bbox;
float2 _974 = fast::min(_971.xy, p3); float2 _973 = fast::min(_970.xy, p3);
bbox.x = _974.x; bbox.x = _973.x;
bbox.y = _974.y; bbox.y = _973.y;
float4 _979 = bbox; float4 _978 = bbox;
float2 _982 = fast::max(_979.zw, p3); float2 _981 = fast::max(_978.zw, p3);
bbox.z = _982.x; bbox.z = _981.x;
bbox.w = _982.y; bbox.w = _981.y;
} }
else else
{ {
@ -635,17 +636,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1241 = i_4 == 3u; bool _1240 = i_4 == 3u;
bool _1248; bool _1247;
if (_1241) if (_1240)
{ {
_1248 = gl_LocalInvocationID.x == 511u; _1247 = gl_LocalInvocationID.x == 511u;
} }
else else
{ {
_1248 = _1241; _1247 = _1240;
} }
if (_1248) if (_1247)
{ {
do_atomic = true; do_atomic = true;
} }
@ -671,26 +672,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
} }
if (do_atomic) if (do_atomic)
{ {
bool _1300 = m.bbox.z > m.bbox.x; bool _1299 = m.bbox.z > m.bbox.x;
bool _1309; bool _1308;
if (!_1300) if (!_1299)
{ {
_1309 = m.bbox.w > m.bbox.y; _1308 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1309 = _1300; _1308 = _1299;
} }
if (_1309) if (_1308)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1318 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); uint _1317 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1326 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); uint _1325 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1334 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); uint _1333 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1342 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); uint _1341 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
} }
bbox_out_ix += 4u; bbox_out_ix += 4u;
} }

Binary file not shown.

View file

@ -25,6 +25,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -81,7 +82,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint scene_ix = (_139.Load(52) >> uint(2)) + ix; uint scene_ix = (_139.Load(56) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0); uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);

View file

@ -32,6 +32,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -38,6 +38,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -148,7 +149,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(48) + (ix * 24u) }; TransformRef _285 = { _278.Load(52) + (ix * 24u) };
TransformRef ref = _285; TransformRef ref = _285;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -101,6 +101,7 @@ struct Config
Alloc_1 anno_alloc; Alloc_1 anno_alloc;
Alloc_1 trans_alloc; Alloc_1 trans_alloc;
Alloc_1 bbox_alloc; Alloc_1 bbox_alloc;
Alloc_1 drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

View file

@ -27,6 +27,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;
@ -85,7 +86,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(48) + (ix * 24u) }; TransformRef _168 = { _161.Load(52) + (ix * 24u) };
TransformRef ref = _168; TransformRef ref = _168;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -39,6 +39,7 @@ struct Config
Alloc anno_alloc; Alloc anno_alloc;
Alloc trans_alloc; Alloc trans_alloc;
Alloc bbox_alloc; Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans; uint n_trans;
uint trans_offset; uint trans_offset;
uint pathtag_offset; uint pathtag_offset;

Binary file not shown.

Binary file not shown.

View file

@ -42,6 +42,8 @@ struct Config {
// Bounding boxes of paths, stored as int (so atomics work) // Bounding boxes of paths, stored as int (so atomics work)
Alloc bbox_alloc; Alloc bbox_alloc;
// Monoid for draw objects
Alloc drawmonoid_alloc;
// Number of transforms in scene // Number of transforms in scene
// This is probably not needed. // This is probably not needed.

Binary file not shown.

View file

@ -16,11 +16,13 @@
//! Stages for new element pipeline, exposed for testing. //! Stages for new element pipeline, exposed for testing.
mod draw;
mod path; mod path;
mod transform; mod transform;
use bytemuck::{Pod, Zeroable}; use bytemuck::{Pod, Zeroable};
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage};
pub use path::{PathBinding, PathCode, PathEncoder, PathStage}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage};
pub use transform::{Transform, TransformBinding, TransformCode, TransformStage}; pub use transform::{Transform, TransformBinding, TransformCode, TransformStage};
@ -41,6 +43,7 @@ pub struct Config {
pub anno_alloc: u32, pub anno_alloc: u32,
pub trans_alloc: u32, pub trans_alloc: u32,
pub bbox_alloc: u32, pub bbox_alloc: u32,
pub drawmonoid_alloc: u32,
pub n_trans: u32, pub n_trans: u32,
pub trans_offset: u32, pub trans_offset: u32,
pub pathtag_offset: u32, pub pathtag_offset: u32,

163
piet-gpu/src/stages/draw.rs Normal file
View file

@ -0,0 +1,163 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! The draw object stage of the element processing pipeline.
use bytemuck::{Pod, Zeroable};
use piet_gpu_hal::{
include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session,
};
/// The output element of the draw object stage.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Zeroable, Pod)]
pub struct DrawMonoid {
pub path_ix: u32,
pub clip_ix: u32,
}
const DRAW_WG: u64 = 512;
const DRAW_N_ROWS: u64 = 8;
const DRAW_PART_SIZE: u64 = DRAW_WG * DRAW_N_ROWS;
pub struct DrawCode {
reduce_pipeline: Pipeline,
root_pipeline: Pipeline,
leaf_pipeline: Pipeline,
}
pub struct DrawStage {
// Right now we're limited to partition^2 (~16M) elements. This can be
// expanded but is tedious.
root_buf: Buffer,
root_ds: DescriptorSet,
}
pub struct DrawBinding {
reduce_ds: DescriptorSet,
leaf_ds: DescriptorSet,
}
impl DrawCode {
pub unsafe fn new(session: &Session) -> DrawCode {
let reduce_code = include_shader!(session, "../../shader/gen/draw_reduce");
let reduce_pipeline = session
.create_compute_pipeline(
reduce_code,
&[
BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)
.unwrap();
let root_code = include_shader!(session, "../../shader/gen/draw_root");
let root_pipeline = session
.create_compute_pipeline(root_code, &[BindType::Buffer])
.unwrap();
let leaf_code = include_shader!(session, "../../shader/gen/draw_leaf");
let leaf_pipeline = session
.create_compute_pipeline(
leaf_code,
&[
BindType::Buffer,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
],
)
.unwrap();
DrawCode {
reduce_pipeline,
root_pipeline,
leaf_pipeline,
}
}
}
impl DrawStage {
pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage {
// We're limited to DRAW_PART_SIZE^2
// Also note: size here allows padding
let root_buf_size = DRAW_PART_SIZE * 8;
let root_buf = session
.create_buffer(root_buf_size, BufferUsage::STORAGE)
.unwrap();
let root_ds = session
.create_simple_descriptor_set(&code.root_pipeline, &[&root_buf])
.unwrap();
DrawStage { root_buf, root_ds }
}
pub unsafe fn bind(
&self,
session: &Session,
code: &DrawCode,
config_buf: &Buffer,
scene_buf: &Buffer,
memory_buf: &Buffer,
) -> DrawBinding {
let reduce_ds = session
.create_simple_descriptor_set(
&code.reduce_pipeline,
&[memory_buf, config_buf, scene_buf, &self.root_buf],
)
.unwrap();
let leaf_ds = session
.create_simple_descriptor_set(
&code.leaf_pipeline,
&[memory_buf, config_buf, scene_buf, &self.root_buf],
)
.unwrap();
DrawBinding { reduce_ds, leaf_ds }
}
pub unsafe fn record(
&self,
cmd_buf: &mut CmdBuf,
code: &DrawCode,
binding: &DrawBinding,
size: u64,
) {
if size > DRAW_PART_SIZE.pow(2) {
panic!("very large scan not yet implemented");
}
let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE;
if n_workgroups > 1 {
cmd_buf.dispatch(
&code.reduce_pipeline,
&binding.reduce_ds,
(n_workgroups as u32, 1, 1),
(DRAW_WG as u32, 1, 1),
);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&code.root_pipeline,
&self.root_ds,
(1, 1, 1),
(DRAW_WG as u32, 1, 1),
);
cmd_buf.memory_barrier();
}
cmd_buf.dispatch(
&code.leaf_pipeline,
&binding.leaf_ds,
(n_workgroups as u32, 1, 1),
(DRAW_WG as u32, 1, 1),
);
}
}

147
tests/src/draw.rs Normal file
View file

@ -0,0 +1,147 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! Tests for the piet-gpu draw object stage.
use piet_gpu_hal::{BufWrite, BufferUsage};
use rand::Rng;
use crate::{Config, Runner, TestResult};
use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage};
const ELEMENT_SIZE: usize = 36;
const ELEMENT_FILLCOLOR: u32 = 4;
const ELEMENT_FILLLINGRADIENT: u32 = 5;
const ELEMENT_FILLIMAGE: u32 = 6;
const ELEMENT_BEGINCLIP: u32 = 9;
const ELEMENT_ENDCLIP: u32 = 10;
struct DrawTestData {
tags: Vec<u32>,
}
pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
let mut result = TestResult::new("draw");
let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
let data = DrawTestData::new(n_tag);
let stage_config = data.get_config();
let config_buf = runner
.session
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
.unwrap();
let scene_size = n_tag * ELEMENT_SIZE as u64;
let scene_buf = runner
.session
.create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE)
.unwrap();
let memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE);
let code = DrawCode::new(&runner.session);
let stage = DrawStage::new(&runner.session, &code);
let binding = stage.bind(
&runner.session,
&code,
&config_buf,
&scene_buf,
&memory.dev_buf,
);
let mut total_elapsed = 0.0;
let n_iter = config.n_iter;
for i in 0..n_iter {
let mut commands = runner.commands();
commands.write_timestamp(0);
stage.record(&mut commands.cmd_buf, &code, &binding, n_tag);
commands.write_timestamp(1);
if i == 0 || config.verify_all {
commands.cmd_buf.memory_barrier();
commands.download(&memory);
}
total_elapsed += runner.submit(commands);
if i == 0 || config.verify_all {
let dst = memory.map_read(..);
if let Some(failure) = data.verify(&dst) {
result.fail(failure);
}
}
}
let n_elements = n_tag;
result.timing(total_elapsed, n_elements * n_iter);
result
}
impl DrawTestData {
fn new(n: u64) -> DrawTestData {
let mut rng = rand::thread_rng();
let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect();
DrawTestData { tags }
}
fn get_config(&self) -> stages::Config {
let n_tags = self.tags.len();
// Layout of memory
let drawmonoid_alloc = 0;
let stage_config = stages::Config {
n_elements: n_tags as u32,
drawmonoid_alloc,
..Default::default()
};
stage_config
}
fn memory_size(&self) -> u64 {
8 + self.tags.len() as u64 * 8
}
fn fill_scene(&self, buf: &mut BufWrite) {
let mut element = [0u32; ELEMENT_SIZE / 4];
for tag in &self.tags {
element[0] = *tag;
buf.push(element);
}
}
fn verify(&self, buf: &[u8]) -> Option<String> {
let size = self.tags.len() * 8;
let actual = bytemuck::cast_slice::<u8, DrawMonoid>(&buf[8..8 + size]);
let mut expected = DrawMonoid::default();
for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() {
// We compute an inclusive prefix sum, but for this application
// exclusive would be slightly better. We can adapt though.
let (path_ix, clip_ix) = Self::reduce_tag(*tag);
expected.path_ix += path_ix;
expected.clip_ix += clip_ix;
if *actual != expected {
return Some(format!("draw mismatch at {}", i));
}
}
None
}
fn reduce_tag(tag: u32) -> (u32, u32) {
match tag {
ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0),
ELEMENT_BEGINCLIP => (1, 1),
ELEMENT_ENDCLIP => (0, 1),
_ => (0, 0),
}
}
}

View file

@ -18,6 +18,7 @@
mod clear; mod clear;
mod config; mod config;
mod draw;
mod linkedlist; mod linkedlist;
mod message_passing; mod message_passing;
mod prefix; mod prefix;
@ -137,6 +138,7 @@ fn main() {
if config.groups.matches("piet") { if config.groups.matches("piet") {
report(&transform::transform_test(&mut runner, &config)); report(&transform::transform_test(&mut runner, &config));
report(&path::path_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config));
report(&draw::draw_test(&mut runner, &config));
} }
} }
} }