vello/piet-gpu/shader/gen/draw_leaf.msl
Raph Levien 875c8badf4 Add draw object stage
This is one of the stages in the new element pipeline. It's a simple
one, just a prefix sum of a couple counts, and some of it will probably
get merged with a downstream stage, but we'll do it separately for now
for convenience.

This patch also contains an update to Vulkan tools 1.2.198, which
accounts for the large diff of translated shaders.
2021-12-02 13:37:16 -08:00

236 lines
5.7 KiB
Plaintext

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct ElementRef
{
uint offset;
};
struct ElementTag
{
uint tag;
uint flags;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
};
struct ParentBuf
{
DrawMonoid_1 parent[1];
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc bbox_alloc;
Alloc drawmonoid_alloc;
uint n_trans;
uint trans_offset;
uint pathtag_offset;
uint linewidth_offset;
uint pathseg_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49)
{
uint tag_and_flags = v_49.scene[ref.offset >> uint(2)];
return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
switch (tag_word)
{
case 4u:
case 5u:
case 6u:
{
return DrawMonoid{ 1u, 0u };
}
case 9u:
{
return DrawMonoid{ 1u, 1u };
}
case 10u:
{
return DrawMonoid{ 0u, 1u };
}
default:
{
return DrawMonoid{ 0u, 0u };
}
}
}
static inline __attribute__((always_inline))
ElementRef Element_index(thread const ElementRef& ref, thread const uint& index)
{
return ElementRef{ ref.offset + (index * 36u) };
}
static inline __attribute__((always_inline))
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid tag_monoid_identity()
{
return DrawMonoid{ 0u, 0u };
}
kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[512];
uint ix = gl_GlobalInvocationID.x * 8u;
ElementRef ref = ElementRef{ ix * 36u };
ElementRef param = ref;
uint tag_word = Element_tag(param, v_49).tag;
uint param_1 = tag_word;
DrawMonoid agg = map_tag(param_1);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
ElementRef param_2 = ref;
uint param_3 = i;
ElementRef param_4 = Element_index(param_2, param_3);
tag_word = Element_tag(param_4, v_49).tag;
uint param_5 = tag_word;
DrawMonoid param_6 = agg;
DrawMonoid param_7 = map_tag(param_5);
agg = combine_tag_monoid(param_6, param_7);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_8 = other;
DrawMonoid param_9 = agg;
agg = combine_tag_monoid(param_8, param_9);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _221 = gl_WorkGroupID.x - 1u;
row.path_ix = _218.parent[_221].path_ix;
row.clip_ix = _218.parent[_221].clip_ix;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_10 = row;
DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_tag_monoid(param_10, param_11);
}
uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u);
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_12 = row;
DrawMonoid param_13 = local[i_2];
DrawMonoid m = combine_tag_monoid(param_12, param_13);
_277.memory[out_base + (i_2 * 2u)] = m.path_ix;
_277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix;
}
}