mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
acb3933d94
This patch switches to a variable size encoding of draw objects. In addition to the CPU-side scene encoding, it changes the representation of intermediate per draw object state from the `Annotated` struct to a variable "info" encoding. In addition, the bounding boxes are moved to a separate array (for a more "structure of arrays" approach). Data that's unchanged from the scene encoding is not copied. Rather, downstream stages can access the data from the scene buffer (reducing allocation and copying). Prefix sums, computed in `DrawMonoid`, track the offset of both scene and intermediate data. The tags for the CPU-side encoding have been split into their own stream (again a change from AoS to SoA style). This is not necessarily the final form. There's some stuff (including at least one piet-gpu-derive type) that can be deleted. In addition, the linewidth field should probably move from the info to path-specific. Also, the 1:1 correspondence between draw object and path has not yet been broken. Closes #152
140 lines
3.9 KiB
Text
Generated
140 lines
3.9 KiB
Text
Generated
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
#pragma clang diagnostic ignored "-Wmissing-braces"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Fixed-length array wrapper that can be subscripted from every Metal address
// space. Each overload forwards straight to the raw array; no bounds checking
// is performed. The storage always holds at least one element, so a
// zero-length instantiation is still a complete type.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    T elements[Num == 0 ? 1 : Num];

    // thread address space
    thread T& operator [] (size_t index) thread { return elements[index]; }
    constexpr const thread T& operator [] (size_t index) const thread { return elements[index]; }

    // device address space
    device T& operator [] (size_t index) device { return elements[index]; }
    constexpr const device T& operator [] (size_t index) const device { return elements[index]; }

    // constant address space (read-only, so only the const overload exists)
    constexpr const constant T& operator [] (size_t index) const constant { return elements[index]; }

    // threadgroup address space
    threadgroup T& operator [] (size_t index) threadgroup { return elements[index]; }
    constexpr const threadgroup T& operator [] (size_t index) const threadgroup { return elements[index]; }
};
|
|
|
|
// Per-thread working value of the scan: four unsigned counters that
// combine_draw_monoid() adds field by field. Field order matters, because
// draw_monoid_identity() aggregate-initialises in declaration order.
struct DrawMonoid
{
    // Running path index (meaning inferred from the name only).
    uint path_ix;

    // Running clip index (meaning inferred from the name only).
    uint clip_ix;

    // Running offset; presumably into the scene data, per the commit note
    // accompanying this file -- units are not visible here.
    uint scene_offset;

    // Running offset; presumably into the intermediate "info" data, per the
    // same commit note -- units are not visible here.
    uint info_offset;
};
|
|
|
|
// Buffer-resident twin of DrawMonoid: same four fields in the same order.
// It is the element type of DataBuf::data, and main0 copies between the two
// types one field at a time because they are distinct types.
struct DrawMonoid_1
{
    // Counterpart of DrawMonoid::path_ix.
    uint path_ix;

    // Counterpart of DrawMonoid::clip_ix.
    uint clip_ix;

    // Counterpart of DrawMonoid::scene_offset.
    uint scene_offset;

    // Counterpart of DrawMonoid::info_offset.
    uint info_offset;
};
|
|
|
|
// Layout of the buffer bound at [[buffer(0)]] in main0.
struct DataBuf
{
    // Declared with length 1, but main0 indexes it at
    // gl_GlobalInvocationID.x * 8 + {0..7}; the usable extent is therefore
    // whatever the bound buffer provides, not the declared length.
    DrawMonoid_1 data[1];
};
|
|
|
|
// Compile-time constant (256, 1, 1). The x component equals the length of the
// sh_scratch array in main0; nothing in this file reads the constant itself,
// hence [[maybe_unused]]. Presumably the threadgroup size the kernel is meant
// to be dispatched with -- confirm against the host-side dispatch code.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
|
|
|
// Binary operator of the scan: returns the field-by-field sum of `a` and `b`.
// All fields are unsigned, so each addition wraps on overflow.
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
    // Initialisers follow the declaration order of DrawMonoid.
    return DrawMonoid{
        a.path_ix + b.path_ix,
        a.clip_ix + b.clip_ix,
        a.scene_offset + b.scene_offset,
        a.info_offset + b.info_offset
    };
}
|
|
|
|
// Returns the neutral element for combine_draw_monoid(): every field is zero,
// so combining it with any value yields that value unchanged.
static inline __attribute__((always_inline))
DrawMonoid draw_monoid_identity()
{
    DrawMonoid zero;
    zero.path_ix = 0u;
    zero.clip_ix = 0u;
    zero.scene_offset = 0u;
    zero.info_offset = 0u;
    return zero;
}
|
|
|
|
// In-place inclusive scan of the DrawMonoid_1 elements in `_71.data`, using
// combine_draw_monoid() as the operator.
//
// Each thread owns 8 consecutive elements starting at
// gl_GlobalInvocationID.x * 8. The scan runs in three phases:
//   1. every thread scans its own 8 elements sequentially;
//   2. the per-thread totals are scanned across the threadgroup through
//      threadgroup memory, doubling the look-back distance on each of 8 steps;
//   3. every thread adds the total of all preceding threads in its
//      threadgroup to its 8 partial results and writes them back.
//
// No carry is taken from other threadgroups, and no bounds check is made: the
// bound buffer must hold 8 elements for every thread in the grid. The scratch
// array has 256 entries indexed by gl_LocalInvocationID.x, so the threadgroup
// must not exceed 256 threads.
kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
    threadgroup DrawMonoid sh_scratch[256];

    const uint lane = gl_LocalInvocationID.x;
    const uint base = gl_GlobalInvocationID.x * 8u;

    // Phase 1: sequential inclusive scan over this thread's 8 elements.
    // Fields are copied one by one because the buffer stores DrawMonoid_1.
    spvUnsafeArray<DrawMonoid, 8> partial;
    partial[0] = DrawMonoid{
        _71.data[base].path_ix,
        _71.data[base].clip_ix,
        _71.data[base].scene_offset,
        _71.data[base].info_offset
    };
    for (uint k = 1u; k < 8u; k++)
    {
        const uint src = base + k;
        DrawMonoid prev = partial[k - 1u];
        DrawMonoid cur = DrawMonoid{
            _71.data[src].path_ix,
            _71.data[src].clip_ix,
            _71.data[src].scene_offset,
            _71.data[src].info_offset
        };
        partial[k] = combine_draw_monoid(prev, cur);
    }

    // Phase 2: inclusive scan of the per-thread totals across the threadgroup.
    DrawMonoid total = partial[7];
    sh_scratch[lane] = total;
    for (uint step = 0u; step < 8u; step++)
    {
        const uint stride = 1u << step;

        // Make the previous round's writes visible before anyone reads them.
        threadgroup_barrier(mem_flags::mem_threadgroup);
        if (lane >= stride)
        {
            DrawMonoid left = sh_scratch[lane - stride];
            DrawMonoid right = total;
            total = combine_draw_monoid(left, right);
        }

        // Every read of this round must finish before any slot is overwritten.
        threadgroup_barrier(mem_flags::mem_threadgroup);
        sh_scratch[lane] = total;
    }
    threadgroup_barrier(mem_flags::mem_threadgroup);

    // Phase 3: the exclusive prefix for this thread is the inclusive total of
    // the thread to its left; the first thread starts from the identity.
    DrawMonoid prefix = draw_monoid_identity();
    if (lane > 0u)
    {
        prefix = sh_scratch[lane - 1u];
    }

    for (uint k = 0u; k < 8u; k++)
    {
        DrawMonoid lhs = prefix;
        DrawMonoid rhs = partial[k];
        DrawMonoid result = combine_draw_monoid(lhs, rhs);

        const uint dst = base + k;
        _71.data[dst].path_ix = result.path_ix;
        _71.data[dst].clip_ix = result.clip_ix;
        _71.data[dst].scene_offset = result.scene_offset;
        _71.data[dst].info_offset = result.info_offset;
    }
}
|
|
|