mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-11 04:51:32 +11:00
319 lines
11 KiB
Plaintext
Generated
319 lines
11 KiB
Plaintext
Generated
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
#pragma clang diagnostic ignored "-Wmissing-braces"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
template<typename T, size_t Num>
|
|
struct spvUnsafeArray
|
|
{
|
|
T elements[Num ? Num : 1];
|
|
|
|
thread T& operator [] (size_t pos) thread
|
|
{
|
|
return elements[pos];
|
|
}
|
|
constexpr const thread T& operator [] (size_t pos) const thread
|
|
{
|
|
return elements[pos];
|
|
}
|
|
|
|
device T& operator [] (size_t pos) device
|
|
{
|
|
return elements[pos];
|
|
}
|
|
constexpr const device T& operator [] (size_t pos) const device
|
|
{
|
|
return elements[pos];
|
|
}
|
|
|
|
constexpr const constant T& operator [] (size_t pos) const constant
|
|
{
|
|
return elements[pos];
|
|
}
|
|
|
|
threadgroup T& operator [] (size_t pos) threadgroup
|
|
{
|
|
return elements[pos];
|
|
}
|
|
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
|
{
|
|
return elements[pos];
|
|
}
|
|
};
|
|
|
|
struct DrawMonoid
|
|
{
|
|
uint path_ix;
|
|
uint clip_ix;
|
|
uint scene_offset;
|
|
uint info_offset;
|
|
};
|
|
|
|
struct Alloc
|
|
{
|
|
uint offset;
|
|
};
|
|
|
|
struct Config
|
|
{
|
|
uint mem_size;
|
|
uint n_elements;
|
|
uint n_pathseg;
|
|
uint width_in_tiles;
|
|
uint height_in_tiles;
|
|
Alloc tile_alloc;
|
|
Alloc bin_alloc;
|
|
Alloc ptcl_alloc;
|
|
Alloc pathseg_alloc;
|
|
Alloc anno_alloc;
|
|
Alloc trans_alloc;
|
|
Alloc path_bbox_alloc;
|
|
Alloc drawmonoid_alloc;
|
|
Alloc clip_alloc;
|
|
Alloc clip_bic_alloc;
|
|
Alloc clip_stack_alloc;
|
|
Alloc clip_bbox_alloc;
|
|
Alloc draw_bbox_alloc;
|
|
Alloc drawinfo_alloc;
|
|
uint n_trans;
|
|
uint n_path;
|
|
uint n_clip;
|
|
uint trans_offset;
|
|
uint linewidth_offset;
|
|
uint pathtag_offset;
|
|
uint pathseg_offset;
|
|
uint drawtag_offset;
|
|
uint drawdata_offset;
|
|
};
|
|
|
|
struct ConfigBuf
|
|
{
|
|
Config conf;
|
|
};
|
|
|
|
struct SceneBuf
|
|
{
|
|
uint scene[1];
|
|
};
|
|
|
|
struct DrawMonoid_1
|
|
{
|
|
uint path_ix;
|
|
uint clip_ix;
|
|
uint scene_offset;
|
|
uint info_offset;
|
|
};
|
|
|
|
struct ParentBuf
|
|
{
|
|
DrawMonoid_1 parent[1];
|
|
};
|
|
|
|
struct Memory
|
|
{
|
|
uint mem_offset;
|
|
uint mem_error;
|
|
uint blend_offset;
|
|
uint memory[1];
|
|
};
|
|
|
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
|
|
|
static inline __attribute__((always_inline))
|
|
DrawMonoid map_tag(thread const uint& tag_word)
|
|
{
|
|
uint has_path = uint(tag_word != 0u);
|
|
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
|
|
}
|
|
|
|
static inline __attribute__((always_inline))
|
|
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
|
|
{
|
|
DrawMonoid c;
|
|
c.path_ix = a.path_ix + b.path_ix;
|
|
c.clip_ix = a.clip_ix + b.clip_ix;
|
|
c.scene_offset = a.scene_offset + b.scene_offset;
|
|
c.info_offset = a.info_offset + b.info_offset;
|
|
return c;
|
|
}
|
|
|
|
static inline __attribute__((always_inline))
|
|
DrawMonoid draw_monoid_identity()
|
|
{
|
|
return DrawMonoid{ 0u, 0u, 0u, 0u };
|
|
}
|
|
|
|
kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
|
{
|
|
threadgroup DrawMonoid sh_scratch[256];
|
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
|
uint drawtag_base = _93.conf.drawtag_offset >> uint(2);
|
|
uint tag_word = _103.scene[drawtag_base + ix];
|
|
uint param = tag_word;
|
|
DrawMonoid agg = map_tag(param);
|
|
spvUnsafeArray<DrawMonoid, 8> local;
|
|
local[0] = agg;
|
|
for (uint i = 1u; i < 8u; i++)
|
|
{
|
|
tag_word = _103.scene[(drawtag_base + ix) + i];
|
|
uint param_1 = tag_word;
|
|
DrawMonoid param_2 = agg;
|
|
DrawMonoid param_3 = map_tag(param_1);
|
|
agg = combine_draw_monoid(param_2, param_3);
|
|
local[i] = agg;
|
|
}
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
|
{
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
|
{
|
|
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
|
DrawMonoid param_4 = other;
|
|
DrawMonoid param_5 = agg;
|
|
agg = combine_draw_monoid(param_4, param_5);
|
|
}
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
}
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
DrawMonoid row = draw_monoid_identity();
|
|
if (gl_WorkGroupID.x > 0u)
|
|
{
|
|
uint _206 = gl_WorkGroupID.x - 1u;
|
|
row.path_ix = _203.parent[_206].path_ix;
|
|
row.clip_ix = _203.parent[_206].clip_ix;
|
|
row.scene_offset = _203.parent[_206].scene_offset;
|
|
row.info_offset = _203.parent[_206].info_offset;
|
|
}
|
|
if (gl_LocalInvocationID.x > 0u)
|
|
{
|
|
DrawMonoid param_6 = row;
|
|
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
|
|
row = combine_draw_monoid(param_6, param_7);
|
|
}
|
|
uint drawdata_base = _93.conf.drawdata_offset >> uint(2);
|
|
uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2);
|
|
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
|
uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u);
|
|
uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2);
|
|
float4 mat;
|
|
float2 translate;
|
|
float2 p0;
|
|
float2 p1;
|
|
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
|
{
|
|
DrawMonoid m = row;
|
|
if (i_2 > 0u)
|
|
{
|
|
DrawMonoid param_8 = m;
|
|
DrawMonoid param_9 = local[i_2 - 1u];
|
|
m = combine_draw_monoid(param_8, param_9);
|
|
}
|
|
_285.memory[out_base + (i_2 * 4u)] = m.path_ix;
|
|
_285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix;
|
|
_285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset;
|
|
_285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset;
|
|
uint dd = drawdata_base + (m.scene_offset >> uint(2));
|
|
uint di = drawinfo_base + (m.info_offset >> uint(2));
|
|
tag_word = _103.scene[(drawtag_base + ix) + i_2];
|
|
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
|
|
{
|
|
uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
|
|
float bbox_l = float(_285.memory[bbox_offset]) - 32768.0;
|
|
float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0;
|
|
float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0;
|
|
float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0;
|
|
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
|
float linewidth = as_type<float>(_285.memory[bbox_offset + 4u]);
|
|
uint fill_mode = uint(linewidth >= 0.0);
|
|
if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u))
|
|
{
|
|
uint trans_ix = _285.memory[bbox_offset + 5u];
|
|
uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
|
mat = as_type<float4>(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u]));
|
|
if ((tag_word == 276u) || (tag_word == 732u))
|
|
{
|
|
translate = as_type<float2>(uint2(_285.memory[t + 4u], _285.memory[t + 5u]));
|
|
}
|
|
}
|
|
if (linewidth >= 0.0)
|
|
{
|
|
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
|
|
}
|
|
switch (tag_word)
|
|
{
|
|
case 68u:
|
|
case 72u:
|
|
{
|
|
_285.memory[di] = as_type<uint>(linewidth);
|
|
break;
|
|
}
|
|
case 276u:
|
|
{
|
|
_285.memory[di] = as_type<uint>(linewidth);
|
|
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
|
|
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
|
|
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
|
|
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
|
|
float2 dxy = p1 - p0;
|
|
float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
|
|
float line_x = dxy.x * scale;
|
|
float line_y = dxy.y * scale;
|
|
float line_c = -((p0.x * line_x) + (p0.y * line_y));
|
|
_285.memory[di + 1u] = as_type<uint>(line_x);
|
|
_285.memory[di + 2u] = as_type<uint>(line_y);
|
|
_285.memory[di + 3u] = as_type<uint>(line_c);
|
|
break;
|
|
}
|
|
case 732u:
|
|
{
|
|
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
|
|
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
|
|
float r0 = as_type<float>(_103.scene[dd + 5u]);
|
|
float r1 = as_type<float>(_103.scene[dd + 6u]);
|
|
float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z));
|
|
float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det;
|
|
float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y);
|
|
inv_tr += p0;
|
|
float2 center1 = p1 - p0;
|
|
float rr = r1 / (r1 - r0);
|
|
float rainv = rr / ((r1 * r1) - dot(center1, center1));
|
|
float2 c1 = center1 * rainv;
|
|
float ra = rr * rainv;
|
|
float roff = rr - 1.0;
|
|
_285.memory[di] = as_type<uint>(linewidth);
|
|
_285.memory[di + 1u] = as_type<uint>(inv_mat.x);
|
|
_285.memory[di + 2u] = as_type<uint>(inv_mat.y);
|
|
_285.memory[di + 3u] = as_type<uint>(inv_mat.z);
|
|
_285.memory[di + 4u] = as_type<uint>(inv_mat.w);
|
|
_285.memory[di + 5u] = as_type<uint>(inv_tr.x);
|
|
_285.memory[di + 6u] = as_type<uint>(inv_tr.y);
|
|
_285.memory[di + 7u] = as_type<uint>(c1.x);
|
|
_285.memory[di + 8u] = as_type<uint>(c1.y);
|
|
_285.memory[di + 9u] = as_type<uint>(ra);
|
|
_285.memory[di + 10u] = as_type<uint>(roff);
|
|
break;
|
|
}
|
|
case 5u:
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if ((tag_word == 5u) || (tag_word == 37u))
|
|
{
|
|
uint path_ix = ~(out_ix + i_2);
|
|
if (tag_word == 5u)
|
|
{
|
|
path_ix = m.path_ix;
|
|
}
|
|
_285.memory[clip_out_base + m.clip_ix] = path_ix;
|
|
}
|
|
}
|
|
}
|
|
|