#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wmissing-braces" #include #include using namespace metal; template struct spvUnsafeArray { T elements[Num ? Num : 1]; thread T& operator [] (size_t pos) thread { return elements[pos]; } constexpr const thread T& operator [] (size_t pos) const thread { return elements[pos]; } device T& operator [] (size_t pos) device { return elements[pos]; } constexpr const device T& operator [] (size_t pos) const device { return elements[pos]; } constexpr const constant T& operator [] (size_t pos) const constant { return elements[pos]; } threadgroup T& operator [] (size_t pos) threadgroup { return elements[pos]; } constexpr const threadgroup T& operator [] (size_t pos) const threadgroup { return elements[pos]; } }; struct DrawMonoid { uint path_ix; uint clip_ix; uint scene_offset; uint info_offset; }; struct Alloc { uint offset; }; struct Config { uint mem_size; uint n_elements; uint n_pathseg; uint width_in_tiles; uint height_in_tiles; Alloc tile_alloc; Alloc bin_alloc; Alloc ptcl_alloc; Alloc pathseg_alloc; Alloc anno_alloc; Alloc trans_alloc; Alloc path_bbox_alloc; Alloc drawmonoid_alloc; Alloc clip_alloc; Alloc clip_bic_alloc; Alloc clip_stack_alloc; Alloc clip_bbox_alloc; Alloc draw_bbox_alloc; Alloc drawinfo_alloc; uint n_trans; uint n_path; uint n_clip; uint trans_offset; uint linewidth_offset; uint pathtag_offset; uint pathseg_offset; uint drawtag_offset; uint drawdata_offset; }; struct ConfigBuf { Config conf; }; struct SceneBuf { uint scene[1]; }; struct DrawMonoid_1 { uint path_ix; uint clip_ix; uint scene_offset; uint info_offset; }; struct ParentBuf { DrawMonoid_1 parent[1]; }; struct Memory { uint mem_offset; uint mem_error; uint blend_offset; uint memory[1]; }; constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { uint has_path = uint(tag_word != 0u); return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; } static inline __attribute__((always_inline)) DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) { DrawMonoid c; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; c.scene_offset = a.scene_offset + b.scene_offset; c.info_offset = a.info_offset + b.info_offset; return c; } static inline __attribute__((always_inline)) DrawMonoid draw_monoid_identity() { return DrawMonoid{ 0u, 0u, 0u, 0u }; } kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; uint drawtag_base = _93.conf.drawtag_offset >> uint(2); uint tag_word = _103.scene[drawtag_base + ix]; uint param = tag_word; DrawMonoid agg = map_tag(param); spvUnsafeArray local; local[0] = agg; for (uint i = 1u; i < 8u; i++) { tag_word = _103.scene[(drawtag_base + ix) + i]; uint param_1 = tag_word; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); agg = combine_draw_monoid(param_2, param_3); local[i] = agg; } sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i_1 = 0u; i_1 < 8u; i_1++) { threadgroup_barrier(mem_flags::mem_threadgroup); if (gl_LocalInvocationID.x >= (1u << i_1)) { DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; DrawMonoid param_4 = other; DrawMonoid param_5 = agg; agg = combine_draw_monoid(param_4, param_5); } threadgroup_barrier(mem_flags::mem_threadgroup); sh_scratch[gl_LocalInvocationID.x] = agg; } threadgroup_barrier(mem_flags::mem_threadgroup); DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { uint _206 = gl_WorkGroupID.x - 1u; row.path_ix = _203.parent[_206].path_ix; row.clip_ix = _203.parent[_206].clip_ix; row.scene_offset = _203.parent[_206].scene_offset; row.info_offset = _203.parent[_206].info_offset; } if (gl_LocalInvocationID.x > 0u) { DrawMonoid param_6 = row; DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_draw_monoid(param_6, param_7); } uint drawdata_base = _93.conf.drawdata_offset >> uint(2); uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2); float4 mat; float2 translate; float2 p0; float2 p1; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; if (i_2 > 0u) { DrawMonoid param_8 = m; DrawMonoid param_9 = local[i_2 - 1u]; m = combine_draw_monoid(param_8, param_9); } _285.memory[out_base + (i_2 * 4u)] = m.path_ix; _285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; _285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; _285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint di = drawinfo_base + (m.info_offset >> uint(2)); tag_word = _103.scene[(drawtag_base + ix) + i_2]; if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) { uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); float bbox_l = float(_285.memory[bbox_offset]) - 32768.0; float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0; float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0; float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float linewidth = as_type(_285.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u)) { uint trans_ix = _285.memory[bbox_offset + 5u]; uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); mat = as_type(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u])); if ((tag_word == 276u) || (tag_word == 732u)) { translate = as_type(uint2(_285.memory[t + 4u], _285.memory[t + 5u])); } } if (linewidth >= 0.0) { linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); } switch (tag_word) { case 68u: case 72u: { _285.memory[di] = as_type(linewidth); break; } case 276u: { _285.memory[di] = as_type(linewidth); p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); float line_x = dxy.x * scale; float line_y = dxy.y * scale; float line_c = -((p0.x * line_x) + (p0.y * line_y)); _285.memory[di + 1u] = as_type(line_x); _285.memory[di + 2u] = as_type(line_y); _285.memory[di + 3u] = as_type(line_c); break; } case 732u: { p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); float r0 = as_type(_103.scene[dd + 5u]); float r1 = as_type(_103.scene[dd + 6u]); float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z)); float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); inv_tr += p0; float2 center1 = p1 - p0; float rr = r1 / (r1 - r0); float rainv = rr / ((r1 * r1) - dot(center1, center1)); float2 c1 = center1 * rainv; float ra = rr * rainv; float roff = rr - 1.0; _285.memory[di] = as_type(linewidth); _285.memory[di + 1u] = as_type(inv_mat.x); _285.memory[di + 2u] = as_type(inv_mat.y); _285.memory[di + 3u] = as_type(inv_mat.z); _285.memory[di + 4u] = as_type(inv_mat.w); _285.memory[di + 5u] = as_type(inv_tr.x); _285.memory[di + 6u] = as_type(inv_tr.y); _285.memory[di + 7u] = as_type(c1.x); _285.memory[di + 8u] = as_type(c1.y); _285.memory[di + 9u] = as_type(ra); _285.memory[di + 10u] = as_type(roff); break; } case 5u: { break; } } } if ((tag_word == 5u) || (tag_word == 37u)) { uint path_ix = ~(out_ix + i_2); if (tag_word == 5u) { path_ix = m.path_ix; } _285.memory[clip_out_base + m.clip_ix] = path_ix; } } }