#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"

#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>

using namespace metal;

// Implementation of the GLSL findLSB() function
template<typename T>
inline T spvFindLSB(T x)
{
    return select(ctz(x), T(-1), x == T(0));
}

struct Alloc
{
    uint offset;
};

struct BinInstanceRef
{
    uint offset;
};

struct BinInstance
{
    uint element_ix;
};

struct PathRef
{
    uint offset;
};

struct TileRef
{
    uint offset;
};

struct Path
{
    uint4 bbox;
    TileRef tiles;
};

struct TileSegRef
{
    uint offset;
};

struct Tile
{
    TileSegRef tile;
    int backdrop;
};

struct CmdStrokeRef
{
    uint offset;
};

struct CmdStroke
{
    uint tile_ref;
    float half_width;
};

struct CmdFillRef
{
    uint offset;
};

struct CmdFill
{
    uint tile_ref;
    int backdrop;
};

struct CmdColorRef
{
    uint offset;
};

struct CmdColor
{
    uint rgba_color;
};

struct CmdLinGradRef
{
    uint offset;
};

struct CmdLinGrad
{
    uint index;
    float line_x;
    float line_y;
    float line_c;
};

struct CmdRadGradRef
{
    uint offset;
};

struct CmdRadGrad
{
    uint index;
    float4 mat;
    float2 xlat;
    float2 c1;
    float ra;
    float roff;
};

struct CmdImageRef
{
    uint offset;
};

struct CmdImage
{
    uint index;
    int2 offset;
};

struct CmdEndClipRef
{
    uint offset;
};

struct CmdEndClip
{
    uint blend;
};

struct CmdJumpRef
{
    uint offset;
};

struct CmdJump
{
    uint new_ref;
};

struct CmdRef
{
    uint offset;
};

struct Memory
{
    uint mem_offset;
    uint mem_error;
    uint blend_offset;
    uint memory[1];
};

struct Alloc_1
{
    uint offset;
};

struct Config
{
    uint mem_size;
    uint n_elements;
    uint n_pathseg;
    uint width_in_tiles;
    uint height_in_tiles;
    Alloc_1 tile_alloc;
    Alloc_1 bin_alloc;
    Alloc_1 ptcl_alloc;
    Alloc_1 pathseg_alloc;
    Alloc_1 anno_alloc;
    Alloc_1 trans_alloc;
    Alloc_1 path_bbox_alloc;
    Alloc_1 drawmonoid_alloc;
    Alloc_1 clip_alloc;
    Alloc_1 clip_bic_alloc;
    Alloc_1 clip_stack_alloc;
    Alloc_1 clip_bbox_alloc;
    Alloc_1 draw_bbox_alloc;
    Alloc_1 drawinfo_alloc;
    uint n_trans;
    uint n_path;
    uint n_clip;
    uint trans_offset;
    uint linewidth_offset;
    uint pathtag_offset;
    uint pathseg_offset;
    uint drawtag_offset;
    uint drawdata_offset;
};

struct ConfigBuf
{
    Config conf;
};

struct SceneBuf
{
    uint scene[1];
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);

// Returns true if none of the pipeline stages named in dep_stage have flagged a memory error.
static inline __attribute__((always_inline)) bool check_deps(thread const uint& dep_stage, device Memory& v_267)
{
    uint _273 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, 0u, memory_order_relaxed);
    return (_273 & dep_stage) == 0u;
}

static inline __attribute__((always_inline)) Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
{
    return Alloc{ a.offset + offset };
}

static inline __attribute__((always_inline)) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
    return true;
}

static inline __attribute__((always_inline)) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_267)
{
    Alloc param = alloc;
    uint param_1 = offset;
    if (!touch_mem(param, param_1))
    {
        return 0u;
    }
    uint v = v_267.memory[offset];
    return v;
}

static inline __attribute__((always_inline)) Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
    Alloc a;
    a.offset = offset;
    return a;
}

static inline __attribute__((always_inline)) BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const uint& index)
{
    return BinInstanceRef{ ref.offset + (index * 4u) };
}

static inline __attribute__((always_inline)) BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint raw0 = read_mem(param, param_1, v_267);
    BinInstance s;
    s.element_ix = raw0;
    return s;
}

static inline __attribute__((always_inline)) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint raw0 = read_mem(param, param_1, v_267);
    Alloc param_2 = a;
    uint param_3 = ix + 1u;
    uint raw1 = read_mem(param_2, param_3, v_267);
    Alloc param_4 = a;
    uint param_5 = ix + 2u;
    uint raw2 = read_mem(param_4, param_5, v_267);
    Path s;
    s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
    s.tiles = TileRef{ raw2 };
    return s;
}

static inline __attribute__((always_inline)) void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a)
{
}

static inline __attribute__((always_inline)) Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, const device ConfigBuf& v_891)
{
    uint param = 0u;
    uint param_1 = v_891.conf.mem_size;
    bool param_2 = mem_ok;
    return new_alloc(param, param_1, param_2);
}

static inline __attribute__((always_inline)) Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint raw0 = read_mem(param, param_1, v_267);
    Alloc param_2 = a;
    uint param_3 = ix + 1u;
    uint raw1 = read_mem(param_2, param_3, v_267);
    Tile s;
    s.tile = TileSegRef{ raw0 };
    s.backdrop = int(raw1);
    return s;
}

// Bump-allocates size bytes from the global memory buffer; on overflow, records
// the failing stage in mem_error and returns offset 0.
static inline __attribute__((always_inline)) uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_267)
{
    uint _282 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.mem_offset, size, memory_order_relaxed);
    uint offset = _282;
    if ((offset + size) > mem_size)
    {
        uint _292 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, stage, memory_order_relaxed);
        offset = 0u;
    }
    return offset;
}

static inline __attribute__((always_inline)) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_267)
{
    Alloc param = alloc;
    uint param_1 = offset;
    if (!touch_mem(param, param_1))
    {
        return;
    }
    v_267.memory[offset] = val;
}

static inline __attribute__((always_inline)) void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.new_ref;
    write_mem(param, param_1, param_2, v_267);
}

static inline __attribute__((always_inline)) void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 11u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u };
    CmdJump param_5 = s;
    CmdJump_write(param_3, param_4, param_5, v_267);
}

// Ensures there is room in the current ptcl chunk for the next command; if not,
// allocates a new 1024-byte chunk and writes a Jump command linking to it.
static inline __attribute__((always_inline)) void alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, thread bool& mem_ok, device Memory& v_267, const device ConfigBuf& v_891)
{
    if (cmd_ref.offset < cmd_limit)
    {
        return;
    }
    uint param = 1024u;
    uint param_1 = v_891.conf.mem_size;
    uint param_2 = 8u;
    uint _915 = malloc_stage(param, param_1, param_2, v_267);
    uint new_cmd = _915;
    if (new_cmd == 0u)
    {
        mem_ok = false;
    }
    if (mem_ok)
    {
        CmdJump jump = CmdJump{ new_cmd };
        Alloc param_3 = cmd_alloc;
        CmdRef param_4 = cmd_ref;
        CmdJump param_5 = jump;
        Cmd_Jump_write(param_3, param_4, param_5, v_267);
    }
    uint param_6 = new_cmd;
    uint param_7 = 1024u;
    bool param_8 = true;
    cmd_alloc = new_alloc(param_6, param_7, param_8);
    cmd_ref = CmdRef{ new_cmd };
    cmd_limit = (new_cmd + 1024u) - 144u;
}

static inline __attribute__((always_inline)) void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.tile_ref;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    uint param_4 = ix + 1u;
    uint param_5 = uint(s.backdrop);
    write_mem(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 1u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u };
    CmdFill param_5 = s;
    CmdFill_write(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 3u;
    write_mem(param, param_1, param_2, v_267);
}

static inline __attribute__((always_inline)) void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.tile_ref;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    uint param_4 = ix + 1u;
    uint param_5 = as_type<uint>(s.half_width);
    write_mem(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 2u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u };
    CmdStroke param_5 = s;
    CmdStroke_write(param_3, param_4, param_5, v_267);
}

// Emits either a Fill, Solid, or Stroke command for the current tile, depending on
// whether the draw is a fill (linewidth < 0) and whether the tile has segment data.
static inline __attribute__((always_inline)) void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, thread bool& mem_ok, device Memory& v_267)
{
    if (linewidth < 0.0)
    {
        if (tile.tile.offset != 0u)
        {
            CmdFill cmd_fill = CmdFill{ tile.tile.offset, tile.backdrop };
            if (mem_ok)
            {
                Alloc param = alloc;
                CmdRef param_1 = cmd_ref;
                CmdFill param_2 = cmd_fill;
                Cmd_Fill_write(param, param_1, param_2, v_267);
            }
            cmd_ref.offset += 12u;
        }
        else
        {
            if (mem_ok)
            {
                Alloc param_3 = alloc;
                CmdRef param_4 = cmd_ref;
                Cmd_Solid_write(param_3, param_4, v_267);
            }
            cmd_ref.offset += 4u;
        }
    }
    else
    {
        CmdStroke cmd_stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth };
        if (mem_ok)
        {
            Alloc param_5 = alloc;
            CmdRef param_6 = cmd_ref;
            CmdStroke param_7 = cmd_stroke;
            Cmd_Stroke_write(param_5, param_6, param_7, v_267);
        }
        cmd_ref.offset += 12u;
    }
}

static inline __attribute__((always_inline)) void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.rgba_color;
    write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline)) void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 5u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u };
    CmdColor param_5 = s;
    CmdColor_write(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.index;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    uint param_4 = ix + 1u;
    uint param_5 = as_type<uint>(s.line_x);
    write_mem(param_3, param_4, param_5, v_267);
    Alloc param_6 = a;
    uint param_7 = ix + 2u;
    uint param_8 = as_type<uint>(s.line_y);
    write_mem(param_6, param_7, param_8, v_267);
    Alloc param_9 = a;
    uint param_10 = ix + 3u;
    uint param_11 = as_type<uint>(s.line_c);
    write_mem(param_9, param_10, param_11, v_267);
}

static inline __attribute__((always_inline)) void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 6u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u };
    CmdLinGrad param_5 = s;
    CmdLinGrad_write(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.index;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    uint param_4 = ix + 1u;
    uint param_5 = as_type<uint>(s.mat.x);
    write_mem(param_3, param_4, param_5, v_267);
    Alloc param_6 = a;
    uint param_7 = ix + 2u;
    uint param_8 = as_type<uint>(s.mat.y);
    write_mem(param_6, param_7, param_8, v_267);
    Alloc param_9 = a;
    uint param_10 = ix + 3u;
    uint param_11 = as_type<uint>(s.mat.z);
    write_mem(param_9, param_10, param_11, v_267);
    Alloc param_12 = a;
    uint param_13 = ix + 4u;
    uint param_14 = as_type<uint>(s.mat.w);
    write_mem(param_12, param_13, param_14, v_267);
    Alloc param_15 = a;
    uint param_16 = ix + 5u;
    uint param_17 = as_type<uint>(s.xlat.x);
    write_mem(param_15, param_16, param_17, v_267);
    Alloc param_18 = a;
    uint param_19 = ix + 6u;
    uint param_20 = as_type<uint>(s.xlat.y);
    write_mem(param_18, param_19, param_20, v_267);
    Alloc param_21 = a;
    uint param_22 = ix + 7u;
    uint param_23 = as_type<uint>(s.c1.x);
    write_mem(param_21, param_22, param_23, v_267);
    Alloc param_24 = a;
    uint param_25 = ix + 8u;
    uint param_26 = as_type<uint>(s.c1.y);
    write_mem(param_24, param_25, param_26, v_267);
    Alloc param_27 = a;
    uint param_28 = ix + 9u;
    uint param_29 = as_type<uint>(s.ra);
    write_mem(param_27, param_28, param_29, v_267);
    Alloc param_30 = a;
    uint param_31 = ix + 10u;
    uint param_32 = as_type<uint>(s.roff);
    write_mem(param_30, param_31, param_32, v_267);
}

static inline __attribute__((always_inline)) void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 7u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u };
    CmdRadGrad param_5 = s;
    CmdRadGrad_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline)) void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.index;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    uint param_4 = ix + 1u;
    uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
    write_mem(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 8u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u };
    CmdImage param_5 = s;
    CmdImage_write(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 9u;
    write_mem(param, param_1, param_2, v_267);
}

static inline __attribute__((always_inline)) void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_267)
{
    uint ix = ref.offset >> uint(2);
    Alloc param = a;
    uint param_1 = ix + 0u;
    uint param_2 = s.blend;
    write_mem(param, param_1, param_2, v_267);
}

static inline __attribute__((always_inline)) void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 10u;
    write_mem(param, param_1, param_2, v_267);
    Alloc param_3 = a;
    CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u };
    CmdEndClip param_5 = s;
    CmdEndClip_write(param_3, param_4, param_5, v_267);
}

static inline __attribute__((always_inline)) void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
    Alloc param = a;
    uint param_1 = ref.offset >> uint(2);
    uint param_2 = 0u;
    write_mem(param, param_1, param_2, v_267);
}

// Coarse rasterization: each 256-thread workgroup covers one 16x16-tile bin,
// merges the binned elements that touch it, and writes a per-tile command list
// (ptcl) of fills, strokes, gradients, images, and clips.
kernel void main0(device Memory& v_267 [[buffer(0)]], const device ConfigBuf& v_891 [[buffer(1)]], const device SceneBuf& _1390 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
    threadgroup uint sh_bitmaps[8][256];
    threadgroup Alloc sh_part_elements[256];
    threadgroup uint sh_part_count[256];
    threadgroup uint sh_elements[256];
    threadgroup uint sh_tile_stride[256];
    threadgroup uint sh_tile_width[256];
    threadgroup uint sh_tile_x0[256];
    threadgroup uint sh_tile_y0[256];
    threadgroup uint sh_tile_base[256];
    threadgroup uint sh_tile_count[256];
    bool mem_ok = true;
    uint param = 7u;
    bool _1012 = check_deps(param, v_267);
    if (!_1012)
    {
        return;
    }
    uint width_in_bins = ((v_891.conf.width_in_tiles + 16u) - 1u) / 16u;
    uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
    uint partition_ix = 0u;
    uint n_partitions = ((v_891.conf.n_elements + 256u) - 1u) / 256u;
    uint th_ix = gl_LocalInvocationID.x;
    uint bin_tile_x = 16u * gl_WorkGroupID.x;
    uint bin_tile_y = 16u * gl_WorkGroupID.y;
    uint tile_x = gl_LocalInvocationID.x % 16u;
    uint tile_y = gl_LocalInvocationID.x / 16u;
    uint this_tile_ix = (((bin_tile_y + tile_y) * v_891.conf.width_in_tiles) + bin_tile_x) + tile_x;
    Alloc param_1;
    param_1.offset = v_891.conf.ptcl_alloc.offset;
    uint param_2 = this_tile_ix * 1024u;
    uint param_3 = 1024u;
    Alloc cmd_alloc = slice_mem(param_1, param_2, param_3);
    CmdRef cmd_ref = CmdRef{ cmd_alloc.offset };
    uint cmd_limit = (cmd_ref.offset + 1024u) - 144u;
    uint clip_depth = 0u;
    uint clip_zero_depth = 0u;
    uint rd_ix = 0u;
    uint wr_ix = 0u;
    uint part_start_ix = 0u;
    uint ready_ix = 0u;
    Alloc param_4 = cmd_alloc;
    uint param_5 = 0u;
    uint param_6 = 8u;
    Alloc scratch_alloc = slice_mem(param_4, param_5, param_6);
    cmd_ref.offset += 4u;
    uint render_blend_depth = 0u;
    uint max_blend_depth = 0u;
    uint drawmonoid_start = v_891.conf.drawmonoid_alloc.offset >> uint(2);
    uint drawtag_start = v_891.conf.drawtag_offset >> uint(2);
    uint drawdata_start = v_891.conf.drawdata_offset >> uint(2);
    uint drawinfo_start = v_891.conf.drawinfo_alloc.offset >> uint(2);
    Alloc param_7;
    Alloc param_9;
    uint _1322;
    uint element_ix;
    Alloc param_18;
    uint tile_count;
    uint _1622;
    float linewidth;
    CmdLinGrad cmd_lin;
    CmdRadGrad cmd_rad;
    while (true)
    {
        for (uint i = 0u; i < 8u; i++)
        {
            sh_bitmaps[i][th_ix] = 0u;
        }
        bool _1374;
        // Stage 1: gather the next batch of binned elements (up to 256) into sh_elements.
        for (;;)
        {
            if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
            {
                part_start_ix = ready_ix;
                uint count = 0u;
                bool _1174 = th_ix < 256u;
                bool _1182;
                if (_1174)
                {
                    _1182 = (partition_ix + th_ix) < n_partitions;
                }
                else
                {
                    _1182 = _1174;
                }
                if (_1182)
                {
                    uint in_ix = (v_891.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
                    param_7.offset = v_891.conf.bin_alloc.offset;
                    uint param_8 = in_ix;
                    count = read_mem(param_7, param_8, v_267);
                    param_9.offset = v_891.conf.bin_alloc.offset;
                    uint param_10 = in_ix + 1u;
                    uint offset = read_mem(param_9, param_10, v_267);
                    uint param_11 = offset;
                    uint param_12 = count * 4u;
                    bool param_13 = true;
                    sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13);
                }
                // Inclusive prefix sum of per-partition element counts.
                for (uint i_1 = 0u; i_1 < 8u; i_1++)
                {
                    if (th_ix < 256u)
                    {
                        sh_part_count[th_ix] = count;
                    }
                    threadgroup_barrier(mem_flags::mem_threadgroup);
                    if (th_ix < 256u)
                    {
                        if (th_ix >= (1u << i_1))
                        {
                            count += sh_part_count[th_ix - (1u << i_1)];
                        }
                    }
                    threadgroup_barrier(mem_flags::mem_threadgroup);
                }
                if (th_ix < 256u)
                {
                    sh_part_count[th_ix] = part_start_ix + count;
                }
                threadgroup_barrier(mem_flags::mem_threadgroup);
                ready_ix = sh_part_count[255];
                partition_ix += 256u;
            }
            uint ix = rd_ix + th_ix;
            if ((ix >= wr_ix) && (ix < ready_ix))
            {
                uint part_ix = 0u;
                for (uint i_2 = 0u; i_2 < 8u; i_2++)
                {
                    uint probe = part_ix + (128u >> i_2);
                    if (ix >= sh_part_count[probe - 1u])
                    {
                        part_ix = probe;
                    }
                }
                if (part_ix > 0u)
                {
                    _1322 = sh_part_count[part_ix - 1u];
                }
                else
                {
                    _1322 = part_start_ix;
                }
                ix -= _1322;
                Alloc bin_alloc = sh_part_elements[part_ix];
                BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset };
                BinInstanceRef param_14 = inst_ref;
                uint param_15 = ix;
                Alloc param_16 = bin_alloc;
                BinInstanceRef param_17 = BinInstance_index(param_14, param_15);
                BinInstance inst = BinInstance_read(param_16, param_17, v_267);
                sh_elements[th_ix] = inst.element_ix;
            }
            threadgroup_barrier(mem_flags::mem_threadgroup);
            wr_ix = min((rd_ix + 256u), ready_ix);
            bool _1364 = (wr_ix - rd_ix) < 256u;
            if (_1364)
            {
                _1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
            }
            else
            {
                _1374 = _1364;
            }
            if (_1374)
            {
                continue;
            }
            else
            {
                break;
            }
        }
        // Stage 2: for each gathered element, read its Path bbox and count the tiles it covers.
        uint tag = 0u;
        if ((th_ix + rd_ix) < wr_ix)
        {
            element_ix = sh_elements[th_ix];
            tag = _1390.scene[drawtag_start + element_ix];
        }
        switch (tag)
        {
            case 68u:
            case 72u:
            case 276u:
            case 732u:
            case 5u:
            case 37u:
            {
                uint drawmonoid_base = drawmonoid_start + (4u * element_ix);
                uint path_ix = v_267.memory[drawmonoid_base];
                param_18.offset = v_891.conf.tile_alloc.offset;
                PathRef param_19 = PathRef{ v_891.conf.tile_alloc.offset + (path_ix * 12u) };
                Path path = Path_read(param_18, param_19, v_267);
                uint stride = path.bbox.z - path.bbox.x;
                sh_tile_stride[th_ix] = stride;
                int dx = int(path.bbox.x) - int(bin_tile_x);
                int dy = int(path.bbox.y) - int(bin_tile_y);
                int x0 = clamp(dx, 0, 16);
                int y0 = clamp(dy, 0, 16);
                int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);
                int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16);
                sh_tile_width[th_ix] = uint(x1 - x0);
                sh_tile_x0[th_ix] = uint(x0);
                sh_tile_y0[th_ix] = uint(y0);
                tile_count = uint(x1 - x0) * uint(y1 - y0);
                uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);
                sh_tile_base[th_ix] = base;
                uint param_20 = path.tiles.offset;
                uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
                bool param_22 = true;
                Alloc path_alloc = new_alloc(param_20, param_21, param_22);
                uint param_23 = th_ix;
                Alloc param_24 = path_alloc;
                write_tile_alloc(param_23, param_24);
                break;
            }
            default:
            {
                tile_count = 0u;
                break;
            }
        }
        sh_tile_count[th_ix] = tile_count;
        // Inclusive prefix sum of per-element tile counts.
        for (uint i_3 = 0u; i_3 < 8u; i_3++)
        {
            threadgroup_barrier(mem_flags::mem_threadgroup);
            if (th_ix >= (1u << i_3))
            {
                tile_count += sh_tile_count[th_ix - (1u << i_3)];
            }
            threadgroup_barrier(mem_flags::mem_threadgroup);
            sh_tile_count[th_ix] = tile_count;
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
        uint total_tile_count = sh_tile_count[255];
        // Stage 3: mark, in sh_bitmaps, which elements actually touch which tiles of this bin.
        for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u)
        {
            uint el_ix = 0u;
            for (uint i_4 = 0u; i_4 < 8u; i_4++)
            {
                uint probe_1 = el_ix + (128u >> i_4);
                if (ix_1 >= sh_tile_count[probe_1 - 1u])
                {
                    el_ix = probe_1;
                }
            }
            uint element_ix_1 = sh_elements[el_ix];
            uint tag_1 = _1390.scene[drawtag_start + element_ix_1];
            if (el_ix > 0u)
            {
                _1622 = sh_tile_count[el_ix - 1u];
            }
            else
            {
                _1622 = 0u;
            }
            uint seq_ix = ix_1 - _1622;
            uint width = sh_tile_width[el_ix];
            uint x = sh_tile_x0[el_ix] + (seq_ix % width);
            uint y = sh_tile_y0[el_ix] + (seq_ix / width);
            bool include_tile = false;
            uint param_25 = el_ix;
            bool param_26 = true;
            Alloc param_27 = read_tile_alloc(param_25, param_26, v_891);
            TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
            Tile tile = Tile_read(param_27, param_28, v_267);
            bool is_clip = (tag_1 & 1u) != 0u;
            bool is_blend = false;
            if (is_clip)
            {
                uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
                uint scene_offset = v_267.memory[drawmonoid_base_1 + 2u];
                uint dd = drawdata_start + (scene_offset >> uint(2));
                uint blend = _1390.scene[dd];
                is_blend = blend != 32771u;
            }
            bool _1706 = tile.tile.offset != 0u;
            bool _1715;
            if (!_1706)
            {
                _1715 = (tile.backdrop == 0) == is_clip;
            }
            else
            {
                _1715 = _1706;
            }
            include_tile = _1715 || is_blend;
            if (include_tile)
            {
                uint el_slice = el_ix / 32u;
                uint el_mask = 1u << (el_ix & 31u);
                uint _1737 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
            }
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
        // Stage 4: each thread owns one tile of the bin; walk its bitmap and emit ptcl commands.
        uint slice_ix = 0u;
        uint bitmap = sh_bitmaps[0][th_ix];
        while (true)
        {
            if (bitmap == 0u)
            {
                slice_ix++;
                if (slice_ix == 8u)
                {
                    break;
                }
                bitmap = sh_bitmaps[slice_ix][th_ix];
                if (bitmap == 0u)
                {
                    continue;
                }
            }
            uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap)));
            uint element_ix_2 = sh_elements[element_ref_ix];
            bitmap &= (bitmap - 1u);
            uint drawtag = _1390.scene[drawtag_start + element_ix_2];
            if (clip_zero_depth == 0u)
            {
                uint param_29 = element_ref_ix;
                bool param_30 = true;
                Alloc param_31 = read_tile_alloc(param_29, param_30, v_891);
                TileRef param_32 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
                Tile tile_1 = Tile_read(param_31, param_32, v_267);
                uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
                uint scene_offset_1 = v_267.memory[drawmonoid_base_2 + 2u];
                uint info_offset = v_267.memory[drawmonoid_base_2 + 3u];
                uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2));
                uint di = drawinfo_start + (info_offset >> uint(2));
                switch (drawtag)
                {
                    case 68u:
                    {
                        linewidth = as_type<float>(v_267.memory[di]);
                        Alloc param_33 = cmd_alloc;
                        CmdRef param_34 = cmd_ref;
                        uint param_35 = cmd_limit;
                        alloc_cmd(param_33, param_34, param_35, mem_ok, v_267, v_891);
                        cmd_alloc = param_33;
                        cmd_ref = param_34;
                        cmd_limit = param_35;
                        Alloc param_36 = cmd_alloc;
                        CmdRef param_37 = cmd_ref;
                        Tile param_38 = tile_1;
                        float param_39 = linewidth;
                        write_fill(param_36, param_37, param_38, param_39, mem_ok, v_267);
                        cmd_ref = param_37;
                        uint rgba = _1390.scene[dd_1];
                        if (mem_ok)
                        {
                            Alloc param_40 = cmd_alloc;
                            CmdRef param_41 = cmd_ref;
                            CmdColor param_42 = CmdColor{ rgba };
                            Cmd_Color_write(param_40, param_41, param_42, v_267);
                        }
                        cmd_ref.offset += 8u;
                        break;
                    }
                    case 276u:
                    {
                        Alloc param_43 = cmd_alloc;
                        CmdRef param_44 = cmd_ref;
                        uint param_45 = cmd_limit;
                        alloc_cmd(param_43, param_44, param_45, mem_ok, v_267, v_891);
                        cmd_alloc = param_43;
                        cmd_ref = param_44;
                        cmd_limit = param_45;
                        linewidth = as_type<float>(v_267.memory[di]);
                        Alloc param_46 = cmd_alloc;
                        CmdRef param_47 = cmd_ref;
                        Tile param_48 = tile_1;
                        float param_49 = linewidth;
                        write_fill(param_46, param_47, param_48, param_49, mem_ok, v_267);
                        cmd_ref = param_47;
                        cmd_lin.index = _1390.scene[dd_1];
                        cmd_lin.line_x = as_type<float>(v_267.memory[di + 1u]);
                        cmd_lin.line_y = as_type<float>(v_267.memory[di + 2u]);
                        cmd_lin.line_c = as_type<float>(v_267.memory[di + 3u]);
                        if (mem_ok)
                        {
                            Alloc param_50 = cmd_alloc;
                            CmdRef param_51 = cmd_ref;
                            CmdLinGrad param_52 = cmd_lin;
                            Cmd_LinGrad_write(param_50, param_51, param_52, v_267);
                        }
                        cmd_ref.offset += 20u;
                        break;
                    }
                    case 732u:
                    {
                        Alloc param_53 = cmd_alloc;
                        CmdRef param_54 = cmd_ref;
                        uint param_55 = cmd_limit;
                        alloc_cmd(param_53, param_54, param_55, mem_ok, v_267, v_891);
                        cmd_alloc = param_53;
                        cmd_ref = param_54;
                        cmd_limit = param_55;
                        linewidth = as_type<float>(v_267.memory[di]);
                        Alloc param_56 = cmd_alloc;
                        CmdRef param_57 = cmd_ref;
                        Tile param_58 = tile_1;
                        float param_59 = linewidth;
                        write_fill(param_56, param_57, param_58, param_59, mem_ok, v_267);
                        cmd_ref = param_57;
                        cmd_rad.index = _1390.scene[dd_1];
                        cmd_rad.mat = as_type<float4>(uint4(v_267.memory[di + 1u], v_267.memory[di + 2u], v_267.memory[di + 3u], v_267.memory[di + 4u]));
                        cmd_rad.xlat = as_type<float2>(uint2(v_267.memory[di + 5u], v_267.memory[di + 6u]));
                        cmd_rad.c1 = as_type<float2>(uint2(v_267.memory[di + 7u], v_267.memory[di + 8u]));
                        cmd_rad.ra = as_type<float>(v_267.memory[di + 9u]);
                        cmd_rad.roff = as_type<float>(v_267.memory[di + 10u]);
                        if (mem_ok)
                        {
                            Alloc param_60 = cmd_alloc;
                            CmdRef param_61 = cmd_ref;
                            CmdRadGrad param_62 = cmd_rad;
                            Cmd_RadGrad_write(param_60, param_61, param_62, v_267);
                        }
                        cmd_ref.offset += 48u;
                        break;
                    }
                    case 72u:
                    {
                        Alloc param_63 = cmd_alloc;
                        CmdRef param_64 = cmd_ref;
                        uint param_65 = cmd_limit;
                        alloc_cmd(param_63, param_64, param_65, mem_ok, v_267, v_891);
                        cmd_alloc = param_63;
                        cmd_ref = param_64;
                        cmd_limit = param_65;
                        linewidth = as_type<float>(v_267.memory[di]);
                        Alloc param_66 = cmd_alloc;
                        CmdRef param_67 = cmd_ref;
                        Tile param_68 = tile_1;
                        float param_69 = linewidth;
                        write_fill(param_66, param_67, param_68, param_69, mem_ok, v_267);
                        cmd_ref = param_67;
                        uint index = _1390.scene[dd_1];
                        uint raw1 = _1390.scene[dd_1 + 1u];
                        int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
                        if (mem_ok)
                        {
                            Alloc param_70 = cmd_alloc;
                            CmdRef param_71 = cmd_ref;
                            CmdImage param_72 = CmdImage{ index, offset_1 };
                            Cmd_Image_write(param_70, param_71, param_72, v_267);
                        }
                        cmd_ref.offset += 12u;
                        break;
                    }
                    case 5u:
                    {
                        bool _2120 = tile_1.tile.offset == 0u;
                        bool _2126;
                        if (_2120)
                        {
                            _2126 = tile_1.backdrop == 0;
                        }
                        else
                        {
                            _2126 = _2120;
                        }
                        if (_2126)
                        {
                            clip_zero_depth = clip_depth + 1u;
                        }
                        else
                        {
                            Alloc param_73 = cmd_alloc;
                            CmdRef param_74 = cmd_ref;
                            uint param_75 = cmd_limit;
                            alloc_cmd(param_73, param_74, param_75, mem_ok, v_267, v_891);
                            cmd_alloc = param_73;
                            cmd_ref = param_74;
                            cmd_limit = param_75;
                            if (mem_ok)
                            {
                                Alloc param_76 = cmd_alloc;
                                CmdRef param_77 = cmd_ref;
                                Cmd_BeginClip_write(param_76, param_77, v_267);
                            }
                            cmd_ref.offset += 4u;
                            render_blend_depth++;
                            max_blend_depth = max(max_blend_depth, render_blend_depth);
                        }
                        clip_depth++;
                        break;
                    }
                    case 37u:
                    {
                        clip_depth--;
                        Alloc param_78 = cmd_alloc;
                        CmdRef param_79 = cmd_ref;
                        Tile param_80 = tile_1;
                        float param_81 = -1.0;
                        write_fill(param_78, param_79, param_80, param_81, mem_ok, v_267);
                        cmd_ref = param_79;
                        uint blend_1 = _1390.scene[dd_1];
                        if (mem_ok)
                        {
                            Alloc param_82 = cmd_alloc;
                            CmdRef param_83 = cmd_ref;
                            CmdEndClip param_84 = CmdEndClip{ blend_1 };
                            Cmd_EndClip_write(param_82, param_83, param_84, v_267);
                        }
                        cmd_ref.offset += 8u;
                        render_blend_depth--;
                        break;
                    }
                }
            }
            else
            {
                // Inside an all-zero clip: only track clip nesting, emit nothing.
                switch (drawtag)
                {
                    case 5u:
                    {
                        clip_depth++;
                        break;
                    }
                    case 37u:
                    {
                        if (clip_depth == clip_zero_depth)
                        {
                            clip_zero_depth = 0u;
                        }
                        clip_depth--;
                        break;
                    }
                }
            }
        }
        threadgroup_barrier(mem_flags::mem_threadgroup);
        rd_ix += 256u;
        if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))
        {
            break;
        }
    }
    bool _2231 = (bin_tile_x + tile_x) < v_891.conf.width_in_tiles;
    bool _2240;
    if (_2231)
    {
        _2240 = (bin_tile_y + tile_y) < v_891.conf.height_in_tiles;
    }
    else
    {
        _2240 = _2231;
    }
    if (_2240)
    {
        if (mem_ok)
        {
            Alloc param_85 = cmd_alloc;
            CmdRef param_86 = cmd_ref;
            Cmd_End_write(param_85, param_86, v_267);
        }
        if (max_blend_depth > 4u)
        {
            // Deeply nested blends need a scratch buffer; reserve it and record its offset
            // in the first word of this tile's command list.
            uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u;
            uint _2264 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.blend_offset, scratch_size, memory_order_relaxed);
            uint scratch = _2264;
            Alloc param_87 = scratch_alloc;
            uint param_88 = scratch_alloc.offset >> uint(2);
            uint param_89 = scratch;
            write_mem(param_87, param_88, param_89, v_267);
        }
    }
}