vello/piet-gpu/shader/gen/coarse.msl
Commit bbdd4432f5 ("commit compiled shaders") by GitHub Action, 2022-07-14 14:28:45 +00:00

#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Implementation of the GLSL findLSB() function
template<typename T>
inline T spvFindLSB(T x)
{
return select(ctz(x), T(-1), x == T(0));
}
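// Data layout shared with the rest of the piet-gpu pipeline: the *Ref structs
// are byte offsets into the memory arena, and the Cmd* structs are the
// per-tile command list (PTCL) entries emitted by this coarse stage.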
struct Alloc
{
uint offset;
};
struct BinInstanceRef
{
uint offset;
};
struct BinInstance
{
uint element_ix;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct TileSegRef
{
uint offset;
};
struct Tile
{
TileSegRef tile;
int backdrop;
};
struct CmdStrokeRef
{
uint offset;
};
struct CmdStroke
{
uint tile_ref;
float half_width;
};
struct CmdFillRef
{
uint offset;
};
struct CmdFill
{
uint tile_ref;
int backdrop;
};
struct CmdColorRef
{
uint offset;
};
struct CmdColor
{
uint rgba_color;
};
struct CmdLinGradRef
{
uint offset;
};
struct CmdLinGrad
{
uint index;
float line_x;
float line_y;
float line_c;
};
struct CmdRadGradRef
{
uint offset;
};
struct CmdRadGrad
{
uint index;
float4 mat;
float2 xlat;
float2 c1;
float ra;
float roff;
};
struct CmdImageRef
{
uint offset;
};
struct CmdImage
{
uint index;
int2 offset;
};
struct CmdEndClipRef
{
uint offset;
};
struct CmdEndClip
{
uint blend;
};
struct CmdJumpRef
{
uint offset;
};
struct CmdJump
{
uint new_ref;
};
struct CmdRef
{
uint offset;
};
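// Memory is the single GPU-side allocation arena: mem_offset is bumped
// atomically by malloc_stage, mem_error collects per-stage failure bits, and
// blend_offset reserves scratch space for deep clip/blend stacks.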
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
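// Config carries buffer sizes and the sub-allocation offsets for the buffers
// shared across pipeline stages (tile, bin, ptcl, path segment, draw object,
// clip), plus offsets into the scene buffer.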
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
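// check_deps returns false if any prior stage named in dep_stage recorded an
// allocation failure, letting this workgroup exit early.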
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_267)
{
uint _273 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, 0u, memory_order_relaxed);
return (_273 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
{
return Alloc{ a.offset + offset };
}
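// Bounds checking is compiled out in this build: touch_mem always succeeds,
// Alloc carries only an offset, and read_mem/write_mem index the arena
// directly in 32-bit words.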
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_267)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_267.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const uint& index)
{
return BinInstanceRef{ ref.offset + (index * 4u) };
}
static inline __attribute__((always_inline))
BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_267);
BinInstance s;
s.element_ix = raw0;
return s;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_267);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_267);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_267);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
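// write_tile_alloc / read_tile_alloc are no-ops here: with checks disabled, a
// tile "alloc" simply spans the whole memory buffer.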
static inline __attribute__((always_inline))
void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a)
{
}
static inline __attribute__((always_inline))
Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, const device ConfigBuf& v_891)
{
uint param = 0u;
uint param_1 = v_891.conf.mem_size;
bool param_2 = mem_ok;
return new_alloc(param, param_1, param_2);
}
static inline __attribute__((always_inline))
Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_267);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_267);
Tile s;
s.tile = TileSegRef{ raw0 };
s.backdrop = int(raw1);
return s;
}
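// malloc_stage bump-allocates size bytes from the arena; on overflow it sets
// the stage bit in mem_error and returns 0 so callers can flag mem_ok = false.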
static inline __attribute__((always_inline))
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_267)
{
uint _282 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.mem_offset, size, memory_order_relaxed);
uint offset = _282;
if ((offset + size) > mem_size)
{
uint _292 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return offset;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_267)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_267.memory[offset] = val;
}
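// The Cmd*_write helpers serialize PTCL commands as a tag word followed by the
// payload. Tags used below: 0 end, 1 fill, 2 stroke, 3 solid, 5 color,
// 6 linear gradient, 7 radial gradient, 8 image, 9 begin clip, 10 end clip,
// 11 jump.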
static inline __attribute__((always_inline))
void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.new_ref;
write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 11u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u };
CmdJump param_5 = s;
CmdJump_write(param_3, param_4, param_5, v_267);
}
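// alloc_cmd ensures the current PTCL segment has room for the next command;
// when cmd_ref passes cmd_limit it allocates a fresh 1024-byte segment and
// links it with a CmdJump (the 144-byte margin leaves space for the largest
// command plus that jump).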
static inline __attribute__((always_inline))
void alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, thread bool& mem_ok, device Memory& v_267, const device ConfigBuf& v_891)
{
if (cmd_ref.offset < cmd_limit)
{
return;
}
uint param = 1024u;
uint param_1 = v_891.conf.mem_size;
uint param_2 = 8u;
uint _915 = malloc_stage(param, param_1, param_2, v_267);
uint new_cmd = _915;
if (new_cmd == 0u)
{
mem_ok = false;
}
if (mem_ok)
{
CmdJump jump = CmdJump{ new_cmd };
Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref;
CmdJump param_5 = jump;
Cmd_Jump_write(param_3, param_4, param_5, v_267);
}
uint param_6 = new_cmd;
uint param_7 = 1024u;
bool param_8 = true;
cmd_alloc = new_alloc(param_6, param_7, param_8);
cmd_ref = CmdRef{ new_cmd };
cmd_limit = (new_cmd + 1024u) - 144u;
}
static inline __attribute__((always_inline))
void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.tile_ref;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = uint(s.backdrop);
write_mem(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 1u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u };
CmdFill param_5 = s;
CmdFill_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 3u;
write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline))
void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.tile_ref;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.half_width);
write_mem(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 2u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u };
CmdStroke param_5 = s;
CmdStroke_write(param_3, param_4, param_5, v_267);
}
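// write_fill emits the coverage command for a tile: CmdFill when the tile has
// path segments, CmdSolid when it has none (fully covered by the backdrop), or
// CmdStroke when linewidth is non-negative.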
static inline __attribute__((always_inline))
void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, thread bool& mem_ok, device Memory& v_267)
{
if (linewidth < 0.0)
{
if (tile.tile.offset != 0u)
{
CmdFill cmd_fill = CmdFill{ tile.tile.offset, tile.backdrop };
if (mem_ok)
{
Alloc param = alloc;
CmdRef param_1 = cmd_ref;
CmdFill param_2 = cmd_fill;
Cmd_Fill_write(param, param_1, param_2, v_267);
}
cmd_ref.offset += 12u;
}
else
{
if (mem_ok)
{
Alloc param_3 = alloc;
CmdRef param_4 = cmd_ref;
Cmd_Solid_write(param_3, param_4, v_267);
}
cmd_ref.offset += 4u;
}
}
else
{
CmdStroke cmd_stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth };
if (mem_ok)
{
Alloc param_5 = alloc;
CmdRef param_6 = cmd_ref;
CmdStroke param_7 = cmd_stroke;
Cmd_Stroke_write(param_5, param_6, param_7, v_267);
}
cmd_ref.offset += 12u;
}
}
static inline __attribute__((always_inline))
void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.rgba_color;
write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u };
CmdColor param_5 = s;
CmdColor_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.index;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.line_x);
write_mem(param_3, param_4, param_5, v_267);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.line_y);
write_mem(param_6, param_7, param_8, v_267);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.line_c);
write_mem(param_9, param_10, param_11, v_267);
}
static inline __attribute__((always_inline))
void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 6u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u };
CmdLinGrad param_5 = s;
CmdLinGrad_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.index;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.mat.x);
write_mem(param_3, param_4, param_5, v_267);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.mat.y);
write_mem(param_6, param_7, param_8, v_267);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.mat.z);
write_mem(param_9, param_10, param_11, v_267);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.mat.w);
write_mem(param_12, param_13, param_14, v_267);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = as_type<uint>(s.xlat.x);
write_mem(param_15, param_16, param_17, v_267);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.xlat.y);
write_mem(param_18, param_19, param_20, v_267);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.c1.x);
write_mem(param_21, param_22, param_23, v_267);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = as_type<uint>(s.c1.y);
write_mem(param_24, param_25, param_26, v_267);
Alloc param_27 = a;
uint param_28 = ix + 9u;
uint param_29 = as_type<uint>(s.ra);
write_mem(param_27, param_28, param_29, v_267);
Alloc param_30 = a;
uint param_31 = ix + 10u;
uint param_32 = as_type<uint>(s.roff);
write_mem(param_30, param_31, param_32, v_267);
}
static inline __attribute__((always_inline))
void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 7u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u };
CmdRadGrad param_5 = s;
CmdRadGrad_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.index;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));
write_mem(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 8u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u };
CmdImage param_5 = s;
CmdImage_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 9u;
write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline))
void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_267)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.blend;
write_mem(param, param_1, param_2, v_267);
}
static inline __attribute__((always_inline))
void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 10u;
write_mem(param, param_1, param_2, v_267);
Alloc param_3 = a;
CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u };
CmdEndClip param_5 = s;
CmdEndClip_write(param_3, param_4, param_5, v_267);
}
static inline __attribute__((always_inline))
void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = 0u;
write_mem(param, param_1, param_2, v_267);
}
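// Coarse rasterization entry point: one 256-thread workgroup per 16x16-tile
// bin. It merges the bin's element list, computes per-tile coverage bitmaps in
// threadgroup memory, and writes the per-tile command lists (PTCL) consumed by
// the fine rasterization stage.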
kernel void main0(device Memory& v_267 [[buffer(0)]], const device ConfigBuf& v_891 [[buffer(1)]], const device SceneBuf& _1390 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_bitmaps[8][256];
threadgroup Alloc sh_part_elements[256];
threadgroup uint sh_part_count[256];
threadgroup uint sh_elements[256];
threadgroup uint sh_tile_stride[256];
threadgroup uint sh_tile_width[256];
threadgroup uint sh_tile_x0[256];
threadgroup uint sh_tile_y0[256];
threadgroup uint sh_tile_base[256];
threadgroup uint sh_tile_count[256];
bool mem_ok = true;
uint param = 7u;
bool _1012 = check_deps(param, v_267);
if (!_1012)
{
return;
}
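// Derive this workgroup's bin coordinates and the PTCL slice for the single
// tile owned by this thread (tile_x, tile_y within the 16x16 bin).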
uint width_in_bins = ((v_891.conf.width_in_tiles + 16u) - 1u) / 16u;
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
uint partition_ix = 0u;
uint n_partitions = ((v_891.conf.n_elements + 256u) - 1u) / 256u;
uint th_ix = gl_LocalInvocationID.x;
uint bin_tile_x = 16u * gl_WorkGroupID.x;
uint bin_tile_y = 16u * gl_WorkGroupID.y;
uint tile_x = gl_LocalInvocationID.x % 16u;
uint tile_y = gl_LocalInvocationID.x / 16u;
uint this_tile_ix = (((bin_tile_y + tile_y) * v_891.conf.width_in_tiles) + bin_tile_x) + tile_x;
Alloc param_1;
param_1.offset = v_891.conf.ptcl_alloc.offset;
uint param_2 = this_tile_ix * 1024u;
uint param_3 = 1024u;
Alloc cmd_alloc = slice_mem(param_1, param_2, param_3);
CmdRef cmd_ref = CmdRef{ cmd_alloc.offset };
uint cmd_limit = (cmd_ref.offset + 1024u) - 144u;
uint clip_depth = 0u;
uint clip_zero_depth = 0u;
uint rd_ix = 0u;
uint wr_ix = 0u;
uint part_start_ix = 0u;
uint ready_ix = 0u;
Alloc param_4 = cmd_alloc;
uint param_5 = 0u;
uint param_6 = 8u;
Alloc scratch_alloc = slice_mem(param_4, param_5, param_6);
cmd_ref.offset += 4u;
uint render_blend_depth = 0u;
uint max_blend_depth = 0u;
uint drawmonoid_start = v_891.conf.drawmonoid_alloc.offset >> uint(2);
uint drawtag_start = v_891.conf.drawtag_offset >> uint(2);
uint drawdata_start = v_891.conf.drawdata_offset >> uint(2);
uint drawinfo_start = v_891.conf.drawinfo_alloc.offset >> uint(2);
Alloc param_7;
Alloc param_9;
uint _1322;
uint element_ix;
Alloc param_18;
uint tile_count;
uint _1622;
float linewidth;
CmdLinGrad cmd_lin;
CmdRadGrad cmd_rad;
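// Outer loop: refill sh_elements with up to 256 binned elements, tally their
// tile coverage, then walk the per-tile bitmaps and emit commands.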
while (true)
{
for (uint i = 0u; i < 8u; i++)
{
sh_bitmaps[i][th_ix] = 0u;
}
bool _1374;
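// Stage 1: read bin output. Each thread loads one partition's instance count
// and element-list allocation for this bin; a prefix sum over sh_part_count
// then locates individual elements, which are staged into sh_elements.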
for (;;)
{
if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
{
part_start_ix = ready_ix;
uint count = 0u;
bool _1174 = th_ix < 256u;
bool _1182;
if (_1174)
{
_1182 = (partition_ix + th_ix) < n_partitions;
}
else
{
_1182 = _1174;
}
if (_1182)
{
uint in_ix = (v_891.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
param_7.offset = v_891.conf.bin_alloc.offset;
uint param_8 = in_ix;
count = read_mem(param_7, param_8, v_267);
param_9.offset = v_891.conf.bin_alloc.offset;
uint param_10 = in_ix + 1u;
uint offset = read_mem(param_9, param_10, v_267);
uint param_11 = offset;
uint param_12 = count * 4u;
bool param_13 = true;
sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13);
}
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
if (th_ix < 256u)
{
sh_part_count[th_ix] = count;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th_ix < 256u)
{
if (th_ix >= (1u << i_1))
{
count += sh_part_count[th_ix - (1u << i_1)];
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if (th_ix < 256u)
{
sh_part_count[th_ix] = part_start_ix + count;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
ready_ix = sh_part_count[255];
partition_ix += 256u;
}
uint ix = rd_ix + th_ix;
if ((ix >= wr_ix) && (ix < ready_ix))
{
uint part_ix = 0u;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
uint probe = part_ix + (128u >> i_2);
if (ix >= sh_part_count[probe - 1u])
{
part_ix = probe;
}
}
if (part_ix > 0u)
{
_1322 = sh_part_count[part_ix - 1u];
}
else
{
_1322 = part_start_ix;
}
ix -= _1322;
Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset };
BinInstanceRef param_14 = inst_ref;
uint param_15 = ix;
Alloc param_16 = bin_alloc;
BinInstanceRef param_17 = BinInstance_index(param_14, param_15);
BinInstance inst = BinInstance_read(param_16, param_17, v_267);
sh_elements[th_ix] = inst.element_ix;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
wr_ix = min((rd_ix + 256u), ready_ix);
bool _1364 = (wr_ix - rd_ix) < 256u;
if (_1364)
{
_1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
}
else
{
_1374 = _1364;
}
if (_1374)
{
continue;
}
else
{
break;
}
}
uint tag = 0u;
if ((th_ix + rd_ix) < wr_ix)
{
element_ix = sh_elements[th_ix];
tag = _1390.scene[drawtag_start + element_ix];
}
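// Stage 2: per-element tile bookkeeping. The draw tags handled here are
// 68 fill color, 72 image, 276 linear gradient, 732 radial gradient,
// 5 begin clip, 37 end clip; everything else contributes no tiles.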
switch (tag)
{
case 68u:
case 72u:
case 276u:
case 732u:
case 5u:
case 37u:
{
uint drawmonoid_base = drawmonoid_start + (4u * element_ix);
uint path_ix = v_267.memory[drawmonoid_base];
param_18.offset = v_891.conf.tile_alloc.offset;
PathRef param_19 = PathRef{ v_891.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_18, param_19, v_267);
uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride;
int dx = int(path.bbox.x) - int(bin_tile_x);
int dy = int(path.bbox.y) - int(bin_tile_y);
int x0 = clamp(dx, 0, 16);
int y0 = clamp(dy, 0, 16);
int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);
int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16);
sh_tile_width[th_ix] = uint(x1 - x0);
sh_tile_x0[th_ix] = uint(x0);
sh_tile_y0[th_ix] = uint(y0);
tile_count = uint(x1 - x0) * uint(y1 - y0);
uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);
sh_tile_base[th_ix] = base;
uint param_20 = path.tiles.offset;
uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_22 = true;
Alloc path_alloc = new_alloc(param_20, param_21, param_22);
uint param_23 = th_ix;
Alloc param_24 = path_alloc;
write_tile_alloc(param_23, param_24);
break;
}
default:
{
tile_count = 0u;
break;
}
}
sh_tile_count[th_ix] = tile_count;
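// Inclusive prefix sum of per-element tile counts across the workgroup.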
for (uint i_3 = 0u; i_3 < 8u; i_3++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th_ix >= (1u << i_3))
{
tile_count += sh_tile_count[th_ix - (1u << i_3)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_tile_count[th_ix] = tile_count;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_tile_count = sh_tile_count[255];
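// Stage 3: for each (element, tile) pair, decide whether the tile needs work:
// it has segments, its backdrop is non-zero (for clips: zero), or it is a clip
// carrying a blend other than the default. Matching tiles get the element's
// bit set in the per-tile coverage bitmap (sh_bitmaps).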
for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u)
{
uint el_ix = 0u;
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
uint probe_1 = el_ix + (128u >> i_4);
if (ix_1 >= sh_tile_count[probe_1 - 1u])
{
el_ix = probe_1;
}
}
uint element_ix_1 = sh_elements[el_ix];
uint tag_1 = _1390.scene[drawtag_start + element_ix_1];
if (el_ix > 0u)
{
_1622 = sh_tile_count[el_ix - 1u];
}
else
{
_1622 = 0u;
}
uint seq_ix = ix_1 - _1622;
uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + (seq_ix % width);
uint y = sh_tile_y0[el_ix] + (seq_ix / width);
bool include_tile = false;
uint param_25 = el_ix;
bool param_26 = true;
Alloc param_27 = read_tile_alloc(param_25, param_26, v_891);
TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
Tile tile = Tile_read(param_27, param_28, v_267);
bool is_clip = (tag_1 & 1u) != 0u;
bool is_blend = false;
if (is_clip)
{
uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
uint scene_offset = v_267.memory[drawmonoid_base_1 + 2u];
uint dd = drawdata_start + (scene_offset >> uint(2));
uint blend = _1390.scene[dd];
is_blend = blend != 32771u;
}
bool _1706 = tile.tile.offset != 0u;
bool _1715;
if (!_1706)
{
_1715 = (tile.backdrop == 0) == is_clip;
}
else
{
_1715 = _1706;
}
include_tile = _1715 || is_blend;
if (include_tile)
{
uint el_slice = el_ix / 32u;
uint el_mask = 1u << (el_ix & 31u);
uint _1737 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed);
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
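// Stage 4: each thread now walks the coverage bitmaps of its own tile in
// element order and appends PTCL commands for the covered draw objects,
// tracking clip nesting via clip_depth / clip_zero_depth.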
uint slice_ix = 0u;
uint bitmap = sh_bitmaps[0][th_ix];
while (true)
{
if (bitmap == 0u)
{
slice_ix++;
if (slice_ix == 8u)
{
break;
}
bitmap = sh_bitmaps[slice_ix][th_ix];
if (bitmap == 0u)
{
continue;
}
}
uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap)));
uint element_ix_2 = sh_elements[element_ref_ix];
bitmap &= (bitmap - 1u);
uint drawtag = _1390.scene[drawtag_start + element_ix_2];
if (clip_zero_depth == 0u)
{
uint param_29 = element_ref_ix;
bool param_30 = true;
Alloc param_31 = read_tile_alloc(param_29, param_30, v_891);
TileRef param_32 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Tile tile_1 = Tile_read(param_31, param_32, v_267);
uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
uint scene_offset_1 = v_267.memory[drawmonoid_base_2 + 2u];
uint info_offset = v_267.memory[drawmonoid_base_2 + 3u];
uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2));
uint di = drawinfo_start + (info_offset >> uint(2));
switch (drawtag)
{
case 68u:
{
linewidth = as_type<float>(v_267.memory[di]);
Alloc param_33 = cmd_alloc;
CmdRef param_34 = cmd_ref;
uint param_35 = cmd_limit;
alloc_cmd(param_33, param_34, param_35, mem_ok, v_267, v_891);
cmd_alloc = param_33;
cmd_ref = param_34;
cmd_limit = param_35;
Alloc param_36 = cmd_alloc;
CmdRef param_37 = cmd_ref;
Tile param_38 = tile_1;
float param_39 = linewidth;
write_fill(param_36, param_37, param_38, param_39, mem_ok, v_267);
cmd_ref = param_37;
uint rgba = _1390.scene[dd_1];
if (mem_ok)
{
Alloc param_40 = cmd_alloc;
CmdRef param_41 = cmd_ref;
CmdColor param_42 = CmdColor{ rgba };
Cmd_Color_write(param_40, param_41, param_42, v_267);
}
cmd_ref.offset += 8u;
break;
}
case 276u:
{
Alloc param_43 = cmd_alloc;
CmdRef param_44 = cmd_ref;
uint param_45 = cmd_limit;
alloc_cmd(param_43, param_44, param_45, mem_ok, v_267, v_891);
cmd_alloc = param_43;
cmd_ref = param_44;
cmd_limit = param_45;
linewidth = as_type<float>(v_267.memory[di]);
Alloc param_46 = cmd_alloc;
CmdRef param_47 = cmd_ref;
Tile param_48 = tile_1;
float param_49 = linewidth;
write_fill(param_46, param_47, param_48, param_49, mem_ok, v_267);
cmd_ref = param_47;
cmd_lin.index = _1390.scene[dd_1];
cmd_lin.line_x = as_type<float>(v_267.memory[di + 1u]);
cmd_lin.line_y = as_type<float>(v_267.memory[di + 2u]);
cmd_lin.line_c = as_type<float>(v_267.memory[di + 3u]);
if (mem_ok)
{
Alloc param_50 = cmd_alloc;
CmdRef param_51 = cmd_ref;
CmdLinGrad param_52 = cmd_lin;
Cmd_LinGrad_write(param_50, param_51, param_52, v_267);
}
cmd_ref.offset += 20u;
break;
}
case 732u:
{
Alloc param_53 = cmd_alloc;
CmdRef param_54 = cmd_ref;
uint param_55 = cmd_limit;
alloc_cmd(param_53, param_54, param_55, mem_ok, v_267, v_891);
cmd_alloc = param_53;
cmd_ref = param_54;
cmd_limit = param_55;
linewidth = as_type<float>(v_267.memory[di]);
Alloc param_56 = cmd_alloc;
CmdRef param_57 = cmd_ref;
Tile param_58 = tile_1;
float param_59 = linewidth;
write_fill(param_56, param_57, param_58, param_59, mem_ok, v_267);
cmd_ref = param_57;
cmd_rad.index = _1390.scene[dd_1];
cmd_rad.mat = as_type<float4>(uint4(v_267.memory[di + 1u], v_267.memory[di + 2u], v_267.memory[di + 3u], v_267.memory[di + 4u]));
cmd_rad.xlat = as_type<float2>(uint2(v_267.memory[di + 5u], v_267.memory[di + 6u]));
cmd_rad.c1 = as_type<float2>(uint2(v_267.memory[di + 7u], v_267.memory[di + 8u]));
cmd_rad.ra = as_type<float>(v_267.memory[di + 9u]);
cmd_rad.roff = as_type<float>(v_267.memory[di + 10u]);
if (mem_ok)
{
Alloc param_60 = cmd_alloc;
CmdRef param_61 = cmd_ref;
CmdRadGrad param_62 = cmd_rad;
Cmd_RadGrad_write(param_60, param_61, param_62, v_267);
}
cmd_ref.offset += 48u;
break;
}
case 72u:
{
Alloc param_63 = cmd_alloc;
CmdRef param_64 = cmd_ref;
uint param_65 = cmd_limit;
alloc_cmd(param_63, param_64, param_65, mem_ok, v_267, v_891);
cmd_alloc = param_63;
cmd_ref = param_64;
cmd_limit = param_65;
linewidth = as_type<float>(v_267.memory[di]);
Alloc param_66 = cmd_alloc;
CmdRef param_67 = cmd_ref;
Tile param_68 = tile_1;
float param_69 = linewidth;
write_fill(param_66, param_67, param_68, param_69, mem_ok, v_267);
cmd_ref = param_67;
uint index = _1390.scene[dd_1];
uint raw1 = _1390.scene[dd_1 + 1u];
int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
if (mem_ok)
{
Alloc param_70 = cmd_alloc;
CmdRef param_71 = cmd_ref;
CmdImage param_72 = CmdImage{ index, offset_1 };
Cmd_Image_write(param_70, param_71, param_72, v_267);
}
cmd_ref.offset += 12u;
break;
}
case 5u:
{
bool _2120 = tile_1.tile.offset == 0u;
bool _2126;
if (_2120)
{
_2126 = tile_1.backdrop == 0;
}
else
{
_2126 = _2120;
}
if (_2126)
{
clip_zero_depth = clip_depth + 1u;
}
else
{
Alloc param_73 = cmd_alloc;
CmdRef param_74 = cmd_ref;
uint param_75 = cmd_limit;
alloc_cmd(param_73, param_74, param_75, mem_ok, v_267, v_891);
cmd_alloc = param_73;
cmd_ref = param_74;
cmd_limit = param_75;
if (mem_ok)
{
Alloc param_76 = cmd_alloc;
CmdRef param_77 = cmd_ref;
Cmd_BeginClip_write(param_76, param_77, v_267);
}
cmd_ref.offset += 4u;
render_blend_depth++;
max_blend_depth = max(max_blend_depth, render_blend_depth);
}
clip_depth++;
break;
}
case 37u:
{
clip_depth--;
Alloc param_78 = cmd_alloc;
CmdRef param_79 = cmd_ref;
Tile param_80 = tile_1;
float param_81 = -1.0;
write_fill(param_78, param_79, param_80, param_81, mem_ok, v_267);
cmd_ref = param_79;
uint blend_1 = _1390.scene[dd_1];
if (mem_ok)
{
Alloc param_82 = cmd_alloc;
CmdRef param_83 = cmd_ref;
CmdEndClip param_84 = CmdEndClip{ blend_1 };
Cmd_EndClip_write(param_82, param_83, param_84, v_267);
}
cmd_ref.offset += 8u;
render_blend_depth--;
break;
}
}
}
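// Inside a clip region known to be empty for this tile: only track nesting
// depth until the matching end clip, emit nothing.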
else
{
switch (drawtag)
{
case 5u:
{
clip_depth++;
break;
}
case 37u:
{
if (clip_depth == clip_zero_depth)
{
clip_zero_depth = 0u;
}
clip_depth--;
break;
}
}
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
rd_ix += 256u;
if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))
{
break;
}
}
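// Epilogue: if this thread's tile lies inside the viewport, terminate its PTCL
// with Cmd_End and, when the blend stack ran deeper than 4, reserve scratch
// space in the global blend buffer and record its offset in the first word of
// the tile's command list.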
bool _2231 = (bin_tile_x + tile_x) < v_891.conf.width_in_tiles;
bool _2240;
if (_2231)
{
_2240 = (bin_tile_y + tile_y) < v_891.conf.height_in_tiles;
}
else
{
_2240 = _2231;
}
if (_2240)
{
if (mem_ok)
{
Alloc param_85 = cmd_alloc;
CmdRef param_86 = cmd_ref;
Cmd_End_write(param_85, param_86, v_267);
}
if (max_blend_depth > 4u)
{
uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u;
uint _2264 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.blend_offset, scratch_size, memory_order_relaxed);
uint scratch = _2264;
Alloc param_87 = scratch_alloc;
uint param_88 = scratch_alloc.offset >> uint(2);
uint param_89 = scratch;
write_mem(param_87, param_88, param_89, v_267);
}
}
}