Merge pull request #173 from linebender/blend_mem

Blend mem
This commit is contained in:
Raph Levien 2022-05-20 14:27:58 -07:00 committed by GitHub
commit 60d197bb4e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 961 additions and 772 deletions

View file

@ -151,6 +151,12 @@ void main() {
uint part_start_ix = 0; uint part_start_ix = 0;
uint ready_ix = 0; uint ready_ix = 0;
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
cmd_ref.offset += 4;
// Accounting for allocation of blend memory
uint render_blend_depth = 0;
uint max_blend_depth = 0;
uint drawmonoid_start = conf.drawmonoid_alloc.offset >> 2; uint drawmonoid_start = conf.drawmonoid_alloc.offset >> 2;
uint drawtag_start = conf.drawtag_offset >> 2; uint drawtag_start = conf.drawtag_offset >> 2;
uint drawdata_start = conf.drawdata_offset >> 2; uint drawdata_start = conf.drawdata_offset >> 2;
@ -414,6 +420,8 @@ void main() {
} }
Cmd_BeginClip_write(cmd_alloc, cmd_ref); Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4; cmd_ref.offset += 4;
render_blend_depth++;
max_blend_depth = max(max_blend_depth, render_blend_depth);
} }
clip_depth++; clip_depth++;
break; break;
@ -426,6 +434,7 @@ void main() {
uint blend = scene[dd]; uint blend = scene[dd];
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend)); Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
cmd_ref.offset += 4 + CmdEndClip_size; cmd_ref.offset += 4 + CmdEndClip_size;
render_blend_depth--;
break; break;
} }
} else { } else {
@ -451,5 +460,10 @@ void main() {
} }
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) { if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_alloc, cmd_ref); Cmd_End_write(cmd_alloc, cmd_ref);
if (max_blend_depth > BLEND_STACK_SPLIT) {
uint scratch_size = max_blend_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
MallocResult scratch = malloc(scratch_size);
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
}
} }
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -175,9 +175,9 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _260 : register(u0, space0); RWByteAddressBuffer _266 : register(u0, space0);
ByteAddressBuffer _1005 : register(t1, space0); ByteAddressBuffer _1020 : register(t1, space0);
ByteAddressBuffer _1372 : register(t2, space0); ByteAddressBuffer _1399 : register(t2, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -200,8 +200,8 @@ groupshared uint sh_tile_count[256];
Alloc slice_mem(Alloc a, uint offset, uint size) Alloc slice_mem(Alloc a, uint offset, uint size)
{ {
Alloc _337 = { a.offset + offset }; Alloc _343 = { a.offset + offset };
return _337; return _343;
} }
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
@ -217,7 +217,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _260.Load(offset * 4 + 8); uint v = _266.Load(offset * 4 + 8);
return v; return v;
} }
@ -230,8 +230,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok)
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)
{ {
BinInstanceRef _346 = { ref.offset + (index * 4u) }; BinInstanceRef _361 = { ref.offset + (index * 4u) };
return _346; return _361;
} }
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) BinInstance BinInstance_read(Alloc a, BinInstanceRef ref)
@ -259,8 +259,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _409 = { raw2 }; TileRef _424 = { raw2 };
s.tiles = _409; s.tiles = _424;
return s; return s;
} }
@ -270,11 +270,11 @@ void write_tile_alloc(uint el_ix, Alloc a)
Alloc read_tile_alloc(uint el_ix, bool mem_ok) Alloc read_tile_alloc(uint el_ix, bool mem_ok)
{ {
uint _892; uint _907;
_260.GetDimensions(_892); _266.GetDimensions(_907);
_892 = (_892 - 8) / 4; _907 = (_907 - 8) / 4;
uint param = 0u; uint param = 0u;
uint param_1 = uint(int(_892) * 4); uint param_1 = uint(int(_907) * 4);
bool param_2 = mem_ok; bool param_2 = mem_ok;
return new_alloc(param, param_1, param_2); return new_alloc(param, param_1, param_2);
} }
@ -288,31 +288,31 @@ Tile Tile_read(Alloc a, TileRef ref)
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3); uint raw1 = read_mem(param_2, param_3);
TileSegRef _434 = { raw0 }; TileSegRef _449 = { raw0 };
Tile s; Tile s;
s.tile = _434; s.tile = _449;
s.backdrop = int(raw1); s.backdrop = int(raw1);
return s; return s;
} }
MallocResult malloc(uint size) MallocResult malloc(uint size)
{ {
uint _266; uint _272;
_260.InterlockedAdd(0, size, _266); _266.InterlockedAdd(0, size, _272);
uint offset = _266; uint offset = _272;
uint _273; uint _279;
_260.GetDimensions(_273); _266.GetDimensions(_279);
_273 = (_273 - 8) / 4; _279 = (_279 - 8) / 4;
MallocResult r; MallocResult r;
r.failed = (offset + size) > uint(int(_273) * 4); r.failed = (offset + size) > uint(int(_279) * 4);
uint param = offset; uint param = offset;
uint param_1 = size; uint param_1 = size;
bool param_2 = !r.failed; bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2); r.alloc = new_alloc(param, param_1, param_2);
if (r.failed) if (r.failed)
{ {
uint _295; uint _301;
_260.InterlockedMax(4, 1u, _295); _266.InterlockedMax(4, 1u, _301);
return r; return r;
} }
return r; return r;
@ -326,7 +326,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_260.Store(offset * 4 + 8, val); _266.Store(offset * 4 + 8, val);
} }
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)
@ -344,9 +344,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 11u; uint param_2 = 11u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdJumpRef _885 = { ref.offset + 4u }; CmdJumpRef _900 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdJumpRef param_4 = _885; CmdJumpRef param_4 = _900;
CmdJump param_5 = s; CmdJump param_5 = s;
CmdJump_write(param_3, param_4, param_5); CmdJump_write(param_3, param_4, param_5);
} }
@ -358,21 +358,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true; return true;
} }
uint param = 1024u; uint param = 1024u;
MallocResult _913 = malloc(param); MallocResult _928 = malloc(param);
MallocResult new_cmd = _913; MallocResult new_cmd = _928;
if (new_cmd.failed) if (new_cmd.failed)
{ {
return false; return false;
} }
CmdJump _923 = { new_cmd.alloc.offset }; CmdJump _938 = { new_cmd.alloc.offset };
CmdJump jump = _923; CmdJump jump = _938;
Alloc param_1 = cmd_alloc; Alloc param_1 = cmd_alloc;
CmdRef param_2 = cmd_ref; CmdRef param_2 = cmd_ref;
CmdJump param_3 = jump; CmdJump param_3 = jump;
Cmd_Jump_write(param_1, param_2, param_3); Cmd_Jump_write(param_1, param_2, param_3);
cmd_alloc = new_cmd.alloc; cmd_alloc = new_cmd.alloc;
CmdRef _935 = { cmd_alloc.offset }; CmdRef _950 = { cmd_alloc.offset };
cmd_ref = _935; cmd_ref = _950;
cmd_limit = (cmd_alloc.offset + 1024u) - 144u; cmd_limit = (cmd_alloc.offset + 1024u) - 144u;
return true; return true;
} }
@ -396,9 +396,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 1u; uint param_2 = 1u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdFillRef _742 = { ref.offset + 4u }; CmdFillRef _757 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdFillRef param_4 = _742; CmdFillRef param_4 = _757;
CmdFill param_5 = s; CmdFill param_5 = s;
CmdFill_write(param_3, param_4, param_5); CmdFill_write(param_3, param_4, param_5);
} }
@ -430,9 +430,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 2u; uint param_2 = 2u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdStrokeRef _760 = { ref.offset + 4u }; CmdStrokeRef _775 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdStrokeRef param_4 = _760; CmdStrokeRef param_4 = _775;
CmdStroke param_5 = s; CmdStroke param_5 = s;
CmdStroke_write(param_3, param_4, param_5); CmdStroke_write(param_3, param_4, param_5);
} }
@ -443,8 +443,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth)
{ {
if (tile.tile.offset != 0u) if (tile.tile.offset != 0u)
{ {
CmdFill _958 = { tile.tile.offset, tile.backdrop }; CmdFill _973 = { tile.tile.offset, tile.backdrop };
CmdFill cmd_fill = _958; CmdFill cmd_fill = _973;
Alloc param = alloc; Alloc param = alloc;
CmdRef param_1 = cmd_ref; CmdRef param_1 = cmd_ref;
CmdFill param_2 = cmd_fill; CmdFill param_2 = cmd_fill;
@ -461,8 +461,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth)
} }
else else
{ {
CmdStroke _988 = { tile.tile.offset, 0.5f * linewidth }; CmdStroke _1003 = { tile.tile.offset, 0.5f * linewidth };
CmdStroke cmd_stroke = _988; CmdStroke cmd_stroke = _1003;
Alloc param_5 = alloc; Alloc param_5 = alloc;
CmdRef param_6 = cmd_ref; CmdRef param_6 = cmd_ref;
CmdStroke param_7 = cmd_stroke; CmdStroke param_7 = cmd_stroke;
@ -486,9 +486,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 5u; uint param_2 = 5u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdColorRef _786 = { ref.offset + 4u }; CmdColorRef _801 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdColorRef param_4 = _786; CmdColorRef param_4 = _801;
CmdColor param_5 = s; CmdColor param_5 = s;
CmdColor_write(param_3, param_4, param_5); CmdColor_write(param_3, param_4, param_5);
} }
@ -520,9 +520,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 6u; uint param_2 = 6u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdLinGradRef _804 = { ref.offset + 4u }; CmdLinGradRef _819 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdLinGradRef param_4 = _804; CmdLinGradRef param_4 = _819;
CmdLinGrad param_5 = s; CmdLinGrad param_5 = s;
CmdLinGrad_write(param_3, param_4, param_5); CmdLinGrad_write(param_3, param_4, param_5);
} }
@ -582,9 +582,9 @@ void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 7u; uint param_2 = 7u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdRadGradRef _822 = { ref.offset + 4u }; CmdRadGradRef _837 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdRadGradRef param_4 = _822; CmdRadGradRef param_4 = _837;
CmdRadGrad param_5 = s; CmdRadGrad param_5 = s;
CmdRadGrad_write(param_3, param_4, param_5); CmdRadGrad_write(param_3, param_4, param_5);
} }
@ -608,9 +608,9 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 8u; uint param_2 = 8u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdImageRef _840 = { ref.offset + 4u }; CmdImageRef _855 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdImageRef param_4 = _840; CmdImageRef param_4 = _855;
CmdImage param_5 = s; CmdImage param_5 = s;
CmdImage_write(param_3, param_4, param_5); CmdImage_write(param_3, param_4, param_5);
} }
@ -638,9 +638,9 @@ void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s)
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint param_2 = 10u; uint param_2 = 10u;
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
CmdEndClipRef _866 = { ref.offset + 4u }; CmdEndClipRef _881 = { ref.offset + 4u };
Alloc param_3 = a; Alloc param_3 = a;
CmdEndClipRef param_4 = _866; CmdEndClipRef param_4 = _881;
CmdEndClip param_5 = s; CmdEndClip param_5 = s;
CmdEndClip_write(param_3, param_4, param_5); CmdEndClip_write(param_3, param_4, param_5);
} }
@ -653,27 +653,35 @@ void Cmd_End_write(Alloc a, CmdRef ref)
write_mem(param, param_1, param_2); write_mem(param, param_1, param_2);
} }
void alloc_write(Alloc a, uint offset, Alloc alloc)
{
Alloc param = a;
uint param_1 = offset >> uint(2);
uint param_2 = alloc.offset;
write_mem(param, param_1, param_2);
}
void comp_main() void comp_main()
{ {
uint width_in_bins = ((_1005.Load(8) + 16u) - 1u) / 16u; uint width_in_bins = ((_1020.Load(8) + 16u) - 1u) / 16u;
uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;
uint partition_ix = 0u; uint partition_ix = 0u;
uint n_partitions = ((_1005.Load(0) + 256u) - 1u) / 256u; uint n_partitions = ((_1020.Load(0) + 256u) - 1u) / 256u;
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x;
uint bin_tile_y = 16u * gl_WorkGroupID.y; uint bin_tile_y = 16u * gl_WorkGroupID.y;
uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_x = gl_LocalInvocationID.x % 16u;
uint tile_y = gl_LocalInvocationID.x / 16u; uint tile_y = gl_LocalInvocationID.x / 16u;
uint this_tile_ix = (((bin_tile_y + tile_y) * _1005.Load(8)) + bin_tile_x) + tile_x; uint this_tile_ix = (((bin_tile_y + tile_y) * _1020.Load(8)) + bin_tile_x) + tile_x;
Alloc _1070; Alloc _1085;
_1070.offset = _1005.Load(24); _1085.offset = _1020.Load(24);
Alloc param; Alloc param;
param.offset = _1070.offset; param.offset = _1085.offset;
uint param_1 = this_tile_ix * 1024u; uint param_1 = this_tile_ix * 1024u;
uint param_2 = 1024u; uint param_2 = 1024u;
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1079 = { cmd_alloc.offset }; CmdRef _1094 = { cmd_alloc.offset };
CmdRef cmd_ref = _1079; CmdRef cmd_ref = _1094;
uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; uint cmd_limit = (cmd_ref.offset + 1024u) - 144u;
uint clip_depth = 0u; uint clip_depth = 0u;
uint clip_zero_depth = 0u; uint clip_zero_depth = 0u;
@ -681,18 +689,25 @@ void comp_main()
uint wr_ix = 0u; uint wr_ix = 0u;
uint part_start_ix = 0u; uint part_start_ix = 0u;
uint ready_ix = 0u; uint ready_ix = 0u;
uint drawmonoid_start = _1005.Load(44) >> uint(2); Alloc param_3 = cmd_alloc;
uint drawtag_start = _1005.Load(100) >> uint(2); uint param_4 = 0u;
uint drawdata_start = _1005.Load(104) >> uint(2); uint param_5 = 8u;
uint drawinfo_start = _1005.Load(68) >> uint(2); Alloc scratch_alloc = slice_mem(param_3, param_4, param_5);
bool mem_ok = _260.Load(4) == 0u; cmd_ref.offset += 4u;
Alloc param_3; uint render_blend_depth = 0u;
Alloc param_5; uint max_blend_depth = 0u;
uint _1304; uint drawmonoid_start = _1020.Load(44) >> uint(2);
uint drawtag_start = _1020.Load(100) >> uint(2);
uint drawdata_start = _1020.Load(104) >> uint(2);
uint drawinfo_start = _1020.Load(68) >> uint(2);
bool mem_ok = _266.Load(4) == 0u;
Alloc param_6;
Alloc param_8;
uint _1331;
uint element_ix; uint element_ix;
Alloc param_14; Alloc param_17;
uint tile_count; uint tile_count;
uint _1605; uint _1632;
float linewidth; float linewidth;
CmdLinGrad cmd_lin; CmdLinGrad cmd_lin;
CmdRadGrad cmd_rad; CmdRadGrad cmd_rad;
@ -702,40 +717,40 @@ void comp_main()
{ {
sh_bitmaps[i][th_ix] = 0u; sh_bitmaps[i][th_ix] = 0u;
} }
bool _1356; bool _1383;
for (;;) for (;;)
{ {
if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) if ((ready_ix == wr_ix) && (partition_ix < n_partitions))
{ {
part_start_ix = ready_ix; part_start_ix = ready_ix;
uint count = 0u; uint count = 0u;
bool _1154 = th_ix < 256u; bool _1181 = th_ix < 256u;
bool _1162; bool _1189;
if (_1154) if (_1181)
{ {
_1162 = (partition_ix + th_ix) < n_partitions; _1189 = (partition_ix + th_ix) < n_partitions;
} }
else else
{ {
_1162 = _1154; _1189 = _1181;
} }
if (_1162) if (_1189)
{ {
uint in_ix = (_1005.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); uint in_ix = (_1020.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u);
Alloc _1179; Alloc _1206;
_1179.offset = _1005.Load(20); _1206.offset = _1020.Load(20);
param_3.offset = _1179.offset; param_6.offset = _1206.offset;
uint param_4 = in_ix; uint param_7 = in_ix;
count = read_mem(param_3, param_4); count = read_mem(param_6, param_7);
Alloc _1190; Alloc _1217;
_1190.offset = _1005.Load(20); _1217.offset = _1020.Load(20);
param_5.offset = _1190.offset; param_8.offset = _1217.offset;
uint param_6 = in_ix + 1u; uint param_9 = in_ix + 1u;
uint offset = read_mem(param_5, param_6); uint offset = read_mem(param_8, param_9);
uint param_7 = offset; uint param_10 = offset;
uint param_8 = count * 4u; uint param_11 = count * 4u;
bool param_9 = mem_ok; bool param_12 = mem_ok;
sh_part_elements[th_ix] = new_alloc(param_7, param_8, param_9); sh_part_elements[th_ix] = new_alloc(param_10, param_11, param_12);
} }
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
{ {
@ -775,35 +790,35 @@ void comp_main()
} }
if (part_ix > 0u) if (part_ix > 0u)
{ {
_1304 = sh_part_count[part_ix - 1u]; _1331 = sh_part_count[part_ix - 1u];
} }
else else
{ {
_1304 = part_start_ix; _1331 = part_start_ix;
} }
ix -= _1304; ix -= _1331;
Alloc bin_alloc = sh_part_elements[part_ix]; Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef _1323 = { bin_alloc.offset }; BinInstanceRef _1350 = { bin_alloc.offset };
BinInstanceRef inst_ref = _1323; BinInstanceRef inst_ref = _1350;
BinInstanceRef param_10 = inst_ref; BinInstanceRef param_13 = inst_ref;
uint param_11 = ix; uint param_14 = ix;
Alloc param_12 = bin_alloc; Alloc param_15 = bin_alloc;
BinInstanceRef param_13 = BinInstance_index(param_10, param_11); BinInstanceRef param_16 = BinInstance_index(param_13, param_14);
BinInstance inst = BinInstance_read(param_12, param_13); BinInstance inst = BinInstance_read(param_15, param_16);
sh_elements[th_ix] = inst.element_ix; sh_elements[th_ix] = inst.element_ix;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
wr_ix = min((rd_ix + 256u), ready_ix); wr_ix = min((rd_ix + 256u), ready_ix);
bool _1346 = (wr_ix - rd_ix) < 256u; bool _1373 = (wr_ix - rd_ix) < 256u;
if (_1346) if (_1373)
{ {
_1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); _1383 = (wr_ix < ready_ix) || (partition_ix < n_partitions);
} }
else else
{ {
_1356 = _1346; _1383 = _1373;
} }
if (_1356) if (_1383)
{ {
continue; continue;
} }
@ -816,7 +831,7 @@ void comp_main()
if ((th_ix + rd_ix) < wr_ix) if ((th_ix + rd_ix) < wr_ix)
{ {
element_ix = sh_elements[th_ix]; element_ix = sh_elements[th_ix];
tag = _1372.Load((drawtag_start + element_ix) * 4 + 0); tag = _1399.Load((drawtag_start + element_ix) * 4 + 0);
} }
switch (tag) switch (tag)
{ {
@ -828,13 +843,13 @@ void comp_main()
case 37u: case 37u:
{ {
uint drawmonoid_base = drawmonoid_start + (4u * element_ix); uint drawmonoid_base = drawmonoid_start + (4u * element_ix);
uint path_ix = _260.Load(drawmonoid_base * 4 + 8); uint path_ix = _266.Load(drawmonoid_base * 4 + 8);
PathRef _1397 = { _1005.Load(16) + (path_ix * 12u) }; PathRef _1424 = { _1020.Load(16) + (path_ix * 12u) };
Alloc _1400; Alloc _1427;
_1400.offset = _1005.Load(16); _1427.offset = _1020.Load(16);
param_14.offset = _1400.offset; param_17.offset = _1427.offset;
PathRef param_15 = _1397; PathRef param_18 = _1424;
Path path = Path_read(param_14, param_15); Path path = Path_read(param_17, param_18);
uint stride = path.bbox.z - path.bbox.x; uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride; sh_tile_stride[th_ix] = stride;
int dx = int(path.bbox.x) - int(bin_tile_x); int dx = int(path.bbox.x) - int(bin_tile_x);
@ -849,13 +864,13 @@ void comp_main()
tile_count = uint(x1 - x0) * uint(y1 - y0); tile_count = uint(x1 - x0) * uint(y1 - y0);
uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);
sh_tile_base[th_ix] = base; sh_tile_base[th_ix] = base;
uint param_16 = path.tiles.offset; uint param_19 = path.tiles.offset;
uint param_17 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_20 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_18 = mem_ok; bool param_21 = mem_ok;
Alloc path_alloc = new_alloc(param_16, param_17, param_18); Alloc path_alloc = new_alloc(param_19, param_20, param_21);
uint param_19 = th_ix; uint param_22 = th_ix;
Alloc param_20 = path_alloc; Alloc param_23 = path_alloc;
write_tile_alloc(param_19, param_20); write_tile_alloc(param_22, param_23);
break; break;
} }
default: default:
@ -889,56 +904,56 @@ void comp_main()
} }
} }
uint element_ix_1 = sh_elements[el_ix]; uint element_ix_1 = sh_elements[el_ix];
uint tag_1 = _1372.Load((drawtag_start + element_ix_1) * 4 + 0); uint tag_1 = _1399.Load((drawtag_start + element_ix_1) * 4 + 0);
if (el_ix > 0u) if (el_ix > 0u)
{ {
_1605 = sh_tile_count[el_ix - 1u]; _1632 = sh_tile_count[el_ix - 1u];
} }
else else
{ {
_1605 = 0u; _1632 = 0u;
} }
uint seq_ix = ix_1 - _1605; uint seq_ix = ix_1 - _1632;
uint width = sh_tile_width[el_ix]; uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint x = sh_tile_x0[el_ix] + (seq_ix % width);
uint y = sh_tile_y0[el_ix] + (seq_ix / width); uint y = sh_tile_y0[el_ix] + (seq_ix / width);
bool include_tile = false; bool include_tile = false;
if (mem_ok) if (mem_ok)
{ {
uint param_21 = el_ix; uint param_24 = el_ix;
bool param_22 = mem_ok; bool param_25 = mem_ok;
TileRef _1657 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; TileRef _1684 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) };
Alloc param_23 = read_tile_alloc(param_21, param_22); Alloc param_26 = read_tile_alloc(param_24, param_25);
TileRef param_24 = _1657; TileRef param_27 = _1684;
Tile tile = Tile_read(param_23, param_24); Tile tile = Tile_read(param_26, param_27);
bool is_clip = (tag_1 & 1u) != 0u; bool is_clip = (tag_1 & 1u) != 0u;
bool is_blend = false; bool is_blend = false;
if (is_clip) if (is_clip)
{ {
uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1);
uint scene_offset = _260.Load((drawmonoid_base_1 + 2u) * 4 + 8); uint scene_offset = _266.Load((drawmonoid_base_1 + 2u) * 4 + 8);
uint dd = drawdata_start + (scene_offset >> uint(2)); uint dd = drawdata_start + (scene_offset >> uint(2));
uint blend = _1372.Load(dd * 4 + 0); uint blend = _1399.Load(dd * 4 + 0);
is_blend = blend != 32771u; is_blend = blend != 32771u;
} }
bool _1693 = tile.tile.offset != 0u; bool _1720 = tile.tile.offset != 0u;
bool _1702; bool _1729;
if (!_1693) if (!_1720)
{ {
_1702 = (tile.backdrop == 0) == is_clip; _1729 = (tile.backdrop == 0) == is_clip;
} }
else else
{ {
_1702 = _1693; _1729 = _1720;
} }
include_tile = _1702 || is_blend; include_tile = _1729 || is_blend;
} }
if (include_tile) if (include_tile)
{ {
uint el_slice = el_ix / 32u; uint el_slice = el_ix / 32u;
uint el_mask = 1u << (el_ix & 31u); uint el_mask = 1u << (el_ix & 31u);
uint _1724; uint _1751;
InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1724); InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1751);
} }
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -962,179 +977,181 @@ void comp_main()
uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap)));
uint element_ix_2 = sh_elements[element_ref_ix]; uint element_ix_2 = sh_elements[element_ref_ix];
bitmap &= (bitmap - 1u); bitmap &= (bitmap - 1u);
uint drawtag = _1372.Load((drawtag_start + element_ix_2) * 4 + 0); uint drawtag = _1399.Load((drawtag_start + element_ix_2) * 4 + 0);
if (clip_zero_depth == 0u) if (clip_zero_depth == 0u)
{ {
uint param_25 = element_ref_ix; uint param_28 = element_ref_ix;
bool param_26 = mem_ok; bool param_29 = mem_ok;
TileRef _1801 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; TileRef _1828 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) };
Alloc param_27 = read_tile_alloc(param_25, param_26); Alloc param_30 = read_tile_alloc(param_28, param_29);
TileRef param_28 = _1801; TileRef param_31 = _1828;
Tile tile_1 = Tile_read(param_27, param_28); Tile tile_1 = Tile_read(param_30, param_31);
uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2);
uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8); uint scene_offset_1 = _266.Load((drawmonoid_base_2 + 2u) * 4 + 8);
uint info_offset = _260.Load((drawmonoid_base_2 + 3u) * 4 + 8); uint info_offset = _266.Load((drawmonoid_base_2 + 3u) * 4 + 8);
uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2));
uint di = drawinfo_start + (info_offset >> uint(2)); uint di = drawinfo_start + (info_offset >> uint(2));
switch (drawtag) switch (drawtag)
{ {
case 68u: case 68u:
{ {
linewidth = asfloat(_260.Load(di * 4 + 8)); linewidth = asfloat(_266.Load(di * 4 + 8));
Alloc param_29 = cmd_alloc; Alloc param_32 = cmd_alloc;
CmdRef param_30 = cmd_ref; CmdRef param_33 = cmd_ref;
uint param_31 = cmd_limit; uint param_34 = cmd_limit;
bool _1849 = alloc_cmd(param_29, param_30, param_31); bool _1876 = alloc_cmd(param_32, param_33, param_34);
cmd_alloc = param_29; cmd_alloc = param_32;
cmd_ref = param_30; cmd_ref = param_33;
cmd_limit = param_31; cmd_limit = param_34;
if (!_1849) if (!_1876)
{ {
break; break;
} }
Alloc param_32 = cmd_alloc; Alloc param_35 = cmd_alloc;
CmdRef param_33 = cmd_ref; CmdRef param_36 = cmd_ref;
Tile param_34 = tile_1; Tile param_37 = tile_1;
float param_35 = linewidth; float param_38 = linewidth;
write_fill(param_32, param_33, param_34, param_35); write_fill(param_35, param_36, param_37, param_38);
cmd_ref = param_33; cmd_ref = param_36;
uint rgba = _1372.Load(dd_1 * 4 + 0); uint rgba = _1399.Load(dd_1 * 4 + 0);
CmdColor _1872 = { rgba }; CmdColor _1899 = { rgba };
Alloc param_36 = cmd_alloc; Alloc param_39 = cmd_alloc;
CmdRef param_37 = cmd_ref; CmdRef param_40 = cmd_ref;
CmdColor param_38 = _1872; CmdColor param_41 = _1899;
Cmd_Color_write(param_36, param_37, param_38); Cmd_Color_write(param_39, param_40, param_41);
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
} }
case 276u: case 276u:
{ {
Alloc param_39 = cmd_alloc; Alloc param_42 = cmd_alloc;
CmdRef param_40 = cmd_ref; CmdRef param_43 = cmd_ref;
uint param_41 = cmd_limit; uint param_44 = cmd_limit;
bool _1890 = alloc_cmd(param_39, param_40, param_41); bool _1917 = alloc_cmd(param_42, param_43, param_44);
cmd_alloc = param_39; cmd_alloc = param_42;
cmd_ref = param_40; cmd_ref = param_43;
cmd_limit = param_41; cmd_limit = param_44;
if (!_1890) if (!_1917)
{ {
break; break;
} }
linewidth = asfloat(_260.Load(di * 4 + 8)); linewidth = asfloat(_266.Load(di * 4 + 8));
Alloc param_42 = cmd_alloc; Alloc param_45 = cmd_alloc;
CmdRef param_43 = cmd_ref; CmdRef param_46 = cmd_ref;
Tile param_44 = tile_1; Tile param_47 = tile_1;
float param_45 = linewidth; float param_48 = linewidth;
write_fill(param_42, param_43, param_44, param_45); write_fill(param_45, param_46, param_47, param_48);
cmd_ref = param_43; cmd_ref = param_46;
cmd_lin.index = _1372.Load(dd_1 * 4 + 0); cmd_lin.index = _1399.Load(dd_1 * 4 + 0);
cmd_lin.line_x = asfloat(_260.Load((di + 1u) * 4 + 8)); cmd_lin.line_x = asfloat(_266.Load((di + 1u) * 4 + 8));
cmd_lin.line_y = asfloat(_260.Load((di + 2u) * 4 + 8)); cmd_lin.line_y = asfloat(_266.Load((di + 2u) * 4 + 8));
cmd_lin.line_c = asfloat(_260.Load((di + 3u) * 4 + 8)); cmd_lin.line_c = asfloat(_266.Load((di + 3u) * 4 + 8));
Alloc param_46 = cmd_alloc; Alloc param_49 = cmd_alloc;
CmdRef param_47 = cmd_ref; CmdRef param_50 = cmd_ref;
CmdLinGrad param_48 = cmd_lin; CmdLinGrad param_51 = cmd_lin;
Cmd_LinGrad_write(param_46, param_47, param_48); Cmd_LinGrad_write(param_49, param_50, param_51);
cmd_ref.offset += 20u; cmd_ref.offset += 20u;
break; break;
} }
case 732u: case 732u:
{ {
Alloc param_49 = cmd_alloc; Alloc param_52 = cmd_alloc;
CmdRef param_50 = cmd_ref; CmdRef param_53 = cmd_ref;
uint param_51 = cmd_limit; uint param_54 = cmd_limit;
bool _1954 = alloc_cmd(param_49, param_50, param_51); bool _1981 = alloc_cmd(param_52, param_53, param_54);
cmd_alloc = param_49; cmd_alloc = param_52;
cmd_ref = param_50; cmd_ref = param_53;
cmd_limit = param_51; cmd_limit = param_54;
if (!_1954) if (!_1981)
{ {
break; break;
} }
linewidth = asfloat(_260.Load(di * 4 + 8)); linewidth = asfloat(_266.Load(di * 4 + 8));
Alloc param_52 = cmd_alloc; Alloc param_55 = cmd_alloc;
CmdRef param_53 = cmd_ref; CmdRef param_56 = cmd_ref;
Tile param_54 = tile_1; Tile param_57 = tile_1;
float param_55 = linewidth; float param_58 = linewidth;
write_fill(param_52, param_53, param_54, param_55); write_fill(param_55, param_56, param_57, param_58);
cmd_ref = param_53; cmd_ref = param_56;
cmd_rad.index = _1372.Load(dd_1 * 4 + 0); cmd_rad.index = _1399.Load(dd_1 * 4 + 0);
cmd_rad.mat = asfloat(uint4(_260.Load((di + 1u) * 4 + 8), _260.Load((di + 2u) * 4 + 8), _260.Load((di + 3u) * 4 + 8), _260.Load((di + 4u) * 4 + 8))); cmd_rad.mat = asfloat(uint4(_266.Load((di + 1u) * 4 + 8), _266.Load((di + 2u) * 4 + 8), _266.Load((di + 3u) * 4 + 8), _266.Load((di + 4u) * 4 + 8)));
cmd_rad.xlat = asfloat(uint2(_260.Load((di + 5u) * 4 + 8), _260.Load((di + 6u) * 4 + 8))); cmd_rad.xlat = asfloat(uint2(_266.Load((di + 5u) * 4 + 8), _266.Load((di + 6u) * 4 + 8)));
cmd_rad.c1 = asfloat(uint2(_260.Load((di + 7u) * 4 + 8), _260.Load((di + 8u) * 4 + 8))); cmd_rad.c1 = asfloat(uint2(_266.Load((di + 7u) * 4 + 8), _266.Load((di + 8u) * 4 + 8)));
cmd_rad.ra = asfloat(_260.Load((di + 9u) * 4 + 8)); cmd_rad.ra = asfloat(_266.Load((di + 9u) * 4 + 8));
cmd_rad.roff = asfloat(_260.Load((di + 10u) * 4 + 8)); cmd_rad.roff = asfloat(_266.Load((di + 10u) * 4 + 8));
Alloc param_56 = cmd_alloc; Alloc param_59 = cmd_alloc;
CmdRef param_57 = cmd_ref; CmdRef param_60 = cmd_ref;
CmdRadGrad param_58 = cmd_rad; CmdRadGrad param_61 = cmd_rad;
Cmd_RadGrad_write(param_56, param_57, param_58); Cmd_RadGrad_write(param_59, param_60, param_61);
cmd_ref.offset += 48u; cmd_ref.offset += 48u;
break; break;
} }
case 72u: case 72u:
{ {
linewidth = asfloat(_260.Load(di * 4 + 8)); linewidth = asfloat(_266.Load(di * 4 + 8));
Alloc param_59 = cmd_alloc; Alloc param_62 = cmd_alloc;
CmdRef param_60 = cmd_ref; CmdRef param_63 = cmd_ref;
uint param_61 = cmd_limit; uint param_64 = cmd_limit;
bool _2060 = alloc_cmd(param_59, param_60, param_61); bool _2087 = alloc_cmd(param_62, param_63, param_64);
cmd_alloc = param_59; cmd_alloc = param_62;
cmd_ref = param_60; cmd_ref = param_63;
cmd_limit = param_61; cmd_limit = param_64;
if (!_2060) if (!_2087)
{ {
break; break;
} }
Alloc param_62 = cmd_alloc; Alloc param_65 = cmd_alloc;
CmdRef param_63 = cmd_ref; CmdRef param_66 = cmd_ref;
Tile param_64 = tile_1; Tile param_67 = tile_1;
float param_65 = linewidth; float param_68 = linewidth;
write_fill(param_62, param_63, param_64, param_65); write_fill(param_65, param_66, param_67, param_68);
cmd_ref = param_63; cmd_ref = param_66;
uint index = _1372.Load(dd_1 * 4 + 0); uint index = _1399.Load(dd_1 * 4 + 0);
uint raw1 = _1372.Load((dd_1 + 1u) * 4 + 0); uint raw1 = _1399.Load((dd_1 + 1u) * 4 + 0);
int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
CmdImage _2099 = { index, offset_1 }; CmdImage _2126 = { index, offset_1 };
Alloc param_66 = cmd_alloc; Alloc param_69 = cmd_alloc;
CmdRef param_67 = cmd_ref; CmdRef param_70 = cmd_ref;
CmdImage param_68 = _2099; CmdImage param_71 = _2126;
Cmd_Image_write(param_66, param_67, param_68); Cmd_Image_write(param_69, param_70, param_71);
cmd_ref.offset += 12u; cmd_ref.offset += 12u;
break; break;
} }
case 5u: case 5u:
{ {
bool _2113 = tile_1.tile.offset == 0u; bool _2140 = tile_1.tile.offset == 0u;
bool _2119; bool _2146;
if (_2113) if (_2140)
{ {
_2119 = tile_1.backdrop == 0; _2146 = tile_1.backdrop == 0;
} }
else else
{ {
_2119 = _2113; _2146 = _2140;
} }
if (_2119) if (_2146)
{ {
clip_zero_depth = clip_depth + 1u; clip_zero_depth = clip_depth + 1u;
} }
else else
{ {
Alloc param_69 = cmd_alloc; Alloc param_72 = cmd_alloc;
CmdRef param_70 = cmd_ref; CmdRef param_73 = cmd_ref;
uint param_71 = cmd_limit; uint param_74 = cmd_limit;
bool _2131 = alloc_cmd(param_69, param_70, param_71); bool _2158 = alloc_cmd(param_72, param_73, param_74);
cmd_alloc = param_69; cmd_alloc = param_72;
cmd_ref = param_70; cmd_ref = param_73;
cmd_limit = param_71; cmd_limit = param_74;
if (!_2131) if (!_2158)
{ {
break; break;
} }
Alloc param_72 = cmd_alloc; Alloc param_75 = cmd_alloc;
CmdRef param_73 = cmd_ref; CmdRef param_76 = cmd_ref;
Cmd_BeginClip_write(param_72, param_73); Cmd_BeginClip_write(param_75, param_76);
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
render_blend_depth++;
max_blend_depth = max(max_blend_depth, render_blend_depth);
} }
clip_depth++; clip_depth++;
break; break;
@ -1142,30 +1159,31 @@ void comp_main()
case 37u: case 37u:
{ {
clip_depth--; clip_depth--;
Alloc param_74 = cmd_alloc; Alloc param_77 = cmd_alloc;
CmdRef param_75 = cmd_ref; CmdRef param_78 = cmd_ref;
uint param_76 = cmd_limit; uint param_79 = cmd_limit;
bool _2159 = alloc_cmd(param_74, param_75, param_76); bool _2191 = alloc_cmd(param_77, param_78, param_79);
cmd_alloc = param_74; cmd_alloc = param_77;
cmd_ref = param_75; cmd_ref = param_78;
cmd_limit = param_76; cmd_limit = param_79;
if (!_2159) if (!_2191)
{ {
break; break;
} }
Alloc param_77 = cmd_alloc; Alloc param_80 = cmd_alloc;
CmdRef param_78 = cmd_ref; CmdRef param_81 = cmd_ref;
Tile param_79 = tile_1; Tile param_82 = tile_1;
float param_80 = -1.0f; float param_83 = -1.0f;
write_fill(param_77, param_78, param_79, param_80); write_fill(param_80, param_81, param_82, param_83);
cmd_ref = param_78; cmd_ref = param_81;
uint blend_1 = _1372.Load(dd_1 * 4 + 0); uint blend_1 = _1399.Load(dd_1 * 4 + 0);
CmdEndClip _2182 = { blend_1 }; CmdEndClip _2214 = { blend_1 };
Alloc param_81 = cmd_alloc; Alloc param_84 = cmd_alloc;
CmdRef param_82 = cmd_ref; CmdRef param_85 = cmd_ref;
CmdEndClip param_83 = _2182; CmdEndClip param_86 = _2214;
Cmd_EndClip_write(param_81, param_82, param_83); Cmd_EndClip_write(param_84, param_85, param_86);
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
render_blend_depth--;
break; break;
} }
} }
@ -1198,21 +1216,32 @@ void comp_main()
break; break;
} }
} }
bool _2229 = (bin_tile_x + tile_x) < _1005.Load(8); bool _2263 = (bin_tile_x + tile_x) < _1020.Load(8);
bool _2238; bool _2272;
if (_2229) if (_2263)
{ {
_2238 = (bin_tile_y + tile_y) < _1005.Load(12); _2272 = (bin_tile_y + tile_y) < _1020.Load(12);
} }
else else
{ {
_2238 = _2229; _2272 = _2263;
} }
if (_2238) if (_2272)
{ {
Alloc param_84 = cmd_alloc; Alloc param_87 = cmd_alloc;
CmdRef param_85 = cmd_ref; CmdRef param_88 = cmd_ref;
Cmd_End_write(param_84, param_85); Cmd_End_write(param_87, param_88);
if (max_blend_depth > 4u)
{
uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u;
uint param_89 = scratch_size;
MallocResult _2293 = malloc(param_89);
MallocResult scratch = _2293;
Alloc param_90 = scratch_alloc;
uint param_91 = scratch_alloc.offset;
Alloc param_92 = scratch.alloc;
alloc_write(param_90, param_91, param_92);
}
} }
} }

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -999,6 +999,8 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset }; CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705; CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
float4 rgba[8]; float4 rgba[8];
@ -1011,7 +1013,9 @@ void comp_main()
float df[8]; float df[8];
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[128][8]; uint blend_stack[4][8];
uint base_ix_1;
uint bg_rgba;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -1032,8 +1036,8 @@ void comp_main()
{ {
df[k] = 1000000000.0f; df[k] = 1000000000.0f;
} }
TileSegRef _1800 = { stroke.tile_ref }; TileSegRef _1810 = { stroke.tile_ref };
tile_seg_ref = _1800; tile_seg_ref = _1810;
do do
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
@ -1069,8 +1073,8 @@ void comp_main()
{ {
area[k_3] = float(fill.backdrop); area[k_3] = float(fill.backdrop);
} }
TileSegRef _1920 = { fill.tile_ref }; TileSegRef _1930 = { fill.tile_ref };
tile_seg_ref = _1920; tile_seg_ref = _1930;
do do
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
@ -1159,10 +1163,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))]; float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2254 = fromsRGB(param_29); float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2254.x; fg_rgba.x = _2264.x;
fg_rgba.y = _2254.y; fg_rgba.y = _2264.y;
fg_rgba.z = _2254.z; fg_rgba.z = _2264.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
} }
@ -1185,10 +1189,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2364 = fromsRGB(param_33); float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2364.x; fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2364.y; fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2364.z; fg_rgba_1.z = _2374.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
} }
@ -1202,9 +1206,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35); CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint; uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img; CmdImage param_37 = fill_img;
float4 _2407[8]; float4 _2417[8];
fillImage(_2407, param_36, param_37); fillImage(_2417, param_36, param_37);
float4 img[8] = _2407; float4 img[8] = _2417;
for (uint k_11 = 0u; k_11 < 8u; k_11++) for (uint k_11 = 0u; k_11 < 8u; k_11++)
{ {
float4 fg_k_3 = img[k_11] * area[k_11]; float4 fg_k_3 = img[k_11] * area[k_11];
@ -1214,47 +1218,69 @@ void comp_main()
break; break;
} }
case 9u: case 9u:
{
if (clip_depth < 4u)
{ {
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
uint d_2 = min(clip_depth, 127u);
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2470 = packsRGB(param_38); uint _2479 = packsRGB(param_38);
blend_stack[d_2][k_12] = _2470; blend_stack[clip_depth][k_12] = _2479;
rgba[k_12] = 0.0f.xxxx; rgba[k_12] = 0.0f.xxxx;
} }
}
else
{
uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522);
rgba[k_13] = 0.0f.xxxx;
}
}
clip_depth++; clip_depth++;
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
break; break;
} }
case 10u: case 10u:
{ {
Alloc param_39 = cmd_alloc; Alloc param_40 = cmd_alloc;
CmdRef param_40 = cmd_ref; CmdRef param_41 = cmd_ref;
CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41);
uint blend_mode = end_clip.blend >> uint(8);
uint comp_mode = end_clip.blend & 255u;
clip_depth--; clip_depth--;
for (uint k_13 = 0u; k_13 < 8u; k_13++) if (clip_depth >= 4u)
{ {
uint d_3 = min(clip_depth, 127u); base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
uint param_41 = blend_stack[d_3][k_13]; }
float4 bg = unpacksRGB(param_41); for (uint k_14 = 0u; k_14 < 8u; k_14++)
float4 fg_1 = rgba[k_13] * area[k_13]; {
float4 param_42 = bg; if (clip_depth < 4u)
float4 param_43 = fg_1; {
uint param_44 = end_clip.blend; bg_rgba = blend_stack[clip_depth][k_14];
rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); }
else
{
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
float4 fg_1 = rgba[k_14] * area[k_14];
float4 param_43 = bg;
float4 param_44 = fg_1;
uint param_45 = end_clip.blend;
rgba[k_14] = mix_blend_compose(param_43, param_44, param_45);
} }
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
} }
case 11u: case 11u:
{ {
Alloc param_45 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_46 = cmd_ref; CmdRef param_47 = cmd_ref;
CmdRef _2548 = { Cmd_Jump_read(param_45, param_46).new_ref }; CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2548; cmd_ref = _2621;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }
@ -1262,9 +1288,9 @@ void comp_main()
} }
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
{ {
uint param_47 = i_1; uint param_48 = i_1;
float3 param_48 = rgba[i_1].xyz; float3 param_49 = rgba[i_1].xyz;
image[int2(xy_uint + chunk_offset(param_47))] = float4(tosRGB(param_48), rgba[i_1].w); image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w);
} }
} }

View file

@ -1056,6 +1056,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
uint param_2 = 1024u; uint param_2 = 1024u;
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; CmdRef cmd_ref = CmdRef{ cmd_alloc.offset };
uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)];
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
spvUnsafeArray<float4, 8> rgba; spvUnsafeArray<float4, 8> rgba;
@ -1068,7 +1070,9 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
spvUnsafeArray<float, 8> df; spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1;
uint bg_rgba;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -1214,10 +1218,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2254 = fromsRGB(param_29); float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2254.x; fg_rgba.x = _2264.x;
fg_rgba.y = _2254.y; fg_rgba.y = _2264.y;
fg_rgba.z = _2254.z; fg_rgba.z = _2264.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
} }
@ -1240,10 +1244,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2364 = fromsRGB(param_33); float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2364.x; fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2364.y; fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2364.z; fg_rgba_1.z = _2374.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
} }
@ -1268,46 +1272,68 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
break; break;
} }
case 9u: case 9u:
{
if (clip_depth < 4u)
{ {
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
uint d_2 = min(clip_depth, 127u);
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2470 = packsRGB(param_38); uint _2479 = packsRGB(param_38);
blend_stack[d_2][k_12] = _2470; blend_stack[clip_depth][k_12] = _2479;
rgba[k_12] = float4(0.0); rgba[k_12] = float4(0.0);
} }
}
else
{
uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522;
rgba[k_13] = float4(0.0);
}
}
clip_depth++; clip_depth++;
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
break; break;
} }
case 10u: case 10u:
{ {
Alloc param_39 = cmd_alloc; Alloc param_40 = cmd_alloc;
CmdRef param_40 = cmd_ref; CmdRef param_41 = cmd_ref;
CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297);
uint blend_mode = end_clip.blend >> uint(8);
uint comp_mode = end_clip.blend & 255u;
clip_depth--; clip_depth--;
for (uint k_13 = 0u; k_13 < 8u; k_13++) if (clip_depth >= 4u)
{ {
uint d_3 = min(clip_depth, 127u); base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
uint param_41 = blend_stack[d_3][k_13]; }
float4 bg = unpacksRGB(param_41); for (uint k_14 = 0u; k_14 < 8u; k_14++)
float4 fg_1 = rgba[k_13] * area[k_13]; {
float4 param_42 = bg; if (clip_depth < 4u)
float4 param_43 = fg_1; {
uint param_44 = end_clip.blend; bg_rgba = blend_stack[clip_depth][k_14];
rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); }
else
{
bg_rgba = v_297.memory[base_ix_1 + k_14];
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
float4 fg_1 = rgba[k_14] * area[k_14];
float4 param_43 = bg;
float4 param_44 = fg_1;
uint param_45 = end_clip.blend;
rgba[k_14] = mix_blend_compose(param_43, param_44, param_45);
} }
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
} }
case 11u: case 11u:
{ {
Alloc param_45 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_46 = cmd_ref; CmdRef param_47 = cmd_ref;
cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref };
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }
@ -1315,9 +1341,9 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
} }
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
{ {
uint param_47 = i_1; uint param_48 = i_1;
float3 param_48 = rgba[i_1].xyz; float3 param_49 = rgba[i_1].xyz;
image.write(float4(tosRGB(param_48), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48))));
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -999,6 +999,8 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset }; CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705; CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
float4 rgba[8]; float4 rgba[8];
@ -1011,7 +1013,9 @@ void comp_main()
float df[8]; float df[8];
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[128][8]; uint blend_stack[4][8];
uint base_ix_1;
uint bg_rgba;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -1032,8 +1036,8 @@ void comp_main()
{ {
df[k] = 1000000000.0f; df[k] = 1000000000.0f;
} }
TileSegRef _1800 = { stroke.tile_ref }; TileSegRef _1810 = { stroke.tile_ref };
tile_seg_ref = _1800; tile_seg_ref = _1810;
do do
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
@ -1069,8 +1073,8 @@ void comp_main()
{ {
area[k_3] = float(fill.backdrop); area[k_3] = float(fill.backdrop);
} }
TileSegRef _1920 = { fill.tile_ref }; TileSegRef _1930 = { fill.tile_ref };
tile_seg_ref = _1920; tile_seg_ref = _1930;
do do
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
@ -1159,10 +1163,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))]; float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2254 = fromsRGB(param_29); float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2254.x; fg_rgba.x = _2264.x;
fg_rgba.y = _2254.y; fg_rgba.y = _2264.y;
fg_rgba.z = _2254.z; fg_rgba.z = _2264.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
} }
@ -1185,10 +1189,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2364 = fromsRGB(param_33); float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2364.x; fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2364.y; fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2364.z; fg_rgba_1.z = _2374.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
} }
@ -1202,9 +1206,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35); CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint; uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img; CmdImage param_37 = fill_img;
float4 _2407[8]; float4 _2417[8];
fillImage(_2407, param_36, param_37); fillImage(_2417, param_36, param_37);
float4 img[8] = _2407; float4 img[8] = _2417;
for (uint k_11 = 0u; k_11 < 8u; k_11++) for (uint k_11 = 0u; k_11 < 8u; k_11++)
{ {
float4 fg_k_3 = img[k_11] * area[k_11]; float4 fg_k_3 = img[k_11] * area[k_11];
@ -1214,47 +1218,69 @@ void comp_main()
break; break;
} }
case 9u: case 9u:
{
if (clip_depth < 4u)
{ {
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
uint d_2 = min(clip_depth, 127u);
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2470 = packsRGB(param_38); uint _2479 = packsRGB(param_38);
blend_stack[d_2][k_12] = _2470; blend_stack[clip_depth][k_12] = _2479;
rgba[k_12] = 0.0f.xxxx; rgba[k_12] = 0.0f.xxxx;
} }
}
else
{
uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522);
rgba[k_13] = 0.0f.xxxx;
}
}
clip_depth++; clip_depth++;
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
break; break;
} }
case 10u: case 10u:
{ {
Alloc param_39 = cmd_alloc; Alloc param_40 = cmd_alloc;
CmdRef param_40 = cmd_ref; CmdRef param_41 = cmd_ref;
CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41);
uint blend_mode = end_clip.blend >> uint(8);
uint comp_mode = end_clip.blend & 255u;
clip_depth--; clip_depth--;
for (uint k_13 = 0u; k_13 < 8u; k_13++) if (clip_depth >= 4u)
{ {
uint d_3 = min(clip_depth, 127u); base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
uint param_41 = blend_stack[d_3][k_13]; }
float4 bg = unpacksRGB(param_41); for (uint k_14 = 0u; k_14 < 8u; k_14++)
float4 fg_1 = rgba[k_13] * area[k_13]; {
float4 param_42 = bg; if (clip_depth < 4u)
float4 param_43 = fg_1; {
uint param_44 = end_clip.blend; bg_rgba = blend_stack[clip_depth][k_14];
rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); }
else
{
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
float4 fg_1 = rgba[k_14] * area[k_14];
float4 param_43 = bg;
float4 param_44 = fg_1;
uint param_45 = end_clip.blend;
rgba[k_14] = mix_blend_compose(param_43, param_44, param_45);
} }
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
} }
case 11u: case 11u:
{ {
Alloc param_45 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_46 = cmd_ref; CmdRef param_47 = cmd_ref;
CmdRef _2548 = { Cmd_Jump_read(param_45, param_46).new_ref }; CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2548; cmd_ref = _2621;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }
@ -1262,8 +1288,8 @@ void comp_main()
} }
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
{ {
uint param_47 = i_1; uint param_48 = i_1;
image[int2(xy_uint + chunk_offset(param_47))] = rgba[i_1].w.x; image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x;
} }
} }

View file

@ -1056,6 +1056,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
uint param_2 = 1024u; uint param_2 = 1024u;
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; CmdRef cmd_ref = CmdRef{ cmd_alloc.offset };
uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)];
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
spvUnsafeArray<float4, 8> rgba; spvUnsafeArray<float4, 8> rgba;
@ -1068,7 +1070,9 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
spvUnsafeArray<float, 8> df; spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1;
uint bg_rgba;
while (mem_ok) while (mem_ok)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
@ -1214,10 +1218,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2254 = fromsRGB(param_29); float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2254.x; fg_rgba.x = _2264.x;
fg_rgba.y = _2254.y; fg_rgba.y = _2264.y;
fg_rgba.z = _2254.z; fg_rgba.z = _2264.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
} }
@ -1240,10 +1244,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2364 = fromsRGB(param_33); float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2364.x; fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2364.y; fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2364.z; fg_rgba_1.z = _2374.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
} }
@ -1268,46 +1272,68 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
break; break;
} }
case 9u: case 9u:
{
if (clip_depth < 4u)
{ {
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
uint d_2 = min(clip_depth, 127u);
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2470 = packsRGB(param_38); uint _2479 = packsRGB(param_38);
blend_stack[d_2][k_12] = _2470; blend_stack[clip_depth][k_12] = _2479;
rgba[k_12] = float4(0.0); rgba[k_12] = float4(0.0);
} }
}
else
{
uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522;
rgba[k_13] = float4(0.0);
}
}
clip_depth++; clip_depth++;
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
break; break;
} }
case 10u: case 10u:
{ {
Alloc param_39 = cmd_alloc; Alloc param_40 = cmd_alloc;
CmdRef param_40 = cmd_ref; CmdRef param_41 = cmd_ref;
CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297);
uint blend_mode = end_clip.blend >> uint(8);
uint comp_mode = end_clip.blend & 255u;
clip_depth--; clip_depth--;
for (uint k_13 = 0u; k_13 < 8u; k_13++) if (clip_depth >= 4u)
{ {
uint d_3 = min(clip_depth, 127u); base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y)));
uint param_41 = blend_stack[d_3][k_13]; }
float4 bg = unpacksRGB(param_41); for (uint k_14 = 0u; k_14 < 8u; k_14++)
float4 fg_1 = rgba[k_13] * area[k_13]; {
float4 param_42 = bg; if (clip_depth < 4u)
float4 param_43 = fg_1; {
uint param_44 = end_clip.blend; bg_rgba = blend_stack[clip_depth][k_14];
rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); }
else
{
bg_rgba = v_297.memory[base_ix_1 + k_14];
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
float4 fg_1 = rgba[k_14] * area[k_14];
float4 param_43 = bg;
float4 param_44 = fg_1;
uint param_45 = end_clip.blend;
rgba[k_14] = mix_blend_compose(param_43, param_44, param_45);
} }
cmd_ref.offset += 8u; cmd_ref.offset += 8u;
break; break;
} }
case 11u: case 11u:
{ {
Alloc param_45 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_46 = cmd_ref; CmdRef param_47 = cmd_ref;
cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref };
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }
@ -1315,8 +1341,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
} }
for (uint i_1 = 0u; i_1 < 8u; i_1++) for (uint i_1 = 0u; i_1 < 8u; i_1++)
{ {
uint param_47 = i_1; uint param_48 = i_1;
image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48))));
} }
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -100,11 +100,14 @@ void main() {
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC); Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset); CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
uint blend_offset = memory[cmd_ref.offset >> 2];
cmd_ref.offset += 4;
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x,
gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint); vec2 xy = vec2(xy_uint);
mediump vec4 rgba[CHUNK]; mediump vec4 rgba[CHUNK];
uint blend_stack[MAX_BLEND_STACK][CHUNK]; uint blend_stack[BLEND_STACK_SPLIT][CHUNK];
for (uint i = 0; i < CHUNK; i++) { for (uint i = 0; i < CHUNK; i++) {
rgba[i] = vec4(0.0); rgba[i] = vec4(0.0);
} }
@ -236,24 +239,38 @@ void main() {
cmd_ref.offset += 4 + CmdImage_size; cmd_ref.offset += 4 + CmdImage_size;
break; break;
case Cmd_BeginClip: case Cmd_BeginClip:
if (clip_depth < BLEND_STACK_SPLIT) {
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
// We reject any inputs that might overflow in render_ctx.rs. blend_stack[clip_depth][k] = packsRGB(vec4(rgba[k]));
// The following is a sanity check so we don't corrupt memory should there be malformed inputs.
uint d = min(clip_depth, MAX_BLEND_STACK - 1);
blend_stack[d][k] = packsRGB(vec4(rgba[k]));
rgba[k] = vec4(0.0); rgba[k] = vec4(0.0);
} }
} else {
uint base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX +
CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
memory[base_ix + k] = packsRGB(vec4(rgba[k]));
rgba[k] = vec4(0.0);
}
}
clip_depth++; clip_depth++;
cmd_ref.offset += 4; cmd_ref.offset += 4;
break; break;
case Cmd_EndClip: case Cmd_EndClip:
CmdEndClip end_clip = Cmd_EndClip_read(cmd_alloc, cmd_ref); CmdEndClip end_clip = Cmd_EndClip_read(cmd_alloc, cmd_ref);
uint blend_mode = uint(end_clip.blend >> 8);
uint comp_mode = uint(end_clip.blend & 0xFF);
clip_depth--; clip_depth--;
uint base_ix;
if (clip_depth >= BLEND_STACK_SPLIT) {
base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX +
CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y);
}
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
uint d = min(clip_depth, MAX_BLEND_STACK - 1); uint bg_rgba;
mediump vec4 bg = unpacksRGB(blend_stack[d][k]); if (clip_depth < BLEND_STACK_SPLIT) {
bg_rgba = blend_stack[clip_depth][k];
} else {
bg_rgba = memory[base_ix + k];
}
mediump vec4 bg = unpacksRGB(bg_rgba);
mediump vec4 fg = rgba[k] * area[k]; mediump vec4 fg = rgba[k] * area[k];
rgba[k] = mix_blend_compose(bg, fg, end_clip.blend); rgba[k] = mix_blend_compose(bg, fg, end_clip.blend);
} }

View file

@ -27,6 +27,10 @@
#define GRADIENT_WIDTH 512 #define GRADIENT_WIDTH 512
// We allocate this many blend stack entries in registers, and spill
// to memory for the overflow.
#define BLEND_STACK_SPLIT 4
#ifdef ERR_MALLOC_FAILED #ifdef ERR_MALLOC_FAILED
struct Config { struct Config {
uint n_elements; // paths uint n_elements; // paths
@ -91,7 +95,7 @@ struct Config {
#define MODE_STROKE 1 #define MODE_STROKE 1
// Size of kernel4 clip state, in words. // Size of kernel4 clip state, in words.
#define CLIP_STATE_SIZE 2 #define CLIP_STATE_SIZE 1
// fill_mode_from_flags extracts the fill mode from tag flags. // fill_mode_from_flags extracts the fill mode from tag flags.
uint fill_mode_from_flags(uint flags) { uint fill_mode_from_flags(uint flags) {

View file

@ -34,8 +34,6 @@ const TILE_H: usize = 16;
const PTCL_INITIAL_ALLOC: usize = 1024; const PTCL_INITIAL_ALLOC: usize = 1024;
const MAX_BLEND_STACK: usize = 128;
#[allow(unused)] #[allow(unused)]
fn dump_scene(buf: &[u8]) { fn dump_scene(buf: &[u8]) {
for i in 0..(buf.len() / 4) { for i in 0..(buf.len() / 4) {

View file

@ -5,7 +5,6 @@ use std::borrow::Cow;
use crate::encoder::GlyphEncoder; use crate::encoder::GlyphEncoder;
use crate::stages::{Config, Transform}; use crate::stages::{Config, Transform};
use crate::MAX_BLEND_STACK;
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape}; use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
use piet::{ use piet::{
Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext, Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
@ -233,9 +232,6 @@ impl RenderContext for PietGpuRenderContext {
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true); self.encode_path(path, true);
self.new_encoder.begin_clip(None); self.new_encoder.begin_clip(None);
if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
}
self.clip_stack.push(ClipElement { blend: None }); self.clip_stack.push(ClipElement { blend: None });
if let Some(tos) = self.state_stack.last_mut() { if let Some(tos) = self.state_stack.last_mut() {
tos.n_clip += 1; tos.n_clip += 1;
@ -337,9 +333,6 @@ impl PietGpuRenderContext {
let path = shape.path_elements(TOLERANCE); let path = shape.path_elements(TOLERANCE);
self.encode_path(path, true); self.encode_path(path, true);
self.new_encoder.begin_clip(Some(blend)); self.new_encoder.begin_clip(Some(blend));
if self.clip_stack.len() >= MAX_BLEND_STACK {
panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK);
}
self.clip_stack.push(ClipElement { blend: Some(blend) }); self.clip_stack.push(ClipElement { blend: Some(blend) });
if let Some(tos) = self.state_stack.last_mut() { if let Some(tos) = self.state_stack.last_mut() {
tos.n_clip += 1; tos.n_clip += 1;