diff --git a/piet-gpu/shader/gen/binning.msl b/piet-gpu/shader/gen/binning.msl index 3bf96da..0e3b6c8 100644 --- a/piet-gpu/shader/gen/binning.msl +++ b/piet-gpu/shader/gen/binning.msl @@ -220,7 +220,7 @@ void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_94 [[buffer(0)]], const device ConfigBuf& v_202 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint bitmaps[8][256]; - threadgroup bool sh_alloc_failed; + threadgroup short sh_alloc_failed; threadgroup uint count[8][256]; threadgroup Alloc sh_chunk_alloc[256]; constant uint& v_94BufferSize = spvBufferSizeConstants[0]; @@ -232,7 +232,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (gl_LocalInvocationID.x == 0u) { - sh_alloc_failed = false; + sh_alloc_failed = short(false); } threadgroup_barrier(mem_flags::mem_threadgroup); uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; @@ -331,7 +331,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; if (chunk.failed) { - sh_alloc_failed = true; + sh_alloc_failed = short(true); } } uint out_ix = (v_202.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); @@ -347,13 +347,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M write_mem(param_16, param_17, param_18, v_94, v_94BufferSize); threadgroup_barrier(mem_flags::mem_threadgroup); bool _687; - if (!sh_alloc_failed) + if (!bool(sh_alloc_failed)) { _687 = v_94.mem_error != 0u; } else { - _687 = sh_alloc_failed; + _687 = bool(sh_alloc_failed); } if (_687) { diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index 0599eb8..c7e1682 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index b5f3949..f00eabe 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -49,6 +49,17 @@ struct AnnoLinGradient float line_c; }; +struct AnnoEndClipRef +{ + uint offset; +}; + +struct AnnoEndClip +{ + float4 bbox; + uint blend; +}; + struct AnnotatedRef { uint offset; @@ -153,6 +164,16 @@ struct CmdImage int2 offset; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -197,8 +218,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _283 : register(u0, space0); -ByteAddressBuffer _1169 : register(t1, space0); +RWByteAddressBuffer _308 : register(u0, space0); +ByteAddressBuffer _1283 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -221,8 +242,8 @@ groupshared uint sh_tile_count[256]; Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _360 = { a.offset + offset }; - return _360; + Alloc _385 = { a.offset + offset }; + return _385; } bool touch_mem(Alloc alloc, uint offset) @@ -238,7 +259,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _283.Load(offset * 4 + 8); + uint v = _308.Load(offset * 4 + 8); return v; } @@ -251,8 +272,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { - BinInstanceRef _674 = { ref.offset + (index * 4u) }; - return _674; + BinInstanceRef _765 = { ref.offset + (index * 4u) }; + return _765; } BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) @@ -271,8 +292,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _636; + AnnotatedTag _717 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _717; } Path Path_read(Alloc a, PathRef ref) @@ -289,8 +310,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _734 = { raw2 }; - s.tiles = _734; + TileRef _825 = { raw2 }; + s.tiles = _825; return s; } @@ -300,11 +321,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _1055; - _283.GetDimensions(_1055); - _1055 = (_1055 - 8) / 4; + uint _1169; + _308.GetDimensions(_1169); + _1169 = (_1169 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_1055) * 4); + uint param_1 = uint(int(_1169) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -318,9 +339,9 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _759 = { raw0 }; + TileSegRef _850 = { raw0 }; Tile s; - s.tile = _759; + s.tile = _850; s.backdrop = int(raw1); return s; } @@ -355,30 +376,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) { - AnnoColorRef _642 = { ref.offset + 4u }; + AnnoColorRef _723 = { ref.offset + 4u }; Alloc param = a; - AnnoColorRef param_1 = _642; + AnnoColorRef param_1 = _723; return AnnoColor_read(param, param_1); } MallocResult malloc(uint size) { - uint _289; - _283.InterlockedAdd(0, size, _289); - uint offset = _289; - uint _296; - _283.GetDimensions(_296); - _296 = (_296 - 8) / 4; + uint _314; + _308.InterlockedAdd(0, size, _314); + uint offset = _314; + uint _321; + _308.GetDimensions(_321); + _321 = (_321 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_296) * 4); + r.failed = (offset + size) > uint(int(_321) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _318; - _283.InterlockedMax(4, 1u, _318); + uint _343; + _308.InterlockedMax(4, 1u, _343); return r; } return r; @@ -392,7 +413,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _283.Store(offset * 4 + 8, val); + _308.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -410,9 +431,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdJumpRef _1048 = { ref.offset + 4u }; + CmdJumpRef _1162 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _1048; + CmdJumpRef param_4 = _1162; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -424,21 +445,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _1076 = malloc(param); - MallocResult new_cmd = _1076; + MallocResult _1190 = malloc(param); + MallocResult new_cmd = _1190; if (new_cmd.failed) { return false; } - CmdJump _1086 = { new_cmd.alloc.offset }; - CmdJump jump = _1086; + CmdJump _1200 = { new_cmd.alloc.offset }; + CmdJump jump = _1200; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _1098 = { cmd_alloc.offset }; - cmd_ref = _1098; + CmdRef _1212 = { cmd_alloc.offset }; + cmd_ref = _1212; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; return true; } @@ -467,9 +488,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, param_2); - CmdFillRef _932 = { ref.offset + 4u }; + CmdFillRef _1036 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _932; + CmdFillRef param_4 = _1036; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -501,9 +522,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _950 = { ref.offset + 4u }; + CmdStrokeRef _1054 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _950; + CmdStrokeRef param_4 = _1054; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } @@ -515,8 +536,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float { if (tile.tile.offset != 0u) { - CmdFill _1122 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _1122; + CmdFill _1236 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _1236; Alloc param_1 = alloc; CmdRef param_2 = cmd_ref; CmdFill param_3 = cmd_fill; @@ -533,8 +554,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float } else { - CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _1152; + CmdStroke _1266 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _1266; Alloc param_6 = alloc; CmdRef param_7 = cmd_ref; CmdStroke param_8 = cmd_stroke; @@ -558,9 +579,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _976 = { ref.offset + 4u }; + CmdColorRef _1080 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _976; + CmdColorRef param_4 = _1080; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } @@ -607,9 +628,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref) AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref) { - AnnoLinGradientRef _652 = { ref.offset + 4u }; + AnnoLinGradientRef _733 = { ref.offset + 4u }; Alloc param = a; - AnnoLinGradientRef param_1 = _652; + AnnoLinGradientRef param_1 = _733; return AnnoLinGradient_read(param, param_1); } @@ -640,9 +661,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _994 = { ref.offset + 4u }; + CmdLinGradRef _1098 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _994; + CmdLinGradRef param_4 = _1098; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } @@ -681,9 +702,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) { - AnnoImageRef _662 = { ref.offset + 4u }; + AnnoImageRef _743 = { ref.offset + 4u }; Alloc param = a; - AnnoImageRef param_1 = _662; + AnnoImageRef param_1 = _743; return AnnoImage_read(param, param_1); } @@ -706,9 +727,9 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; write_mem(param, param_1, param_2); - CmdImageRef _1012 = { ref.offset + 4u }; + CmdImageRef _1116 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _1012; + CmdImageRef param_4 = _1116; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } @@ -721,12 +742,58 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) write_mem(param, param_1, param_2); } -void Cmd_EndClip_write(Alloc a, CmdRef ref) +AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + AnnoEndClip s; + s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = raw4; + return s; +} + +AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) +{ + AnnoEndClipRef _753 = { ref.offset + 4u }; + Alloc param = a; + AnnoEndClipRef param_1 = _753; + return AnnoEndClip_read(param, param_1); +} + +void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.blend; + write_mem(param, param_1, param_2); +} + +void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; write_mem(param, param_1, param_2); + CmdEndClipRef _1143 = { ref.offset + 4u }; + Alloc param_3 = a; + CmdEndClipRef param_4 = _1143; + CmdEndClip param_5 = s; + CmdEndClip_write(param_3, param_4, param_5); } void Cmd_End_write(Alloc a, CmdRef ref) @@ -739,25 +806,25 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_1283.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_1283.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x; - Alloc _1234; - _1234.offset = _1169.Load(24); + uint this_tile_ix = (((bin_tile_y + tile_y) * _1283.Load(8)) + bin_tile_x) + tile_x; + Alloc _1348; + _1348.offset = _1283.Load(24); Alloc param; - param.offset = _1234.offset; + param.offset = _1348.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1243 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1243; + CmdRef _1357 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1357; uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint clip_depth = 0u; uint clip_zero_depth = 0u; @@ -765,17 +832,17 @@ void comp_main() uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = _283.Load(4) == 0u; + bool mem_ok = _308.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1448; + uint _1562; uint element_ix; AnnotatedRef ref; Alloc param_14; Alloc param_16; uint tile_count; Alloc param_23; - uint _1770; + uint _1887; Alloc param_29; Tile tile_1; AnnoColor fill; @@ -783,40 +850,41 @@ void comp_main() Alloc param_52; CmdLinGrad cmd_lin; Alloc param_69; + Alloc param_95; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1500; + bool _1614; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1298 = th_ix < 256u; - bool _1306; - if (_1298) + bool _1412 = th_ix < 256u; + bool _1420; + if (_1412) { - _1306 = (partition_ix + th_ix) < n_partitions; + _1420 = (partition_ix + th_ix) < n_partitions; } else { - _1306 = _1298; + _1420 = _1412; } - if (_1306) + if (_1420) { - uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1323; - _1323.offset = _1169.Load(20); - param_3.offset = _1323.offset; + uint in_ix = (_1283.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1437; + _1437.offset = _1283.Load(20); + param_3.offset = _1437.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1334; - _1334.offset = _1169.Load(20); - param_5.offset = _1334.offset; + Alloc _1448; + _1448.offset = _1283.Load(20); + param_5.offset = _1448.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -862,16 +930,16 @@ void comp_main() } if (part_ix > 0u) { - _1448 = sh_part_count[part_ix - 1u]; + _1562 = sh_part_count[part_ix - 1u]; } else { - _1448 = part_start_ix; + _1562 = part_start_ix; } - ix -= _1448; + ix -= _1562; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1467 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1467; + BinInstanceRef _1581 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1581; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -881,16 +949,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1490 = (wr_ix - rd_ix) < 256u; - if (_1490) + bool _1604 = (wr_ix - rd_ix) < 256u; + if (_1604) { - _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1614 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1500 = _1490; + _1614 = _1604; } - if (_1500) + if (_1614) { continue; } @@ -903,11 +971,11 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) }; - ref = _1521; - Alloc _1524; - _1524.offset = _1169.Load(32); - param_14.offset = _1524.offset; + AnnotatedRef _1635 = { _1283.Load(32) + (element_ix * 40u) }; + ref = _1635; + Alloc _1638; + _1638.offset = _1283.Load(32); + param_14.offset = _1638.offset; AnnotatedRef param_15 = ref; tag = Annotated_tag(param_14, param_15).tag; } @@ -919,13 +987,13 @@ void comp_main() case 4u: case 5u: { - uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix); - uint path_ix = _283.Load(drawmonoid_base * 4 + 8); - PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) }; - Alloc _1556; - _1556.offset = _1169.Load(16); - param_16.offset = _1556.offset; - PathRef param_17 = _1553; + uint drawmonoid_base = (_1283.Load(44) >> uint(2)) + (2u * element_ix); + uint path_ix = _308.Load(drawmonoid_base * 4 + 8); + PathRef _1667 = { _1283.Load(16) + (path_ix * 12u) }; + Alloc _1670; + _1670.offset = _1283.Load(16); + param_16.offset = _1670.offset; + PathRef param_17 = _1667; Path path = Path_read(param_16, param_17); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -980,22 +1048,23 @@ void comp_main() el_ix = probe_1; } } - AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) }; - AnnotatedRef ref_1 = _1755; - Alloc _1759; - _1759.offset = _1169.Load(32); - param_23.offset = _1759.offset; + AnnotatedRef _1869 = { _1283.Load(32) + (sh_elements[el_ix] * 40u) }; + AnnotatedRef ref_1 = _1869; + Alloc _1874; + _1874.offset = _1283.Load(32); + param_23.offset = _1874.offset; AnnotatedRef param_24 = ref_1; - uint tag_1 = Annotated_tag(param_23, param_24).tag; + AnnotatedTag anno_tag = Annotated_tag(param_23, param_24); + uint tag_1 = anno_tag.tag; if (el_ix > 0u) { - _1770 = sh_tile_count[el_ix - 1u]; + _1887 = sh_tile_count[el_ix - 1u]; } else { - _1770 = 0u; + _1887 = 0u; } - uint seq_ix = ix_1 - _1770; + uint seq_ix = ix_1 - _1887; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -1004,29 +1073,47 @@ void comp_main() { uint param_25 = el_ix; bool param_26 = mem_ok; - TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + TileRef _1939 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1822; + TileRef param_28 = _1939; Tile tile = Tile_read(param_27, param_28); bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); - bool _1834 = tile.tile.offset != 0u; - bool _1843; - if (!_1834) + bool _1951 = tile.tile.offset != 0u; + bool _1960; + if (!_1951) { - _1843 = (tile.backdrop == 0) == is_clip; + _1960 = (tile.backdrop == 0) == is_clip; } else { - _1843 = _1834; + _1960 = _1951; } - include_tile = _1843; + bool _1972; + if (!_1960) + { + bool _1971; + if (is_clip) + { + _1971 = (anno_tag.flags & 2u) != 0u; + } + else + { + _1971 = is_clip; + } + _1972 = _1971; + } + else + { + _1972 = _1960; + } + include_tile = _1972; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1863; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863); + uint _1992; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1992); } } GroupMemoryBarrierWithGroupSync(); @@ -1050,11 +1137,11 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) }; - ref = _1917; - Alloc _1922; - _1922.offset = _1169.Load(32); - param_29.offset = _1922.offset; + AnnotatedRef _2046 = { _1283.Load(32) + (element_ix_1 * 40u) }; + ref = _2046; + Alloc _2050; + _2050.offset = _1283.Load(32); + param_29.offset = _2050.offset; AnnotatedRef param_30 = ref; AnnotatedTag tag_2 = Annotated_tag(param_29, param_30); if (clip_zero_depth == 0u) @@ -1065,23 +1152,23 @@ void comp_main() { uint param_31 = element_ref_ix; bool param_32 = mem_ok; - TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2086 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_33 = read_tile_alloc(param_31, param_32); - TileRef param_34 = _1958; + TileRef param_34 = _2086; tile_1 = Tile_read(param_33, param_34); - Alloc _1965; - _1965.offset = _1169.Load(32); - param_35.offset = _1965.offset; + Alloc _2093; + _2093.offset = _1283.Load(32); + param_35.offset = _2093.offset; AnnotatedRef param_36 = ref; fill = Annotated_Color_read(param_35, param_36); Alloc param_37 = cmd_alloc; CmdRef param_38 = cmd_ref; uint param_39 = cmd_limit; - bool _1977 = alloc_cmd(param_37, param_38, param_39); + bool _2105 = alloc_cmd(param_37, param_38, param_39); cmd_alloc = param_37; cmd_ref = param_38; cmd_limit = param_39; - if (!_1977) + if (!_2105) { break; } @@ -1092,10 +1179,10 @@ void comp_main() float param_44 = fill.linewidth; write_fill(param_40, param_41, param_42, param_43, param_44); cmd_ref = param_41; - CmdColor _2001 = { fill.rgba_color }; + CmdColor _2129 = { fill.rgba_color }; Alloc param_45 = cmd_alloc; CmdRef param_46 = cmd_ref; - CmdColor param_47 = _2001; + CmdColor param_47 = _2129; Cmd_Color_write(param_45, param_46, param_47); cmd_ref.offset += 8u; break; @@ -1104,23 +1191,23 @@ void comp_main() { uint param_48 = element_ref_ix; bool param_49 = mem_ok; - TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2158 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_50 = read_tile_alloc(param_48, param_49); - TileRef param_51 = _2030; + TileRef param_51 = _2158; tile_1 = Tile_read(param_50, param_51); - Alloc _2037; - _2037.offset = _1169.Load(32); - param_52.offset = _2037.offset; + Alloc _2165; + _2165.offset = _1283.Load(32); + param_52.offset = _2165.offset; AnnotatedRef param_53 = ref; AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53); Alloc param_54 = cmd_alloc; CmdRef param_55 = cmd_ref; uint param_56 = cmd_limit; - bool _2049 = alloc_cmd(param_54, param_55, param_56); + bool _2177 = alloc_cmd(param_54, param_55, param_56); cmd_alloc = param_54; cmd_ref = param_55; cmd_limit = param_56; - if (!_2049) + if (!_2177) { break; } @@ -1146,23 +1233,23 @@ void comp_main() { uint param_65 = element_ref_ix; bool param_66 = mem_ok; - TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2242 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_67 = read_tile_alloc(param_65, param_66); - TileRef param_68 = _2114; + TileRef param_68 = _2242; tile_1 = Tile_read(param_67, param_68); - Alloc _2121; - _2121.offset = _1169.Load(32); - param_69.offset = _2121.offset; + Alloc _2249; + _2249.offset = _1283.Load(32); + param_69.offset = _2249.offset; AnnotatedRef param_70 = ref; AnnoImage fill_img = Annotated_Image_read(param_69, param_70); Alloc param_71 = cmd_alloc; CmdRef param_72 = cmd_ref; uint param_73 = cmd_limit; - bool _2133 = alloc_cmd(param_71, param_72, param_73); + bool _2261 = alloc_cmd(param_71, param_72, param_73); cmd_alloc = param_71; cmd_ref = param_72; cmd_limit = param_73; - if (!_2133) + if (!_2261) { break; } @@ -1173,10 +1260,10 @@ void comp_main() float param_78 = fill_img.linewidth; write_fill(param_74, param_75, param_76, param_77, param_78); cmd_ref = param_75; - CmdImage _2159 = { fill_img.index, fill_img.offset }; + CmdImage _2287 = { fill_img.index, fill_img.offset }; Alloc param_79 = cmd_alloc; CmdRef param_80 = cmd_ref; - CmdImage param_81 = _2159; + CmdImage param_81 = _2287; Cmd_Image_write(param_79, param_80, param_81); cmd_ref.offset += 12u; break; @@ -1185,21 +1272,21 @@ void comp_main() { uint param_82 = element_ref_ix; bool param_83 = mem_ok; - TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2316 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_84 = read_tile_alloc(param_82, param_83); - TileRef param_85 = _2188; + TileRef param_85 = _2316; tile_1 = Tile_read(param_84, param_85); - bool _2194 = tile_1.tile.offset == 0u; - bool _2200; - if (_2194) + bool _2322 = tile_1.tile.offset == 0u; + bool _2328; + if (_2322) { - _2200 = tile_1.backdrop == 0; + _2328 = tile_1.backdrop == 0; } else { - _2200 = _2194; + _2328 = _2322; } - if (_2200) + if (_2328) { clip_zero_depth = clip_depth + 1u; } @@ -1208,11 +1295,11 @@ void comp_main() Alloc param_86 = cmd_alloc; CmdRef param_87 = cmd_ref; uint param_88 = cmd_limit; - bool _2212 = alloc_cmd(param_86, param_87, param_88); + bool _2340 = alloc_cmd(param_86, param_87, param_88); cmd_alloc = param_86; cmd_ref = param_87; cmd_limit = param_88; - if (!_2212) + if (!_2340) { break; } @@ -1228,33 +1315,40 @@ void comp_main() { uint param_91 = element_ref_ix; bool param_92 = mem_ok; - TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2377 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_93 = read_tile_alloc(param_91, param_92); - TileRef param_94 = _2249; + TileRef param_94 = _2377; tile_1 = Tile_read(param_93, param_94); + Alloc _2384; + _2384.offset = _1283.Load(32); + param_95.offset = _2384.offset; + AnnotatedRef param_96 = ref; + AnnoEndClip end_clip = Annotated_EndClip_read(param_95, param_96); clip_depth--; - Alloc param_95 = cmd_alloc; - CmdRef param_96 = cmd_ref; - uint param_97 = cmd_limit; - bool _2261 = alloc_cmd(param_95, param_96, param_97); - cmd_alloc = param_95; - cmd_ref = param_96; - cmd_limit = param_97; - if (!_2261) + Alloc param_97 = cmd_alloc; + CmdRef param_98 = cmd_ref; + uint param_99 = cmd_limit; + bool _2398 = alloc_cmd(param_97, param_98, param_99); + cmd_alloc = param_97; + cmd_ref = param_98; + cmd_limit = param_99; + if (!_2398) { break; } - Alloc param_98 = cmd_alloc; - CmdRef param_99 = cmd_ref; - uint param_100 = 0u; - Tile param_101 = tile_1; - float param_102 = 0.0f; - write_fill(param_98, param_99, param_100, param_101, param_102); - cmd_ref = param_99; - Alloc param_103 = cmd_alloc; - CmdRef param_104 = cmd_ref; - Cmd_EndClip_write(param_103, param_104); - cmd_ref.offset += 4u; + Alloc param_100 = cmd_alloc; + CmdRef param_101 = cmd_ref; + uint param_102 = 0u; + Tile param_103 = tile_1; + float param_104 = 0.0f; + write_fill(param_100, param_101, param_102, param_103, param_104); + cmd_ref = param_101; + CmdEndClip _2419 = { end_clip.blend }; + Alloc param_105 = cmd_alloc; + CmdRef param_106 = cmd_ref; + CmdEndClip param_107 = _2419; + Cmd_EndClip_write(param_105, param_106, param_107); + cmd_ref.offset += 8u; break; } } @@ -1287,21 +1381,21 @@ void comp_main() break; } } - bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8); - bool _2335; - if (_2326) + bool _2467 = (bin_tile_x + tile_x) < _1283.Load(8); + bool _2476; + if (_2467) { - _2335 = (bin_tile_y + tile_y) < _1169.Load(12); + _2476 = (bin_tile_y + tile_y) < _1283.Load(12); } else { - _2335 = _2326; + _2476 = _2467; } - if (_2335) + if (_2476) { - Alloc param_105 = cmd_alloc; - CmdRef param_106 = cmd_ref; - Cmd_End_write(param_105, param_106); + Alloc param_108 = cmd_alloc; + CmdRef param_109 = cmd_ref; + Cmd_End_write(param_108, param_109); } } diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 21bd30c..1422ff1 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -7,6 +7,13 @@ using namespace metal; +// Implementation of the GLSL findLSB() function +template +inline T spvFindLSB(T x) +{ + return select(ctz(x), T(-1), x == T(0)); +} + struct Alloc { uint offset; @@ -244,13 +251,6 @@ struct ConfigBuf constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); -// Implementation of the GLSL findLSB() function -template -inline T spvFindLSB(T x) -{ - return select(ctz(x), T(-1), x == T(0)); -} - static inline __attribute__((always_inline)) Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) { diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index d901a80..d1567c9 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index 0ca5843..1f2f78b 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -41,6 +41,17 @@ struct FillImage int2 offset; }; +struct ClipRef +{ + uint offset; +}; + +struct Clip +{ + float4 bbox; + uint blend; +}; + struct ElementTag { uint tag; @@ -102,6 +113,7 @@ struct AnnoBeginClip { float4 bbox; float linewidth; + uint blend; }; struct AnnoEndClipRef @@ -112,6 +124,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -148,14 +161,14 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -static const DrawMonoid _348 = { 0u, 0u }; -static const DrawMonoid _372 = { 1u, 0u }; -static const DrawMonoid _374 = { 1u, 1u }; +static const DrawMonoid _413 = { 0u, 0u }; +static const DrawMonoid _437 = { 1u, 0u }; +static const DrawMonoid _439 = { 1u, 1u }; -RWByteAddressBuffer _187 : register(u0, space0); -ByteAddressBuffer _211 : register(t2, space0); -ByteAddressBuffer _934 : register(t3, space0); -ByteAddressBuffer _968 : register(t1, space0); +RWByteAddressBuffer _199 : register(u0, space0); +ByteAddressBuffer _223 : register(t2, space0); +ByteAddressBuffer _1020 : register(t3, space0); +ByteAddressBuffer _1054 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -171,9 +184,9 @@ groupshared DrawMonoid sh_scratch[256]; ElementTag Element_tag(ElementRef ref) { - uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _321; + uint tag_and_flags = _223.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _378 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _378; } DrawMonoid map_tag(uint tag_word) @@ -184,24 +197,24 @@ DrawMonoid map_tag(uint tag_word) case 5u: case 6u: { - return _372; + return _437; } case 9u: case 10u: { - return _374; + return _439; } default: { - return _348; + return _413; } } } ElementRef Element_index(ElementRef ref, uint index) { - ElementRef _200 = { ref.offset + (index * 36u) }; - return _200; + ElementRef _212 = { ref.offset + (index * 36u) }; + return _212; } DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) @@ -214,13 +227,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid tag_monoid_identity() { - return _348; + return _413; } FillColor FillColor_read(FillColorRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); FillColor s; s.rgba_color = raw0; return s; @@ -228,8 +241,8 @@ FillColor FillColor_read(FillColorRef ref) FillColor Element_FillColor_read(ElementRef ref) { - FillColorRef _327 = { ref.offset + 4u }; - FillColorRef param = _327; + FillColorRef _384 = { ref.offset + 4u }; + FillColorRef param = _384; return FillColor_read(param); } @@ -246,7 +259,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _187.Store(offset * 4 + 8, val); + _199.Store(offset * 4 + 8, val); } void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) @@ -284,9 +297,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 1u; write_mem(param, param_1, param_2); - AnnoColorRef _735 = { ref.offset + 4u }; + AnnoColorRef _818 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoColorRef param_4 = _735; + AnnoColorRef param_4 = _818; AnnoColor param_5 = s; AnnoColor_write(param_3, param_4, param_5); } @@ -294,11 +307,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) FillLinGradient FillLinGradient_read(FillLinGradientRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); - uint raw1 = _211.Load((ix + 1u) * 4 + 0); - uint raw2 = _211.Load((ix + 2u) * 4 + 0); - uint raw3 = _211.Load((ix + 3u) * 4 + 0); - uint raw4 = _211.Load((ix + 4u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); + uint raw2 = _223.Load((ix + 2u) * 4 + 0); + uint raw3 = _223.Load((ix + 3u) * 4 + 0); + uint raw4 = _223.Load((ix + 4u) * 4 + 0); FillLinGradient s; s.index = raw0; s.p0 = float2(asfloat(raw1), asfloat(raw2)); @@ -308,8 +321,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref) FillLinGradient Element_FillLinGradient_read(ElementRef ref) { - FillLinGradientRef _335 = { ref.offset + 4u }; - FillLinGradientRef param = _335; + FillLinGradientRef _392 = { ref.offset + 4u }; + FillLinGradientRef param = _392; return FillLinGradient_read(param); } @@ -360,9 +373,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 2u; write_mem(param, param_1, param_2); - AnnoLinGradientRef _756 = { ref.offset + 4u }; + AnnoLinGradientRef _839 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoLinGradientRef param_4 = _756; + AnnoLinGradientRef param_4 = _839; AnnoLinGradient param_5 = s; AnnoLinGradient_write(param_3, param_4, param_5); } @@ -370,8 +383,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG FillImage FillImage_read(FillImageRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); - uint raw1 = _211.Load((ix + 1u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); FillImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -380,8 +393,8 @@ FillImage FillImage_read(FillImageRef ref) FillImage Element_FillImage_read(ElementRef ref) { - FillImageRef _343 = { ref.offset + 4u }; - FillImageRef param = _343; + FillImageRef _400 = { ref.offset + 4u }; + FillImageRef param = _400; return FillImage_read(param); } @@ -424,13 +437,33 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 3u; write_mem(param, param_1, param_2); - AnnoImageRef _777 = { ref.offset + 4u }; + AnnoImageRef _860 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoImageRef param_4 = _777; + AnnoImageRef param_4 = _860; AnnoImage param_5 = s; AnnoImage_write(param_3, param_4, param_5); } +Clip Clip_read(ClipRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); + uint raw2 = _223.Load((ix + 2u) * 4 + 0); + uint raw3 = _223.Load((ix + 3u) * 4 + 0); + Clip s; + s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = _223.Load((ix + 4u) * 4 + 0); + return s; +} + +Clip Element_BeginClip_read(ElementRef ref) +{ + ClipRef _408 = { ref.offset + 4u }; + ClipRef param = _408; + return Clip_read(param); +} + void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) { uint ix = ref.offset >> uint(2); @@ -454,6 +487,10 @@ void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) uint param_13 = ix + 4u; uint param_14 = asuint(s.linewidth); write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.blend; + write_mem(param_15, param_16, param_17); } void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) @@ -462,9 +499,9 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 4u; write_mem(param, param_1, param_2); - AnnoBeginClipRef _798 = { ref.offset + 4u }; + AnnoBeginClipRef _881 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoBeginClipRef param_4 = _798; + AnnoBeginClipRef param_4 = _881; AnnoBeginClip param_5 = s; AnnoBeginClip_write(param_3, param_4, param_5); } @@ -488,17 +525,21 @@ void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) uint param_10 = ix + 3u; uint param_11 = asuint(s.bbox.w); write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = s.blend; + write_mem(param_12, param_13, param_14); } -void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) +void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; + uint param_2 = (flags << uint(16)) | 5u; write_mem(param, param_1, param_2); - AnnoEndClipRef _816 = { ref.offset + 4u }; + AnnoEndClipRef _902 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoEndClipRef param_4 = _816; + AnnoEndClipRef param_4 = _902; AnnoEndClip param_5 = s; AnnoEndClip_write(param_3, param_4, param_5); } @@ -506,8 +547,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _834 = { ix * 36u }; - ElementRef ref = _834; + ElementRef _920 = { ix * 36u }; + ElementRef ref = _920; ElementRef param = ref; uint tag_word = Element_tag(param).tag; uint param_1 = tag_word; @@ -544,11 +585,11 @@ void comp_main() DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _940; - _940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0); - _940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4); - row.path_ix = _940.path_ix; - row.clip_ix = _940.clip_ix; + DrawMonoid _1026; + _1026.path_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _1026.clip_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _1026.path_ix; + row.clip_ix = _1026.clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -557,10 +598,10 @@ void comp_main() row = combine_tag_monoid(param_10, param_11); } uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u); - uint clip_out_base = _968.Load(48) >> uint(2); - AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) }; - AnnotatedRef out_ref = _989; + uint out_base = (_1054.Load(44) >> uint(2)) + (out_ix * 2u); + uint clip_out_base = _1054.Load(48) >> uint(2); + AnnotatedRef _1075 = { _1054.Load(32) + (out_ix * 40u) }; + AnnotatedRef out_ref = _1075; float4 mat; float2 translate; AnnoColor anno_fill; @@ -570,9 +611,9 @@ void comp_main() AnnoImage anno_img; Alloc param_28; AnnoBeginClip anno_begin_clip; - Alloc param_32; + Alloc param_33; AnnoEndClip anno_end_clip; - Alloc param_36; + Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -582,8 +623,8 @@ void comp_main() DrawMonoid param_13 = local[i_2 - 1u]; m = combine_tag_monoid(param_12, param_13); } - _187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); - _187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + _199.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _199.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); ElementRef param_14 = ref; uint param_15 = i_2; ElementRef this_ref = Element_index(param_14, param_15); @@ -591,22 +632,22 @@ void comp_main() tag_word = Element_tag(param_16).tag; if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) { - uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_1054.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_199.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_199.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_199.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_199.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_199.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); if ((linewidth >= 0.0f) || (tag_word == 5u)) { - uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8))); + uint trans_ix = _199.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_1054.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_199.Load(t * 4 + 8), _199.Load((t + 1u) * 4 + 8), _199.Load((t + 2u) * 4 + 8), _199.Load((t + 3u) * 4 + 8))); if (tag_word == 5u) { - translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_199.Load((t + 4u) * 4 + 8), _199.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) @@ -623,9 +664,9 @@ void comp_main() anno_fill.bbox = bbox; anno_fill.linewidth = linewidth; anno_fill.rgba_color = fill.rgba_color; - Alloc _1203; - _1203.offset = _968.Load(32); - param_18.offset = _1203.offset; + Alloc _1288; + _1288.offset = _1054.Load(32); + param_18.offset = _1288.offset; AnnotatedRef param_19 = out_ref; uint param_20 = fill_mode; AnnoColor param_21 = anno_fill; @@ -648,9 +689,9 @@ void comp_main() anno_lin.line_x = line_x; anno_lin.line_y = line_y; anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - Alloc _1299; - _1299.offset = _968.Load(32); - param_23.offset = _1299.offset; + Alloc _1384; + _1384.offset = _1054.Load(32); + param_23.offset = _1384.offset; AnnotatedRef param_24 = out_ref; uint param_25 = fill_mode; AnnoLinGradient param_26 = anno_lin; @@ -665,9 +706,9 @@ void comp_main() anno_img.linewidth = linewidth; anno_img.index = fill_img.index; anno_img.offset = fill_img.offset; - Alloc _1327; - _1327.offset = _968.Load(32); - param_28.offset = _1327.offset; + Alloc _1412; + _1412.offset = _1054.Load(32); + param_28.offset = _1412.offset; AnnotatedRef param_29 = out_ref; uint param_30 = fill_mode; AnnoImage param_31 = anno_img; @@ -676,15 +717,19 @@ void comp_main() } case 9u: { + ElementRef param_32 = this_ref; + Clip begin_clip = Element_BeginClip_read(param_32); anno_begin_clip.bbox = bbox; anno_begin_clip.linewidth = 0.0f; - Alloc _1344; - _1344.offset = _968.Load(32); - param_32.offset = _1344.offset; - AnnotatedRef param_33 = out_ref; - uint param_34 = 0u; - AnnoBeginClip param_35 = anno_begin_clip; - Annotated_BeginClip_write(param_32, param_33, param_34, param_35); + anno_begin_clip.blend = begin_clip.blend; + uint flags = uint(begin_clip.blend != 3u) << uint(1); + Alloc _1442; + _1442.offset = _1054.Load(32); + param_33.offset = _1442.offset; + AnnotatedRef param_34 = out_ref; + uint param_35 = flags; + AnnoBeginClip param_36 = anno_begin_clip; + Annotated_BeginClip_write(param_33, param_34, param_35, param_36); break; } } @@ -693,13 +738,18 @@ void comp_main() { if (tag_word == 10u) { + ElementRef param_37 = this_ref; + Clip end_clip = Element_BeginClip_read(param_37); anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - Alloc _1368; - _1368.offset = _968.Load(32); - param_36.offset = _1368.offset; - AnnotatedRef param_37 = out_ref; - AnnoEndClip param_38 = anno_end_clip; - Annotated_EndClip_write(param_36, param_37, param_38); + anno_end_clip.blend = end_clip.blend; + uint flags_1 = uint(end_clip.blend != 3u) << uint(1); + Alloc _1480; + _1480.offset = _1054.Load(32); + param_38.offset = _1480.offset; + AnnotatedRef param_39 = out_ref; + uint param_40 = flags_1; + AnnoEndClip param_41 = anno_end_clip; + Annotated_EndClip_write(param_38, param_39, param_40, param_41); } } if ((tag_word == 9u) || (tag_word == 10u)) @@ -709,7 +759,7 @@ void comp_main() { path_ix = m.path_ix; } - _187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); + _199.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } out_ref.offset += 40u; } diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index 0a14cfa..c0c27c9 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 9457d14..21bd083 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -69,6 +69,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -132,8 +142,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _202 : register(u0, space0); -ByteAddressBuffer _723 : register(t1, space0); +RWByteAddressBuffer _278 : register(u0, space0); +ByteAddressBuffer _1521 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -160,8 +170,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _215 = { a.offset + offset }; - return _215; + Alloc _291 = { a.offset + offset }; + return _291; } bool touch_mem(Alloc alloc, uint offset) @@ -177,7 +187,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _202.Load(offset * 4 + 8); + uint v = _278.Load(offset * 4 + 8); return v; } @@ -186,8 +196,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _432 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _432; + CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _525; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -207,9 +217,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _449 = { ref.offset + 4u }; + CmdStrokeRef _542 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _449; + CmdStrokeRef param_1 = _542; return CmdStroke_read(param, param_1); } @@ -245,8 +255,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _572 = { raw5 }; - s.next = _572; + TileSegRef _675 = { raw5 }; + s.next = _675; return s; } @@ -272,9 +282,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _439 = { ref.offset + 4u }; + CmdFillRef _532 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _439; + CmdFillRef param_1 = _532; return CmdFill_read(param, param_1); } @@ -291,9 +301,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _459 = { ref.offset + 4u }; + CmdAlphaRef _552 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _459; + CmdAlphaRef param_1 = _552; return CmdAlpha_read(param, param_1); } @@ -310,9 +320,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _469 = { ref.offset + 4u }; + CmdColorRef _562 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _469; + CmdColorRef param_1 = _562; return CmdColor_read(param, param_1); } @@ -356,9 +366,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _479 = { ref.offset + 4u }; + CmdLinGradRef _572 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _479; + CmdLinGradRef param_1 = _572; return CmdLinGrad_read(param, param_1); } @@ -379,9 +389,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _489 = { ref.offset + 4u }; + CmdImageRef _582 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _489; + CmdImageRef param_1 = _582; return CmdImage_read(param, param_1); } @@ -394,10 +404,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -418,6 +428,438 @@ uint packsRGB(inout float4 rgba) return spvPackUnorm4x8(rgba.wzyx); } +CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdEndClip s; + s.blend = raw0; + return s; +} + +CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) +{ + CmdEndClipRef _592 = { ref.offset + 4u }; + Alloc param = a; + CmdEndClipRef param_1 = _592; + return CmdEndClip_read(param, param_1); +} + +float3 screen(float3 cb, float3 cs) +{ + return (cb + cs) - (cb * cs); +} + +float3 hard_light(float3 cb, float3 cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0f) - 1.0f.xxx; + return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float color_dodge(float cb, float cs) +{ + if (cb == 0.0f) + { + return 0.0f; + } + else + { + if (cs == 1.0f) + { + return 1.0f; + } + else + { + return min(1.0f, cb / (1.0f - cs)); + } + } +} + +float color_burn(float cb, float cs) +{ + if (cb == 1.0f) + { + return 1.0f; + } + else + { + if (cs == 0.0f) + { + return 0.0f; + } + else + { + return 1.0f - min(1.0f, (1.0f - cb) / cs); + } + } +} + +float3 soft_light(float3 cb, float3 cs) +{ + float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); + return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float sat(float3 c) +{ + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0f; + cmax = 0.0f; + } + cmin = 0.0f; +} + +float3 set_sat(inout float3 c, float s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +float lum(float3 c) +{ + float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); + return dot(c, f); +} + +float3 clip_color(inout float3 c) +{ + float3 param = c; + float L = lum(param); + float n = min(c.x, min(c.y, c.z)); + float x = max(c.x, max(c.y, c.z)); + if (n < 0.0f) + { + c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); + } + if (x > 1.0f) + { + c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); + } + return c; +} + +float3 set_lum(float3 c, float l) +{ + float3 param = c; + float3 param_1 = c + (l - lum(param)).xxx; + float3 _901 = clip_color(param_1); + return _901; +} + +float3 mix_blend(float3 cb, float3 cs, uint mode) +{ + float3 b = 0.0f.xxx; + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = min(cb, cs); + break; + } + case 5u: + { + b = max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0f) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) +{ + float fa = 0.0f; + float fb = 0.0f; + switch (mode) + { + case 1u: + { + fa = 1.0f; + fb = 0.0f; + break; + } + case 2u: + { + fa = 0.0f; + fb = 1.0f; + break; + } + case 3u: + { + fa = 1.0f; + fb = 1.0f - as; + break; + } + case 4u: + { + fa = 1.0f - ab; + fb = 1.0f; + break; + } + case 5u: + { + fa = ab; + fb = 0.0f; + break; + } + case 6u: + { + fa = 0.0f; + fb = as; + break; + } + case 7u: + { + fa = 1.0f - ab; + fb = 0.0f; + break; + } + case 8u: + { + fa = 0.0f; + fb = 1.0f - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0f - as; + break; + } + case 10u: + { + fa = 1.0f - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0f - ab; + fb = 1.0f - as; + break; + } + case 12u: + { + fa = 1.0f; + fb = 1.0f; + break; + } + case 13u: + { + return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + } + case 14u: + { + return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> uint(2); @@ -431,24 +873,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _499 = { ref.offset + 4u }; + CmdJumpRef _602 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _499; + CmdJumpRef param_1 = _602; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _723.Load(8)) + gl_WorkGroupID.x; - Alloc _738; - _738.offset = _723.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; + Alloc _1536; + _1536.offset = _1521.Load(24); Alloc param; - param.offset = _738.offset; + param.offset = _1536.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _747 = { cmd_alloc.offset }; - CmdRef cmd_ref = _747; + CmdRef _1545 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1545; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -457,7 +899,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _202.Load(4) == 0u; + bool mem_ok = _278.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -482,8 +924,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _842 = { stroke.tile_ref }; - tile_seg_ref = _842; + TileSegRef _1638 = { stroke.tile_ref }; + tile_seg_ref = _1638; do { uint param_7 = tile_seg_ref.offset; @@ -519,8 +961,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _964 = { fill.tile_ref }; - tile_seg_ref = _964; + TileSegRef _1758 = { fill.tile_ref }; + tile_seg_ref = _1758; do { uint param_15 = tile_seg_ref.offset; @@ -609,10 +1051,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -625,9 +1067,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_30, param_31); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; - float4 _1327[8]; - fillImage(_1327, param_32, param_33); - float4 img[8] = _1327; + float4 _2121[8]; + fillImage(_2121, param_32, param_33); + float4 img[8] = _2121; for (uint k_10 = 0u; k_10 < 8u; k_10++) { float4 fg_k_1 = img[k_10] * area[k_10]; @@ -642,8 +1084,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -652,24 +1094,44 @@ void comp_main() } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1453; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; + cmd_ref = _2299; cmd_alloc.offset = cmd_ref.offset; break; } @@ -677,9 +1139,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - float3 param_39 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_38))] = float4(tosRGB(param_39), rgba[i_1].w); + uint param_48 = i_1; + float3 param_49 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index b58218c..9a8fbd0 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -511,7 +511,7 @@ float3 hard_light(thread const float3& cb, thread const float3& cs) { float3 param = cb; float3 param_1 = (cs * 2.0) - float3(1.0); - return mix(screen(param, param_1), (cb * 2.0) * cs, select(float3(0.0), float3(1.0), cs <= float3(0.5))); + return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); } static inline __attribute__((always_inline)) @@ -557,8 +557,8 @@ float color_burn(thread const float& cb, thread const float& cs) static inline __attribute__((always_inline)) float3 soft_light(thread const float3& cb, thread const float3& cs) { - float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, select(float3(0.0), float3(1.0), cb <= float3(0.25))); - return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), select(float3(0.0), float3(1.0), cs <= float3(0.5))); + float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); + return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); } static inline __attribute__((always_inline)) diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index f3bd028..18c4b7e 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 5ff97fb..7dc2e01 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -69,6 +69,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -132,8 +142,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _202 : register(u0, space0); -ByteAddressBuffer _723 : register(t1, space0); +RWByteAddressBuffer _278 : register(u0, space0); +ByteAddressBuffer _1521 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -160,8 +170,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _215 = { a.offset + offset }; - return _215; + Alloc _291 = { a.offset + offset }; + return _291; } bool touch_mem(Alloc alloc, uint offset) @@ -177,7 +187,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _202.Load(offset * 4 + 8); + uint v = _278.Load(offset * 4 + 8); return v; } @@ -186,8 +196,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _432 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _432; + CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _525; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -207,9 +217,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _449 = { ref.offset + 4u }; + CmdStrokeRef _542 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _449; + CmdStrokeRef param_1 = _542; return CmdStroke_read(param, param_1); } @@ -245,8 +255,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _572 = { raw5 }; - s.next = _572; + TileSegRef _675 = { raw5 }; + s.next = _675; return s; } @@ -272,9 +282,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _439 = { ref.offset + 4u }; + CmdFillRef _532 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _439; + CmdFillRef param_1 = _532; return CmdFill_read(param, param_1); } @@ -291,9 +301,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _459 = { ref.offset + 4u }; + CmdAlphaRef _552 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _459; + CmdAlphaRef param_1 = _552; return CmdAlpha_read(param, param_1); } @@ -310,9 +320,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _469 = { ref.offset + 4u }; + CmdColorRef _562 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _469; + CmdColorRef param_1 = _562; return CmdColor_read(param, param_1); } @@ -356,9 +366,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _479 = { ref.offset + 4u }; + CmdLinGradRef _572 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _479; + CmdLinGradRef param_1 = _572; return CmdLinGrad_read(param, param_1); } @@ -379,9 +389,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _489 = { ref.offset + 4u }; + CmdImageRef _582 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _489; + CmdImageRef param_1 = _582; return CmdImage_read(param, param_1); } @@ -394,10 +404,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -418,6 +428,438 @@ uint packsRGB(inout float4 rgba) return spvPackUnorm4x8(rgba.wzyx); } +CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdEndClip s; + s.blend = raw0; + return s; +} + +CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) +{ + CmdEndClipRef _592 = { ref.offset + 4u }; + Alloc param = a; + CmdEndClipRef param_1 = _592; + return CmdEndClip_read(param, param_1); +} + +float3 screen(float3 cb, float3 cs) +{ + return (cb + cs) - (cb * cs); +} + +float3 hard_light(float3 cb, float3 cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0f) - 1.0f.xxx; + return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float color_dodge(float cb, float cs) +{ + if (cb == 0.0f) + { + return 0.0f; + } + else + { + if (cs == 1.0f) + { + return 1.0f; + } + else + { + return min(1.0f, cb / (1.0f - cs)); + } + } +} + +float color_burn(float cb, float cs) +{ + if (cb == 1.0f) + { + return 1.0f; + } + else + { + if (cs == 0.0f) + { + return 0.0f; + } + else + { + return 1.0f - min(1.0f, (1.0f - cb) / cs); + } + } +} + +float3 soft_light(float3 cb, float3 cs) +{ + float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); + return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float sat(float3 c) +{ + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0f; + cmax = 0.0f; + } + cmin = 0.0f; +} + +float3 set_sat(inout float3 c, float s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +float lum(float3 c) +{ + float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); + return dot(c, f); +} + +float3 clip_color(inout float3 c) +{ + float3 param = c; + float L = lum(param); + float n = min(c.x, min(c.y, c.z)); + float x = max(c.x, max(c.y, c.z)); + if (n < 0.0f) + { + c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); + } + if (x > 1.0f) + { + c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); + } + return c; +} + +float3 set_lum(float3 c, float l) +{ + float3 param = c; + float3 param_1 = c + (l - lum(param)).xxx; + float3 _901 = clip_color(param_1); + return _901; +} + +float3 mix_blend(float3 cb, float3 cs, uint mode) +{ + float3 b = 0.0f.xxx; + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = min(cb, cs); + break; + } + case 5u: + { + b = max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0f) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) +{ + float fa = 0.0f; + float fb = 0.0f; + switch (mode) + { + case 1u: + { + fa = 1.0f; + fb = 0.0f; + break; + } + case 2u: + { + fa = 0.0f; + fb = 1.0f; + break; + } + case 3u: + { + fa = 1.0f; + fb = 1.0f - as; + break; + } + case 4u: + { + fa = 1.0f - ab; + fb = 1.0f; + break; + } + case 5u: + { + fa = ab; + fb = 0.0f; + break; + } + case 6u: + { + fa = 0.0f; + fb = as; + break; + } + case 7u: + { + fa = 1.0f - ab; + fb = 0.0f; + break; + } + case 8u: + { + fa = 0.0f; + fb = 1.0f - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0f - as; + break; + } + case 10u: + { + fa = 1.0f - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0f - ab; + fb = 1.0f - as; + break; + } + case 12u: + { + fa = 1.0f; + fb = 1.0f; + break; + } + case 13u: + { + return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + } + case 14u: + { + return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> uint(2); @@ -431,24 +873,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _499 = { ref.offset + 4u }; + CmdJumpRef _602 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _499; + CmdJumpRef param_1 = _602; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _723.Load(8)) + gl_WorkGroupID.x; - Alloc _738; - _738.offset = _723.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; + Alloc _1536; + _1536.offset = _1521.Load(24); Alloc param; - param.offset = _738.offset; + param.offset = _1536.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _747 = { cmd_alloc.offset }; - CmdRef cmd_ref = _747; + CmdRef _1545 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1545; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -457,7 +899,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _202.Load(4) == 0u; + bool mem_ok = _278.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -482,8 +924,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _842 = { stroke.tile_ref }; - tile_seg_ref = _842; + TileSegRef _1638 = { stroke.tile_ref }; + tile_seg_ref = _1638; do { uint param_7 = tile_seg_ref.offset; @@ -519,8 +961,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _964 = { fill.tile_ref }; - tile_seg_ref = _964; + TileSegRef _1758 = { fill.tile_ref }; + tile_seg_ref = _1758; do { uint param_15 = tile_seg_ref.offset; @@ -609,10 +1051,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -625,9 +1067,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_30, param_31); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; - float4 _1327[8]; - fillImage(_1327, param_32, param_33); - float4 img[8] = _1327; + float4 _2121[8]; + fillImage(_2121, param_32, param_33); + float4 img[8] = _2121; for (uint k_10 = 0u; k_10 < 8u; k_10++) { float4 fg_k_1 = img[k_10] * area[k_10]; @@ -642,8 +1084,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -652,24 +1094,44 @@ void comp_main() } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1453; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; + cmd_ref = _2299; cmd_alloc.offset = cmd_ref.offset; break; } @@ -677,8 +1139,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - image[int2(xy_uint + chunk_offset(param_38))] = rgba[i_1].w.x; + uint param_48 = i_1; + image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index a318ed7..38506dd 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -454,10 +454,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1495 = fromsRGB(param_1); - fg_rgba.x = _1495.x; - fg_rgba.y = _1495.y; - fg_rgba.z = _1495.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } return rgba; @@ -511,7 +511,7 @@ float3 hard_light(thread const float3& cb, thread const float3& cs) { float3 param = cb; float3 param_1 = (cs * 2.0) - float3(1.0); - return mix(screen(param, param_1), (cb * 2.0) * cs, select(float3(0.0), float3(1.0), cs <= float3(0.5))); + return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); } static inline __attribute__((always_inline)) @@ -557,8 +557,8 @@ float color_burn(thread const float& cb, thread const float& cs) static inline __attribute__((always_inline)) float3 soft_light(thread const float3& cb, thread const float3& cs) { - float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, select(float3(0.0), float3(1.0), cb <= float3(0.25))); - return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), select(float3(0.0), float3(1.0), cs <= float3(0.5))); + float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); + return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); } static inline __attribute__((always_inline)) @@ -568,103 +568,103 @@ float sat(thread const float3& c) } static inline __attribute__((always_inline)) -void SetSatInner(thread float& Cmin, thread float& Cmid, thread float& Cmax, thread const float& s) +void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) { - if (Cmax > Cmin) + if (cmax > cmin) { - Cmid = ((Cmid - Cmin) * s) / (Cmax - Cmin); - Cmax = s; + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; } else { - Cmid = 0.0; - Cmax = 0.0; + cmid = 0.0; + cmax = 0.0; } - Cmin = 0.0; + cmin = 0.0; } static inline __attribute__((always_inline)) -float3 set_sat(thread float3& C, thread const float& s) +float3 set_sat(thread float3& c, thread const float& s) { - if (C.x <= C.y) + if (c.x <= c.y) { - if (C.y <= C.z) + if (c.y <= c.z) { - float param = C.x; - float param_1 = C.y; - float param_2 = C.z; + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; float param_3 = s; - SetSatInner(param, param_1, param_2, param_3); - C.x = param; - C.y = param_1; - C.z = param_2; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; } else { - if (C.x <= C.z) + if (c.x <= c.z) { - float param_4 = C.x; - float param_5 = C.z; - float param_6 = C.y; + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; float param_7 = s; - SetSatInner(param_4, param_5, param_6, param_7); - C.x = param_4; - C.z = param_5; - C.y = param_6; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; } else { - float param_8 = C.z; - float param_9 = C.x; - float param_10 = C.y; + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; float param_11 = s; - SetSatInner(param_8, param_9, param_10, param_11); - C.z = param_8; - C.x = param_9; - C.y = param_10; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; } } } else { - if (C.x <= C.z) + if (c.x <= c.z) { - float param_12 = C.y; - float param_13 = C.x; - float param_14 = C.z; + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; float param_15 = s; - SetSatInner(param_12, param_13, param_14, param_15); - C.y = param_12; - C.x = param_13; - C.z = param_14; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; } else { - if (C.y <= C.z) + if (c.y <= c.z) { - float param_16 = C.y; - float param_17 = C.z; - float param_18 = C.x; + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; float param_19 = s; - SetSatInner(param_16, param_17, param_18, param_19); - C.y = param_16; - C.z = param_17; - C.x = param_18; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; } else { - float param_20 = C.z; - float param_21 = C.y; - float param_22 = C.x; + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; float param_23 = s; - SetSatInner(param_20, param_21, param_22, param_23); - C.z = param_20; - C.y = param_21; - C.x = param_22; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; } } } - return C; + return c; } static inline __attribute__((always_inline)) @@ -696,10 +696,9 @@ static inline __attribute__((always_inline)) float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; - float d = l - lum(param); - float3 param_1 = c + float3(d); - float3 _903 = clip_color(param_1); - return _903; + float3 param_1 = c + float3(l - lum(param)); + float3 _901 = clip_color(param_1); + return _901; } static inline __attribute__((always_inline)) @@ -788,9 +787,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1194 = set_sat(param_21, param_22); + float3 _1192 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1194; + float3 param_24 = _1192; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -800,9 +799,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1208 = set_sat(param_27, param_28); + float3 _1206 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1208; + float3 param_30 = _1206; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -947,11 +946,11 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me return CmdJump_read(param, param_1, v_278); } -kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1523 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1523.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1523.conf.ptcl_alloc.offset; + param.offset = _1521.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -1114,10 +1113,10 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2094 = fromsRGB(param_29); - fg_rgba.x = _2094.x; - fg_rgba.y = _2094.y; - fg_rgba.z = _2094.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -1146,8 +1145,8 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _2186 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2186; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = float4(0.0); } clip_depth++; @@ -1172,12 +1171,12 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 float3 param_39 = fg_1.xyz; uint param_40 = blend_mode; float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2253 = fg_1; - float _2257 = fg_1.w; - float3 _2264 = mix(_2253.xyz, blend, float3(float((_2257 * bg.w) > 0.0))); - fg_1.x = _2264.x; - fg_1.y = _2264.y; - fg_1.z = _2264.z; + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; float3 param_41 = bg.xyz; float3 param_42 = fg_1.xyz; float param_43 = bg.w; diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index eb7385f..305facd 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index d69db16..fdc60a1 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and b/piet-gpu/shader/gen/tile_alloc.dxil differ diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl index 97e1c23..1c9d04b 100644 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ b/piet-gpu/shader/gen/tile_alloc.hlsl @@ -17,6 +17,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -76,7 +77,7 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); RWByteAddressBuffer _92 : register(u0, space0); -ByteAddressBuffer _305 : register(t1, space0); +ByteAddressBuffer _314 : register(t1, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -111,8 +112,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _236 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _236; + AnnotatedTag _246 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _246; } AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) @@ -130,16 +131,20 @@ AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) Alloc param_6 = a; uint param_7 = ix + 3u; uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); AnnoEndClip s; s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = raw4; return s; } AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) { - AnnoEndClipRef _243 = { ref.offset + 4u }; + AnnoEndClipRef _252 = { ref.offset + 4u }; Alloc param = a; - AnnoEndClipRef param_1 = _243; + AnnoEndClipRef param_1 = _252; return AnnoEndClip_read(param, param_1); } @@ -211,17 +216,17 @@ void comp_main() { uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; - PathRef _312 = { _305.Load(16) + (element_ix * 12u) }; - PathRef path_ref = _312; - AnnotatedRef _321 = { _305.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _321; + PathRef _321 = { _314.Load(16) + (element_ix * 12u) }; + PathRef path_ref = _321; + AnnotatedRef _330 = { _314.Load(32) + (element_ix * 40u) }; + AnnotatedRef ref = _330; uint tag = 0u; - if (element_ix < _305.Load(0)) + if (element_ix < _314.Load(0)) { - Alloc _332; - _332.offset = _305.Load(32); + Alloc _341; + _341.offset = _314.Load(32); Alloc param; - param.offset = _332.offset; + param.offset = _341.offset; AnnotatedRef param_1 = ref; tag = Annotated_tag(param, param_1).tag; } @@ -237,10 +242,10 @@ void comp_main() case 4u: case 5u: { - Alloc _350; - _350.offset = _305.Load(32); + Alloc _359; + _359.offset = _314.Load(32); Alloc param_2; - param_2.offset = _350.offset; + param_2.offset = _359.offset; AnnotatedRef param_3 = ref; AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3); x0 = int(floor(clip.bbox.x * 0.0625f)); @@ -250,10 +255,10 @@ void comp_main() break; } } - x0 = clamp(x0, 0, int(_305.Load(8))); - y0 = clamp(y0, 0, int(_305.Load(12))); - x1 = clamp(x1, 0, int(_305.Load(8))); - y1 = clamp(y1, 0, int(_305.Load(12))); + x0 = clamp(x0, 0, int(_314.Load(8))); + y0 = clamp(y0, 0, int(_314.Load(12))); + x1 = clamp(x1, 0, int(_314.Load(8))); + y1 = clamp(y1, 0, int(_314.Load(12))); Path path; path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); uint tile_count = uint((x1 - x0) * (y1 - y0)); @@ -276,46 +281,46 @@ void comp_main() if (th_ix == 255u) { uint param_4 = total_tile_count * 8u; - MallocResult _476 = malloc(param_4); - sh_tile_alloc = _476; + MallocResult _485 = malloc(param_4); + sh_tile_alloc = _485; } GroupMemoryBarrierWithGroupSync(); MallocResult alloc_start = sh_tile_alloc; - bool _487; + bool _496; if (!alloc_start.failed) { - _487 = _92.Load(4) != 0u; + _496 = _92.Load(4) != 0u; } else { - _487 = alloc_start.failed; + _496 = alloc_start.failed; } - if (_487) + if (_496) { return; } - if (element_ix < _305.Load(0)) + if (element_ix < _314.Load(0)) { - uint _500; + uint _509; if (th_ix > 0u) { - _500 = sh_tile_count[th_ix - 1u]; + _509 = sh_tile_count[th_ix - 1u]; } else { - _500 = 0u; + _509 = 0u; } - uint tile_subix = _500; + uint tile_subix = _509; Alloc param_5 = alloc_start.alloc; uint param_6 = 8u * tile_subix; uint param_7 = 8u * tile_count; Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); - TileRef _522 = { tiles_alloc.offset }; - path.tiles = _522; - Alloc _527; - _527.offset = _305.Load(16); + TileRef _531 = { tiles_alloc.offset }; + path.tiles = _531; + Alloc _536; + _536.offset = _314.Load(16); Alloc param_8; - param_8.offset = _527.offset; + param_8.offset = _536.offset; PathRef param_9 = path_ref; Path param_10 = path; Path_write(param_8, param_9, param_10);