diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index e8c29c3..14831ca 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -24,6 +24,14 @@ piet_gpu! { line_y: f32, line_c: f32, } + struct CmdRadGrad { + index: u32, + mat: [f32; 4], + xlat: [f32; 2], + c1: [f32; 2], + ra: f32, + roff: f32, + } struct CmdImage { index: u32, offset: [i16; 2], @@ -31,6 +39,9 @@ piet_gpu! { struct CmdAlpha { alpha: f32, } + struct CmdEndClip { + blend: u32, + } struct CmdJump { new_ref: u32, } @@ -42,9 +53,10 @@ piet_gpu! { Alpha(CmdAlpha), Color(CmdColor), LinGrad(CmdLinGrad), + RadGrad(CmdRadGrad), Image(CmdImage), BeginClip, - EndClip, + EndClip(CmdEndClip), Jump(CmdJump), } } diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 454371c..3abb2e0 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -229,6 +229,7 @@ void main() { case Drawtag_FillColor: case Drawtag_FillImage: case Drawtag_FillLinGradient: + case Drawtag_FillRadGradient: case Drawtag_BeginClip: case Drawtag_EndClip: uint drawmonoid_base = drawmonoid_start + 4 * element_ix; @@ -373,6 +374,25 @@ void main() { Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin); cmd_ref.offset += 4 + CmdLinGrad_size; break; + case Drawtag_FillRadGradient: + if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { + break; + } + linewidth = uintBitsToFloat(memory[di]); + write_fill(cmd_alloc, cmd_ref, tile, linewidth); + CmdRadGrad cmd_rad; + cmd_rad.index = scene[dd]; + // Given that this is basically a memcpy, we might consider + // letting the fine raster read the info itself. + cmd_rad.mat = uintBitsToFloat(uvec4(memory[di + 1], memory[di + 2], + memory[di + 3], memory[di + 4])); + cmd_rad.xlat = uintBitsToFloat(uvec2(memory[di + 5], memory[di + 6])); + cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8])); + cmd_rad.ra = uintBitsToFloat(memory[di + 9]); + cmd_rad.roff = uintBitsToFloat(memory[di + 10]); + Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad); + cmd_ref.offset += 4 + CmdRadGrad_size; + break; case Drawtag_FillImage: linewidth = uintBitsToFloat(memory[di]); if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index 1cee0ef..ef369c9 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -94,8 +94,8 @@ void main() { // pipeline. However, going forward we'll get rid of that, and have // later stages read scene + bbox etc. tag_word = scene[drawtag_base + ix + i]; - if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillImage || - tag_word == Drawtag_BeginClip) { + if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient || + tag_word == Drawtag_FillImage || tag_word == Drawtag_BeginClip) { uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix; float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; @@ -106,11 +106,11 @@ void main() { uint fill_mode = uint(linewidth >= 0.0); vec4 mat; vec2 translate; - if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient) { + if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) { uint trans_ix = memory[bbox_offset + 5]; uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); - if (tag_word == Drawtag_FillLinGradient) { + if (tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) { translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); } } @@ -125,7 +125,6 @@ void main() { break; case Drawtag_FillLinGradient: memory[di] = floatBitsToUint(linewidth); - uint index = scene[dd]; vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2])); vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4])); p0 = mat.xy * p0.x + mat.zw * p0.y + translate; @@ -139,6 +138,33 @@ void main() { memory[di + 2] = floatBitsToUint(line_y); memory[di + 3] = floatBitsToUint(line_c); break; + case Drawtag_FillRadGradient: + p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2])); + p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4])); + float r0 = uintBitsToFloat(scene[dd + 5]); + float r1 = uintBitsToFloat(scene[dd + 6]); + float inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); + vec4 inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); + vec2 inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; + inv_tr += p0; + vec2 center1 = p1 - p0; + float rr = r1 / (r1 - r0); + float rainv = rr / (r1 * r1 - dot(center1, center1)); + vec2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0; + memory[di] = floatBitsToUint(linewidth); + memory[di + 1] = floatBitsToUint(inv_mat.x); + memory[di + 2] = floatBitsToUint(inv_mat.y); + memory[di + 3] = floatBitsToUint(inv_mat.z); + memory[di + 4] = floatBitsToUint(inv_mat.w); + memory[di + 5] = floatBitsToUint(inv_tr.x); + memory[di + 6] = floatBitsToUint(inv_tr.y); + memory[di + 7] = floatBitsToUint(c1.x); + memory[di + 8] = floatBitsToUint(c1.y); + memory[di + 9] = floatBitsToUint(ra); + memory[di + 10] = floatBitsToUint(roff); + break; case Drawtag_BeginClip: break; } diff --git a/piet-gpu/shader/drawtag.h b/piet-gpu/shader/drawtag.h index 7f73546..1e35318 100644 --- a/piet-gpu/shader/drawtag.h +++ b/piet-gpu/shader/drawtag.h @@ -4,11 +4,12 @@ // Design of draw tag: & 0x1c gives scene size in bytes // & 1 gives clip -// (tag >> 4) & 0x1c is info size in bytes +// (tag >> 4) & 0x3c is info size in bytes #define Drawtag_Nop 0 #define Drawtag_FillColor 0x44 #define Drawtag_FillLinGradient 0x114 +#define Drawtag_FillRadGradient 0x2dc #define Drawtag_FillImage 0x48 #define Drawtag_BeginClip 0x05 #define Drawtag_EndClip 0x25 @@ -36,5 +37,5 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid map_tag(uint tag_word) { // TODO: at some point, EndClip should not generate a path uint has_path = uint(tag_word != Drawtag_Nop); - return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x1c); + return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x3c); } diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index 12e88dd..fdab444 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index a702df5..04529bb 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -91,6 +91,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -160,9 +175,9 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _242 : register(u0, space0); -ByteAddressBuffer _854 : register(t1, space0); -ByteAddressBuffer _1222 : register(t2, space0); +RWByteAddressBuffer _260 : register(u0, space0); +ByteAddressBuffer _1005 : register(t1, space0); +ByteAddressBuffer _1372 : register(t2, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -185,8 +200,8 @@ groupshared uint sh_tile_count[256]; Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _319 = { a.offset + offset }; - return _319; + Alloc _337 = { a.offset + offset }; + return _337; } bool touch_mem(Alloc alloc, uint offset) @@ -202,7 +217,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _242.Load(offset * 4 + 8); + uint v = _260.Load(offset * 4 + 8); return v; } @@ -215,8 +230,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { - BinInstanceRef _328 = { ref.offset + (index * 4u) }; - return _328; + BinInstanceRef _346 = { ref.offset + (index * 4u) }; + return _346; } BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) @@ -244,8 +259,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _391 = { raw2 }; - s.tiles = _391; + TileRef _409 = { raw2 }; + s.tiles = _409; return s; } @@ -255,11 +270,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _741; - _242.GetDimensions(_741); - _741 = (_741 - 8) / 4; + uint _892; + _260.GetDimensions(_892); + _892 = (_892 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_741) * 4); + uint param_1 = uint(int(_892) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -273,31 +288,31 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _416 = { raw0 }; + TileSegRef _434 = { raw0 }; Tile s; - s.tile = _416; + s.tile = _434; s.backdrop = int(raw1); return s; } MallocResult malloc(uint size) { - uint _248; - _242.InterlockedAdd(0, size, _248); - uint offset = _248; - uint _255; - _242.GetDimensions(_255); - _255 = (_255 - 8) / 4; + uint _266; + _260.InterlockedAdd(0, size, _266); + uint offset = _266; + uint _273; + _260.GetDimensions(_273); + _273 = (_273 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_255) * 4); + r.failed = (offset + size) > uint(int(_273) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _277; - _242.InterlockedMax(4, 1u, _277); + uint _295; + _260.InterlockedMax(4, 1u, _295); return r; } return r; @@ -311,7 +326,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _242.Store(offset * 4 + 8, val); + _260.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -327,11 +342,11 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; + uint param_2 = 11u; write_mem(param, param_1, param_2); - CmdJumpRef _734 = { ref.offset + 4u }; + CmdJumpRef _885 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _734; + CmdJumpRef param_4 = _885; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -343,22 +358,22 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _762 = malloc(param); - MallocResult new_cmd = _762; + MallocResult _913 = malloc(param); + MallocResult new_cmd = _913; if (new_cmd.failed) { return false; } - CmdJump _772 = { new_cmd.alloc.offset }; - CmdJump jump = _772; + CmdJump _923 = { new_cmd.alloc.offset }; + CmdJump jump = _923; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _784 = { cmd_alloc.offset }; - cmd_ref = _784; - cmd_limit = (cmd_alloc.offset + 1024u) - 60u; + CmdRef _935 = { cmd_alloc.offset }; + cmd_ref = _935; + cmd_limit = (cmd_alloc.offset + 1024u) - 144u; return true; } @@ -381,9 +396,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, param_2); - CmdFillRef _604 = { ref.offset + 4u }; + CmdFillRef _742 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _604; + CmdFillRef param_4 = _742; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -415,9 +430,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _622 = { ref.offset + 4u }; + CmdStrokeRef _760 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _622; + CmdStrokeRef param_4 = _760; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } @@ -428,8 +443,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) { if (tile.tile.offset != 0u) { - CmdFill _807 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _807; + CmdFill _958 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _958; Alloc param = alloc; CmdRef param_1 = cmd_ref; CmdFill param_2 = cmd_fill; @@ -446,8 +461,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) } else { - CmdStroke _837 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _837; + CmdStroke _988 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _988; Alloc param_5 = alloc; CmdRef param_6 = cmd_ref; CmdStroke param_7 = cmd_stroke; @@ -471,9 +486,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _649 = { ref.offset + 4u }; + CmdColorRef _786 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _649; + CmdColorRef param_4 = _786; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } @@ -505,13 +520,75 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _668 = { ref.offset + 4u }; + CmdLinGradRef _804 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _668; + CmdLinGradRef param_4 = _804; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } +void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.index; + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.mat.x); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.mat.y); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.mat.z); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.mat.w); + write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = asuint(s.xlat.x); + write_mem(param_15, param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = asuint(s.xlat.y); + write_mem(param_18, param_19, param_20); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = asuint(s.c1.x); + write_mem(param_21, param_22, param_23); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = asuint(s.c1.y); + write_mem(param_24, param_25, param_26); + Alloc param_27 = a; + uint param_28 = ix + 9u; + uint param_29 = asuint(s.ra); + write_mem(param_27, param_28, param_29); + Alloc param_30 = a; + uint param_31 = ix + 10u; + uint param_32 = asuint(s.roff); + write_mem(param_30, param_31, param_32); +} + +void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 7u; + write_mem(param, param_1, param_2); + CmdRadGradRef _822 = { ref.offset + 4u }; + Alloc param_3 = a; + CmdRadGradRef param_4 = _822; + CmdRadGrad param_5 = s; + CmdRadGrad_write(param_3, param_4, param_5); +} + void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) { uint ix = ref.offset >> uint(2); @@ -529,11 +606,11 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; + uint param_2 = 8u; write_mem(param, param_1, param_2); - CmdImageRef _687 = { ref.offset + 4u }; + CmdImageRef _840 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _687; + CmdImageRef param_4 = _840; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } @@ -542,7 +619,7 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; + uint param_2 = 9u; write_mem(param, param_1, param_2); } @@ -559,11 +636,11 @@ void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; + uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdEndClipRef _715 = { ref.offset + 4u }; + CmdEndClipRef _866 = { ref.offset + 4u }; Alloc param_3 = a; - CmdEndClipRef param_4 = _715; + CmdEndClipRef param_4 = _866; CmdEndClip param_5 = s; CmdEndClip_write(param_3, param_4, param_5); } @@ -578,80 +655,81 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_854.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_1005.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_854.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_1005.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _854.Load(8)) + bin_tile_x) + tile_x; - Alloc _919; - _919.offset = _854.Load(24); + uint this_tile_ix = (((bin_tile_y + tile_y) * _1005.Load(8)) + bin_tile_x) + tile_x; + Alloc _1070; + _1070.offset = _1005.Load(24); Alloc param; - param.offset = _919.offset; + param.offset = _1070.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _928 = { cmd_alloc.offset }; - CmdRef cmd_ref = _928; - uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; + CmdRef _1079 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1079; + uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; uint clip_depth = 0u; uint clip_zero_depth = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - uint drawmonoid_start = _854.Load(44) >> uint(2); - uint drawtag_start = _854.Load(100) >> uint(2); - uint drawdata_start = _854.Load(104) >> uint(2); - uint drawinfo_start = _854.Load(68) >> uint(2); - bool mem_ok = _242.Load(4) == 0u; + uint drawmonoid_start = _1005.Load(44) >> uint(2); + uint drawtag_start = _1005.Load(100) >> uint(2); + uint drawdata_start = _1005.Load(104) >> uint(2); + uint drawinfo_start = _1005.Load(68) >> uint(2); + bool mem_ok = _260.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1154; + uint _1304; uint element_ix; Alloc param_14; uint tile_count; - uint _1455; + uint _1605; float linewidth; CmdLinGrad cmd_lin; + CmdRadGrad cmd_rad; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1206; + bool _1356; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1003 = th_ix < 256u; - bool _1011; - if (_1003) + bool _1154 = th_ix < 256u; + bool _1162; + if (_1154) { - _1011 = (partition_ix + th_ix) < n_partitions; + _1162 = (partition_ix + th_ix) < n_partitions; } else { - _1011 = _1003; + _1162 = _1154; } - if (_1011) + if (_1162) { - uint in_ix = (_854.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1029; - _1029.offset = _854.Load(20); - param_3.offset = _1029.offset; + uint in_ix = (_1005.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1179; + _1179.offset = _1005.Load(20); + param_3.offset = _1179.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1040; - _1040.offset = _854.Load(20); - param_5.offset = _1040.offset; + Alloc _1190; + _1190.offset = _1005.Load(20); + param_5.offset = _1190.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -697,16 +775,16 @@ void comp_main() } if (part_ix > 0u) { - _1154 = sh_part_count[part_ix - 1u]; + _1304 = sh_part_count[part_ix - 1u]; } else { - _1154 = part_start_ix; + _1304 = part_start_ix; } - ix -= _1154; + ix -= _1304; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1173 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1173; + BinInstanceRef _1323 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1323; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -716,16 +794,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1196 = (wr_ix - rd_ix) < 256u; - if (_1196) + bool _1346 = (wr_ix - rd_ix) < 256u; + if (_1346) { - _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1206 = _1196; + _1356 = _1346; } - if (_1206) + if (_1356) { continue; } @@ -738,23 +816,24 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - tag = _1222.Load((drawtag_start + element_ix) * 4 + 0); + tag = _1372.Load((drawtag_start + element_ix) * 4 + 0); } switch (tag) { case 68u: case 72u: case 276u: + case 732u: case 5u: case 37u: { uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = _242.Load(drawmonoid_base * 4 + 8); - PathRef _1247 = { _854.Load(16) + (path_ix * 12u) }; - Alloc _1250; - _1250.offset = _854.Load(16); - param_14.offset = _1250.offset; - PathRef param_15 = _1247; + uint path_ix = _260.Load(drawmonoid_base * 4 + 8); + PathRef _1397 = { _1005.Load(16) + (path_ix * 12u) }; + Alloc _1400; + _1400.offset = _1005.Load(16); + param_14.offset = _1400.offset; + PathRef param_15 = _1397; Path path = Path_read(param_14, param_15); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -810,16 +889,16 @@ void comp_main() } } uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1222.Load((drawtag_start + element_ix_1) * 4 + 0); + uint tag_1 = _1372.Load((drawtag_start + element_ix_1) * 4 + 0); if (el_ix > 0u) { - _1455 = sh_tile_count[el_ix - 1u]; + _1605 = sh_tile_count[el_ix - 1u]; } else { - _1455 = 0u; + _1605 = 0u; } - uint seq_ix = ix_1 - _1455; + uint seq_ix = ix_1 - _1605; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -828,38 +907,38 @@ void comp_main() { uint param_21 = el_ix; bool param_22 = mem_ok; - TileRef _1507 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + TileRef _1657 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; Alloc param_23 = read_tile_alloc(param_21, param_22); - TileRef param_24 = _1507; + TileRef param_24 = _1657; Tile tile = Tile_read(param_23, param_24); bool is_clip = (tag_1 & 1u) != 0u; bool is_blend = false; if (is_clip) { uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = _242.Load((drawmonoid_base_1 + 2u) * 4 + 8); + uint scene_offset = _260.Load((drawmonoid_base_1 + 2u) * 4 + 8); uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1222.Load(dd * 4 + 0); + uint blend = _1372.Load(dd * 4 + 0); is_blend = blend != 3u; } - bool _1542 = tile.tile.offset != 0u; - bool _1551; - if (!_1542) + bool _1692 = tile.tile.offset != 0u; + bool _1701; + if (!_1692) { - _1551 = (tile.backdrop == 0) == is_clip; + _1701 = (tile.backdrop == 0) == is_clip; } else { - _1551 = _1542; + _1701 = _1692; } - include_tile = _1551 || is_blend; + include_tile = _1701 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1573; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1573); + uint _1723; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1723); } } GroupMemoryBarrierWithGroupSync(); @@ -883,33 +962,33 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_2 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - uint drawtag = _1222.Load((drawtag_start + element_ix_2) * 4 + 0); + uint drawtag = _1372.Load((drawtag_start + element_ix_2) * 4 + 0); if (clip_zero_depth == 0u) { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - TileRef _1650 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _1800 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1650; + TileRef param_28 = _1800; Tile tile_1 = Tile_read(param_27, param_28); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = _242.Load((drawmonoid_base_2 + 2u) * 4 + 8); - uint info_offset = _242.Load((drawmonoid_base_2 + 3u) * 4 + 8); + uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8); + uint info_offset = _260.Load((drawmonoid_base_2 + 3u) * 4 + 8); uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); uint di = drawinfo_start + (info_offset >> uint(2)); switch (drawtag) { case 68u: { - linewidth = asfloat(_242.Load(di * 4 + 8)); + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1697 = alloc_cmd(param_29, param_30, param_31); + bool _1848 = alloc_cmd(param_29, param_30, param_31); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1697) + if (!_1848) { break; } @@ -919,11 +998,11 @@ void comp_main() float param_35 = linewidth; write_fill(param_32, param_33, param_34, param_35); cmd_ref = param_33; - uint rgba = _1222.Load(dd_1 * 4 + 0); - CmdColor _1720 = { rgba }; + uint rgba = _1372.Load(dd_1 * 4 + 0); + CmdColor _1871 = { rgba }; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdColor param_38 = _1720; + CmdColor param_38 = _1871; Cmd_Color_write(param_36, param_37, param_38); cmd_ref.offset += 8u; break; @@ -933,25 +1012,25 @@ void comp_main() Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1738 = alloc_cmd(param_39, param_40, param_41); + bool _1889 = alloc_cmd(param_39, param_40, param_41); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1738) + if (!_1889) { break; } - linewidth = asfloat(_242.Load(di * 4 + 8)); + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_42 = cmd_alloc; CmdRef param_43 = cmd_ref; Tile param_44 = tile_1; float param_45 = linewidth; write_fill(param_42, param_43, param_44, param_45); cmd_ref = param_43; - cmd_lin.index = _1222.Load(dd_1 * 4 + 0); - cmd_lin.line_x = asfloat(_242.Load((di + 1u) * 4 + 8)); - cmd_lin.line_y = asfloat(_242.Load((di + 2u) * 4 + 8)); - cmd_lin.line_c = asfloat(_242.Load((di + 3u) * 4 + 8)); + cmd_lin.index = _1372.Load(dd_1 * 4 + 0); + cmd_lin.line_x = asfloat(_260.Load((di + 1u) * 4 + 8)); + cmd_lin.line_y = asfloat(_260.Load((di + 2u) * 4 + 8)); + cmd_lin.line_c = asfloat(_260.Load((di + 3u) * 4 + 8)); Alloc param_46 = cmd_alloc; CmdRef param_47 = cmd_ref; CmdLinGrad param_48 = cmd_lin; @@ -959,69 +1038,102 @@ void comp_main() cmd_ref.offset += 20u; break; } - case 72u: + case 732u: { - linewidth = asfloat(_242.Load(di * 4 + 8)); Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1806 = alloc_cmd(param_49, param_50, param_51); + bool _1953 = alloc_cmd(param_49, param_50, param_51); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1806) + if (!_1953) { break; } + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_52 = cmd_alloc; CmdRef param_53 = cmd_ref; Tile param_54 = tile_1; float param_55 = linewidth; write_fill(param_52, param_53, param_54, param_55); cmd_ref = param_53; - uint index = _1222.Load(dd_1 * 4 + 0); - uint raw1 = _1222.Load((dd_1 + 1u) * 4 + 0); - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - CmdImage _1845 = { index, offset_1 }; + cmd_rad.index = _1372.Load(dd_1 * 4 + 0); + cmd_rad.mat = asfloat(uint4(_260.Load((di + 1u) * 4 + 8), _260.Load((di + 2u) * 4 + 8), _260.Load((di + 3u) * 4 + 8), _260.Load((di + 4u) * 4 + 8))); + cmd_rad.xlat = asfloat(uint2(_260.Load((di + 5u) * 4 + 8), _260.Load((di + 6u) * 4 + 8))); + cmd_rad.c1 = asfloat(uint2(_260.Load((di + 7u) * 4 + 8), _260.Load((di + 8u) * 4 + 8))); + cmd_rad.ra = asfloat(_260.Load((di + 9u) * 4 + 8)); + cmd_rad.roff = asfloat(_260.Load((di + 10u) * 4 + 8)); Alloc param_56 = cmd_alloc; CmdRef param_57 = cmd_ref; - CmdImage param_58 = _1845; - Cmd_Image_write(param_56, param_57, param_58); + CmdRadGrad param_58 = cmd_rad; + Cmd_RadGrad_write(param_56, param_57, param_58); + cmd_ref.offset += 48u; + break; + } + case 72u: + { + linewidth = asfloat(_260.Load(di * 4 + 8)); + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _2059 = alloc_cmd(param_59, param_60, param_61); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_2059) + { + break; + } + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Tile param_64 = tile_1; + float param_65 = linewidth; + write_fill(param_62, param_63, param_64, param_65); + cmd_ref = param_63; + uint index = _1372.Load(dd_1 * 4 + 0); + uint raw1 = _1372.Load((dd_1 + 1u) * 4 + 0); + int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + CmdImage _2098 = { index, offset_1 }; + Alloc param_66 = cmd_alloc; + CmdRef param_67 = cmd_ref; + CmdImage param_68 = _2098; + Cmd_Image_write(param_66, param_67, param_68); cmd_ref.offset += 12u; break; } case 5u: { - bool _1859 = tile_1.tile.offset == 0u; - bool _1865; - if (_1859) + bool _2112 = tile_1.tile.offset == 0u; + bool _2118; + if (_2112) { - _1865 = tile_1.backdrop == 0; + _2118 = tile_1.backdrop == 0; } else { - _1865 = _1859; + _2118 = _2112; } - if (_1865) + if (_2118) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_59 = cmd_alloc; - CmdRef param_60 = cmd_ref; - uint param_61 = cmd_limit; - bool _1877 = alloc_cmd(param_59, param_60, param_61); - cmd_alloc = param_59; - cmd_ref = param_60; - cmd_limit = param_61; - if (!_1877) + Alloc param_69 = cmd_alloc; + CmdRef param_70 = cmd_ref; + uint param_71 = cmd_limit; + bool _2130 = alloc_cmd(param_69, param_70, param_71); + cmd_alloc = param_69; + cmd_ref = param_70; + cmd_limit = param_71; + if (!_2130) { break; } - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - Cmd_BeginClip_write(param_62, param_63); + Alloc param_72 = cmd_alloc; + CmdRef param_73 = cmd_ref; + Cmd_BeginClip_write(param_72, param_73); cmd_ref.offset += 4u; } clip_depth++; @@ -1030,29 +1142,29 @@ void comp_main() case 37u: { clip_depth--; - Alloc param_64 = cmd_alloc; - CmdRef param_65 = cmd_ref; - uint param_66 = cmd_limit; - bool _1905 = alloc_cmd(param_64, param_65, param_66); - cmd_alloc = param_64; - cmd_ref = param_65; - cmd_limit = param_66; - if (!_1905) + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + uint param_76 = cmd_limit; + bool _2158 = alloc_cmd(param_74, param_75, param_76); + cmd_alloc = param_74; + cmd_ref = param_75; + cmd_limit = param_76; + if (!_2158) { break; } - Alloc param_67 = cmd_alloc; - CmdRef param_68 = cmd_ref; - Tile param_69 = tile_1; - float param_70 = -1.0f; - write_fill(param_67, param_68, param_69, param_70); - cmd_ref = param_68; - uint blend_1 = _1222.Load(dd_1 * 4 + 0); - CmdEndClip _1928 = { blend_1 }; - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - CmdEndClip param_73 = _1928; - Cmd_EndClip_write(param_71, param_72, param_73); + Alloc param_77 = cmd_alloc; + CmdRef param_78 = cmd_ref; + Tile param_79 = tile_1; + float param_80 = -1.0f; + write_fill(param_77, param_78, param_79, param_80); + cmd_ref = param_78; + uint blend_1 = _1372.Load(dd_1 * 4 + 0); + CmdEndClip _2181 = { blend_1 }; + Alloc param_81 = cmd_alloc; + CmdRef param_82 = cmd_ref; + CmdEndClip param_83 = _2181; + Cmd_EndClip_write(param_81, param_82, param_83); cmd_ref.offset += 8u; break; } @@ -1086,21 +1198,21 @@ void comp_main() break; } } - bool _1975 = (bin_tile_x + tile_x) < _854.Load(8); - bool _1984; - if (_1975) + bool _2228 = (bin_tile_x + tile_x) < _1005.Load(8); + bool _2237; + if (_2228) { - _1984 = (bin_tile_y + tile_y) < _854.Load(12); + _2237 = (bin_tile_y + tile_y) < _1005.Load(12); } else { - _1984 = _1975; + _2237 = _2228; } - if (_1984) + if (_2237) { - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - Cmd_End_write(param_74, param_75); + Alloc param_84 = cmd_alloc; + CmdRef param_85 = cmd_ref; + Cmd_End_write(param_84, param_85); } } diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 4226352..55812d4 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -107,6 +107,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -211,7 +226,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_242, constant uint& v_242BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -219,7 +234,7 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_242.memory[offset]; + uint v = v_260.memory[offset]; return v; } @@ -238,30 +253,30 @@ BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const } static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); BinInstance s; s.element_ix = raw0; return s; } static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); + uint raw1 = read_mem(param_2, param_3, v_260, v_260BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_242, v_242BufferSize); + uint raw2 = read_mem(param_4, param_5, v_260, v_260BufferSize); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -274,24 +289,24 @@ void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) } static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_242, constant uint& v_242BufferSize) +Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_260, constant uint& v_260BufferSize) { uint param = 0u; - uint param_1 = uint(int((v_242BufferSize - 8) / 4) * 4); + uint param_1 = uint(int((v_260BufferSize - 8) / 4) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); + uint raw1 = read_mem(param_2, param_3, v_260, v_260BufferSize); Tile s; s.tile = TileSegRef{ raw0 }; s.backdrop = int(raw1); @@ -299,26 +314,26 @@ Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_242, constant uint& v_242BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_260, constant uint& v_260BufferSize) { - uint _248 = atomic_fetch_add_explicit((device atomic_uint*)&v_242.mem_offset, size, memory_order_relaxed); - uint offset = _248; + uint _266 = atomic_fetch_add_explicit((device atomic_uint*)&v_260.mem_offset, size, memory_order_relaxed); + uint offset = _266; MallocResult r; - r.failed = (offset + size) > uint(int((v_242BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_260BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _277 = atomic_fetch_max_explicit((device atomic_uint*)&v_242.mem_error, 1u, memory_order_relaxed); + uint _295 = atomic_fetch_max_explicit((device atomic_uint*)&v_260.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_242, constant uint& v_242BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -326,42 +341,42 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_242.memory[offset] = val; + v_260.memory[offset] = val; } static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + uint param_2 = 11u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdJump_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_242, constant uint& v_242BufferSize) +bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_260, constant uint& v_260BufferSize) { if (cmd_ref.offset < cmd_limit) { return true; } uint param = 1024u; - MallocResult _762 = malloc(param, v_242, v_242BufferSize); - MallocResult new_cmd = _762; + MallocResult _913 = malloc(param, v_260, v_260BufferSize); + MallocResult new_cmd = _913; if (new_cmd.failed) { return false; @@ -370,78 +385,78 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; - Cmd_Jump_write(param_1, param_2, param_3, v_242, v_242BufferSize); + Cmd_Jump_write(param_1, param_2, param_3, v_260, v_260BufferSize); cmd_alloc = new_cmd.alloc; cmd_ref = CmdRef{ cmd_alloc.offset }; - cmd_limit = (cmd_alloc.offset + 1024u) - 60u; + cmd_limit = (cmd_alloc.offset + 1024u) - 144u; return true; } static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdFill_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 3u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdStroke_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_242, constant uint& v_242BufferSize) +void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_260, constant uint& v_260BufferSize) { if (linewidth < 0.0) { @@ -451,14 +466,14 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param = alloc; CmdRef param_1 = cmd_ref; CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2, v_242, v_242BufferSize); + Cmd_Fill_write(param, param_1, param_2, v_260, v_260BufferSize); cmd_ref.offset += 12u; } else { Alloc param_3 = alloc; CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4, v_242, v_242BufferSize); + Cmd_Solid_write(param_3, param_4, v_260, v_260BufferSize); cmd_ref.offset += 4u; } } @@ -468,138 +483,201 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_5 = alloc; CmdRef param_6 = cmd_ref; CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7, v_242, v_242BufferSize); + Cmd_Stroke_write(param_5, param_6, param_7, v_260, v_260BufferSize); cmd_ref.offset += 12u; } } static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdColor_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_242, v_242BufferSize); + write_mem(param_6, param_7, param_8, v_260, v_260BufferSize); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_242, v_242BufferSize); + write_mem(param_9, param_10, param_11, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdLinGrad_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + uint param_5 = as_type(s.mat.x); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.mat.y); + write_mem(param_6, param_7, param_8, v_260, v_260BufferSize); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.mat.z); + write_mem(param_9, param_10, param_11, v_260, v_260BufferSize); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.mat.w); + write_mem(param_12, param_13, param_14, v_260, v_260BufferSize); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = as_type(s.xlat.x); + write_mem(param_15, param_16, param_17, v_260, v_260BufferSize); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = as_type(s.xlat.y); + write_mem(param_18, param_19, param_20, v_260, v_260BufferSize); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = as_type(s.c1.x); + write_mem(param_21, param_22, param_23, v_260, v_260BufferSize); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = as_type(s.c1.y); + write_mem(param_24, param_25, param_26, v_260, v_260BufferSize); + Alloc param_27 = a; + uint param_28 = ix + 9u; + uint param_29 = as_type(s.ra); + write_mem(param_27, param_28, param_29, v_260, v_260BufferSize); + Alloc param_30 = a; + uint param_31 = ix + 10u; + uint param_32 = as_type(s.roff); + write_mem(param_30, param_31, param_32, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; - CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u }; + CmdRadGrad param_5 = s; + CmdRadGrad_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.index; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); +} + +static inline __attribute__((always_inline)) +void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 8u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); + Alloc param_3 = a; + CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; + CmdImage param_5 = s; + CmdImage_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 9u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); +} + +static inline __attribute__((always_inline)) +void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.blend; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + uint param_2 = 10u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdEndClip_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 0u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_242 [[buffer(0)]], const device ConfigBuf& _854 [[buffer(1)]], const device SceneBuf& _1222 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_260 [[buffer(0)]], const device ConfigBuf& _1005 [[buffer(1)]], const device SceneBuf& _1372 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_bitmaps[8][256]; threadgroup Alloc sh_part_elements[256]; @@ -611,76 +689,77 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M threadgroup uint sh_tile_y0[256]; threadgroup uint sh_tile_base[256]; threadgroup uint sh_tile_count[256]; - constant uint& v_242BufferSize = spvBufferSizeConstants[0]; - uint width_in_bins = ((_854.conf.width_in_tiles + 16u) - 1u) / 16u; + constant uint& v_260BufferSize = spvBufferSizeConstants[0]; + uint width_in_bins = ((_1005.conf.width_in_tiles + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_854.conf.n_elements + 256u) - 1u) / 256u; + uint n_partitions = ((_1005.conf.n_elements + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _854.conf.width_in_tiles) + bin_tile_x) + tile_x; + uint this_tile_ix = (((bin_tile_y + tile_y) * _1005.conf.width_in_tiles) + bin_tile_x) + tile_x; Alloc param; - param.offset = _854.conf.ptcl_alloc.offset; + param.offset = _1005.conf.ptcl_alloc.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; + uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; uint clip_depth = 0u; uint clip_zero_depth = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - uint drawmonoid_start = _854.conf.drawmonoid_alloc.offset >> uint(2); - uint drawtag_start = _854.conf.drawtag_offset >> uint(2); - uint drawdata_start = _854.conf.drawdata_offset >> uint(2); - uint drawinfo_start = _854.conf.drawinfo_alloc.offset >> uint(2); - bool mem_ok = v_242.mem_error == 0u; + uint drawmonoid_start = _1005.conf.drawmonoid_alloc.offset >> uint(2); + uint drawtag_start = _1005.conf.drawtag_offset >> uint(2); + uint drawdata_start = _1005.conf.drawdata_offset >> uint(2); + uint drawinfo_start = _1005.conf.drawinfo_alloc.offset >> uint(2); + bool mem_ok = v_260.mem_error == 0u; Alloc param_3; Alloc param_5; - uint _1154; + uint _1304; uint element_ix; Alloc param_14; uint tile_count; - uint _1455; + uint _1605; float linewidth; CmdLinGrad cmd_lin; + CmdRadGrad cmd_rad; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1206; + bool _1356; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1003 = th_ix < 256u; - bool _1011; - if (_1003) + bool _1154 = th_ix < 256u; + bool _1162; + if (_1154) { - _1011 = (partition_ix + th_ix) < n_partitions; + _1162 = (partition_ix + th_ix) < n_partitions; } else { - _1011 = _1003; + _1162 = _1154; } - if (_1011) + if (_1162) { - uint in_ix = (_854.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_3.offset = _854.conf.bin_alloc.offset; + uint in_ix = (_1005.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + param_3.offset = _1005.conf.bin_alloc.offset; uint param_4 = in_ix; - count = read_mem(param_3, param_4, v_242, v_242BufferSize); - param_5.offset = _854.conf.bin_alloc.offset; + count = read_mem(param_3, param_4, v_260, v_260BufferSize); + param_5.offset = _1005.conf.bin_alloc.offset; uint param_6 = in_ix + 1u; - uint offset = read_mem(param_5, param_6, v_242, v_242BufferSize); + uint offset = read_mem(param_5, param_6, v_260, v_260BufferSize); uint param_7 = offset; uint param_8 = count * 4u; bool param_9 = mem_ok; @@ -724,34 +803,34 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (part_ix > 0u) { - _1154 = sh_part_count[part_ix - 1u]; + _1304 = sh_part_count[part_ix - 1u]; } else { - _1154 = part_start_ix; + _1304 = part_start_ix; } - ix -= _1154; + ix -= _1304; Alloc bin_alloc = sh_part_elements[part_ix]; BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; BinInstanceRef param_13 = BinInstance_index(param_10, param_11); - BinInstance inst = BinInstance_read(param_12, param_13, v_242, v_242BufferSize); + BinInstance inst = BinInstance_read(param_12, param_13, v_260, v_260BufferSize); sh_elements[th_ix] = inst.element_ix; } threadgroup_barrier(mem_flags::mem_threadgroup); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1196 = (wr_ix - rd_ix) < 256u; - if (_1196) + bool _1346 = (wr_ix - rd_ix) < 256u; + if (_1346) { - _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1206 = _1196; + _1356 = _1346; } - if (_1206) + if (_1356) { continue; } @@ -764,21 +843,22 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - tag = _1222.scene[drawtag_start + element_ix]; + tag = _1372.scene[drawtag_start + element_ix]; } switch (tag) { case 68u: case 72u: case 276u: + case 732u: case 5u: case 37u: { uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = v_242.memory[drawmonoid_base]; - param_14.offset = _854.conf.tile_alloc.offset; - PathRef param_15 = PathRef{ _854.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_14, param_15, v_242, v_242BufferSize); + uint path_ix = v_260.memory[drawmonoid_base]; + param_14.offset = _1005.conf.tile_alloc.offset; + PathRef param_15 = PathRef{ _1005.conf.tile_alloc.offset + (path_ix * 12u) }; + Path path = Path_read(param_14, param_15, v_260, v_260BufferSize); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -833,16 +913,16 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } } uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1222.scene[drawtag_start + element_ix_1]; + uint tag_1 = _1372.scene[drawtag_start + element_ix_1]; if (el_ix > 0u) { - _1455 = sh_tile_count[el_ix - 1u]; + _1605 = sh_tile_count[el_ix - 1u]; } else { - _1455 = 0u; + _1605 = 0u; } - uint seq_ix = ix_1 - _1455; + uint seq_ix = ix_1 - _1605; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -851,36 +931,36 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_21 = el_ix; bool param_22 = mem_ok; - Alloc param_23 = read_tile_alloc(param_21, param_22, v_242, v_242BufferSize); + Alloc param_23 = read_tile_alloc(param_21, param_22, v_260, v_260BufferSize); TileRef param_24 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_23, param_24, v_242, v_242BufferSize); + Tile tile = Tile_read(param_23, param_24, v_260, v_260BufferSize); bool is_clip = (tag_1 & 1u) != 0u; bool is_blend = false; if (is_clip) { uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = v_242.memory[drawmonoid_base_1 + 2u]; + uint scene_offset = v_260.memory[drawmonoid_base_1 + 2u]; uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1222.scene[dd]; + uint blend = _1372.scene[dd]; is_blend = blend != 3u; } - bool _1542 = tile.tile.offset != 0u; - bool _1551; - if (!_1542) + bool _1692 = tile.tile.offset != 0u; + bool _1701; + if (!_1692) { - _1551 = (tile.backdrop == 0) == is_clip; + _1701 = (tile.backdrop == 0) == is_clip; } else { - _1551 = _1542; + _1701 = _1692; } - include_tile = _1551 || is_blend; + include_tile = _1701 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1573 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1723 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -904,32 +984,32 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); uint element_ix_2 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - uint drawtag = _1222.scene[drawtag_start + element_ix_2]; + uint drawtag = _1372.scene[drawtag_start + element_ix_2]; if (clip_zero_depth == 0u) { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_242, v_242BufferSize); + Alloc param_27 = read_tile_alloc(param_25, param_26, v_260, v_260BufferSize); TileRef param_28 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Tile tile_1 = Tile_read(param_27, param_28, v_242, v_242BufferSize); + Tile tile_1 = Tile_read(param_27, param_28, v_260, v_260BufferSize); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = v_242.memory[drawmonoid_base_2 + 2u]; - uint info_offset = v_242.memory[drawmonoid_base_2 + 3u]; + uint scene_offset_1 = v_260.memory[drawmonoid_base_2 + 2u]; + uint info_offset = v_260.memory[drawmonoid_base_2 + 3u]; uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); uint di = drawinfo_start + (info_offset >> uint(2)); switch (drawtag) { case 68u: { - linewidth = as_type(v_242.memory[di]); + linewidth = as_type(v_260.memory[di]); Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1697 = alloc_cmd(param_29, param_30, param_31, v_242, v_242BufferSize); + bool _1848 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1697) + if (!_1848) { break; } @@ -937,13 +1017,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M CmdRef param_33 = cmd_ref; Tile param_34 = tile_1; float param_35 = linewidth; - write_fill(param_32, param_33, param_34, param_35, v_242, v_242BufferSize); + write_fill(param_32, param_33, param_34, param_35, v_260, v_260BufferSize); cmd_ref = param_33; - uint rgba = _1222.scene[dd_1]; + uint rgba = _1372.scene[dd_1]; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; CmdColor param_38 = CmdColor{ rgba }; - Cmd_Color_write(param_36, param_37, param_38, v_242, v_242BufferSize); + Cmd_Color_write(param_36, param_37, param_38, v_260, v_260BufferSize); cmd_ref.offset += 8u; break; } @@ -952,94 +1032,127 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1738 = alloc_cmd(param_39, param_40, param_41, v_242, v_242BufferSize); + bool _1889 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1738) + if (!_1889) { break; } - linewidth = as_type(v_242.memory[di]); + linewidth = as_type(v_260.memory[di]); Alloc param_42 = cmd_alloc; CmdRef param_43 = cmd_ref; Tile param_44 = tile_1; float param_45 = linewidth; - write_fill(param_42, param_43, param_44, param_45, v_242, v_242BufferSize); + write_fill(param_42, param_43, param_44, param_45, v_260, v_260BufferSize); cmd_ref = param_43; - cmd_lin.index = _1222.scene[dd_1]; - cmd_lin.line_x = as_type(v_242.memory[di + 1u]); - cmd_lin.line_y = as_type(v_242.memory[di + 2u]); - cmd_lin.line_c = as_type(v_242.memory[di + 3u]); + cmd_lin.index = _1372.scene[dd_1]; + cmd_lin.line_x = as_type(v_260.memory[di + 1u]); + cmd_lin.line_y = as_type(v_260.memory[di + 2u]); + cmd_lin.line_c = as_type(v_260.memory[di + 3u]); Alloc param_46 = cmd_alloc; CmdRef param_47 = cmd_ref; CmdLinGrad param_48 = cmd_lin; - Cmd_LinGrad_write(param_46, param_47, param_48, v_242, v_242BufferSize); + Cmd_LinGrad_write(param_46, param_47, param_48, v_260, v_260BufferSize); cmd_ref.offset += 20u; break; } - case 72u: + case 732u: { - linewidth = as_type(v_242.memory[di]); Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1806 = alloc_cmd(param_49, param_50, param_51, v_242, v_242BufferSize); + bool _1953 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1806) + if (!_1953) { break; } + linewidth = as_type(v_260.memory[di]); Alloc param_52 = cmd_alloc; CmdRef param_53 = cmd_ref; Tile param_54 = tile_1; float param_55 = linewidth; - write_fill(param_52, param_53, param_54, param_55, v_242, v_242BufferSize); + write_fill(param_52, param_53, param_54, param_55, v_260, v_260BufferSize); cmd_ref = param_53; - uint index = _1222.scene[dd_1]; - uint raw1 = _1222.scene[dd_1 + 1u]; - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + cmd_rad.index = _1372.scene[dd_1]; + cmd_rad.mat = as_type(uint4(v_260.memory[di + 1u], v_260.memory[di + 2u], v_260.memory[di + 3u], v_260.memory[di + 4u])); + cmd_rad.xlat = as_type(uint2(v_260.memory[di + 5u], v_260.memory[di + 6u])); + cmd_rad.c1 = as_type(uint2(v_260.memory[di + 7u], v_260.memory[di + 8u])); + cmd_rad.ra = as_type(v_260.memory[di + 9u]); + cmd_rad.roff = as_type(v_260.memory[di + 10u]); Alloc param_56 = cmd_alloc; CmdRef param_57 = cmd_ref; - CmdImage param_58 = CmdImage{ index, offset_1 }; - Cmd_Image_write(param_56, param_57, param_58, v_242, v_242BufferSize); + CmdRadGrad param_58 = cmd_rad; + Cmd_RadGrad_write(param_56, param_57, param_58, v_260, v_260BufferSize); + cmd_ref.offset += 48u; + break; + } + case 72u: + { + linewidth = as_type(v_260.memory[di]); + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _2059 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_2059) + { + break; + } + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Tile param_64 = tile_1; + float param_65 = linewidth; + write_fill(param_62, param_63, param_64, param_65, v_260, v_260BufferSize); + cmd_ref = param_63; + uint index = _1372.scene[dd_1]; + uint raw1 = _1372.scene[dd_1 + 1u]; + int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + Alloc param_66 = cmd_alloc; + CmdRef param_67 = cmd_ref; + CmdImage param_68 = CmdImage{ index, offset_1 }; + Cmd_Image_write(param_66, param_67, param_68, v_260, v_260BufferSize); cmd_ref.offset += 12u; break; } case 5u: { - bool _1859 = tile_1.tile.offset == 0u; - bool _1865; - if (_1859) + bool _2112 = tile_1.tile.offset == 0u; + bool _2118; + if (_2112) { - _1865 = tile_1.backdrop == 0; + _2118 = tile_1.backdrop == 0; } else { - _1865 = _1859; + _2118 = _2112; } - if (_1865) + if (_2118) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_59 = cmd_alloc; - CmdRef param_60 = cmd_ref; - uint param_61 = cmd_limit; - bool _1877 = alloc_cmd(param_59, param_60, param_61, v_242, v_242BufferSize); - cmd_alloc = param_59; - cmd_ref = param_60; - cmd_limit = param_61; - if (!_1877) + Alloc param_69 = cmd_alloc; + CmdRef param_70 = cmd_ref; + uint param_71 = cmd_limit; + bool _2130 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); + cmd_alloc = param_69; + cmd_ref = param_70; + cmd_limit = param_71; + if (!_2130) { break; } - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - Cmd_BeginClip_write(param_62, param_63, v_242, v_242BufferSize); + Alloc param_72 = cmd_alloc; + CmdRef param_73 = cmd_ref; + Cmd_BeginClip_write(param_72, param_73, v_260, v_260BufferSize); cmd_ref.offset += 4u; } clip_depth++; @@ -1048,28 +1161,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 37u: { clip_depth--; - Alloc param_64 = cmd_alloc; - CmdRef param_65 = cmd_ref; - uint param_66 = cmd_limit; - bool _1905 = alloc_cmd(param_64, param_65, param_66, v_242, v_242BufferSize); - cmd_alloc = param_64; - cmd_ref = param_65; - cmd_limit = param_66; - if (!_1905) + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + uint param_76 = cmd_limit; + bool _2158 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); + cmd_alloc = param_74; + cmd_ref = param_75; + cmd_limit = param_76; + if (!_2158) { break; } - Alloc param_67 = cmd_alloc; - CmdRef param_68 = cmd_ref; - Tile param_69 = tile_1; - float param_70 = -1.0; - write_fill(param_67, param_68, param_69, param_70, v_242, v_242BufferSize); - cmd_ref = param_68; - uint blend_1 = _1222.scene[dd_1]; - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - CmdEndClip param_73 = CmdEndClip{ blend_1 }; - Cmd_EndClip_write(param_71, param_72, param_73, v_242, v_242BufferSize); + Alloc param_77 = cmd_alloc; + CmdRef param_78 = cmd_ref; + Tile param_79 = tile_1; + float param_80 = -1.0; + write_fill(param_77, param_78, param_79, param_80, v_260, v_260BufferSize); + cmd_ref = param_78; + uint blend_1 = _1372.scene[dd_1]; + Alloc param_81 = cmd_alloc; + CmdRef param_82 = cmd_ref; + CmdEndClip param_83 = CmdEndClip{ blend_1 }; + Cmd_EndClip_write(param_81, param_82, param_83, v_260, v_260BufferSize); cmd_ref.offset += 8u; break; } @@ -1103,21 +1216,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _1975 = (bin_tile_x + tile_x) < _854.conf.width_in_tiles; - bool _1984; - if (_1975) + bool _2228 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; + bool _2237; + if (_2228) { - _1984 = (bin_tile_y + tile_y) < _854.conf.height_in_tiles; + _2237 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; } else { - _1984 = _1975; + _2237 = _2228; } - if (_1984) + if (_2237) { - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - Cmd_End_write(param_74, param_75, v_242, v_242BufferSize); + Alloc param_84 = cmd_alloc; + CmdRef param_85 = cmd_ref; + Cmd_End_write(param_84, param_85, v_260, v_260BufferSize); } } diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index b85fd8c..6d33ee7 100644 Binary files a/piet-gpu/shader/gen/coarse.spv and b/piet-gpu/shader/gen/coarse.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index 77396c1..200f169 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index f812f52..734d21e 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -46,10 +46,10 @@ static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const DrawMonoid _23 = { 0u, 0u, 0u, 0u }; -ByteAddressBuffer _92 : register(t1, space0); -ByteAddressBuffer _102 : register(t2, space0); -ByteAddressBuffer _202 : register(t3, space0); -RWByteAddressBuffer _284 : register(u0, space0); +ByteAddressBuffer _93 : register(t1, space0); +ByteAddressBuffer _103 : register(t2, space0); +ByteAddressBuffer _203 : register(t3, space0); +RWByteAddressBuffer _285 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -66,8 +66,8 @@ groupshared DrawMonoid sh_scratch[256]; DrawMonoid map_tag(uint tag_word) { uint has_path = uint(tag_word != 0u); - DrawMonoid _75 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; - return _75; + DrawMonoid _76 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; + return _76; } DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) @@ -88,15 +88,15 @@ DrawMonoid draw_monoid_identity() void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _92.Load(100) >> uint(2); - uint tag_word = _102.Load((drawtag_base + ix) * 4 + 0); + uint drawtag_base = _93.Load(100) >> uint(2); + uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0); uint param = tag_word; DrawMonoid agg = map_tag(param); DrawMonoid local[8]; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - tag_word = _102.Load(((drawtag_base + ix) + i) * 4 + 0); + tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0); uint param_1 = tag_word; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -121,15 +121,15 @@ void comp_main() DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _208; - _208.path_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 0); - _208.clip_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 4); - _208.scene_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 8); - _208.info_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 12); - row.path_ix = _208.path_ix; - row.clip_ix = _208.clip_ix; - row.scene_offset = _208.scene_offset; - row.info_offset = _208.info_offset; + DrawMonoid _209; + _209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0); + _209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4); + _209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8); + _209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12); + row.path_ix = _209.path_ix; + row.clip_ix = _209.clip_ix; + row.scene_offset = _209.scene_offset; + row.info_offset = _209.info_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -137,13 +137,15 @@ void comp_main() DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_draw_monoid(param_6, param_7); } - uint drawdata_base = _92.Load(104) >> uint(2); - uint drawinfo_base = _92.Load(68) >> uint(2); + uint drawdata_base = _93.Load(104) >> uint(2); + uint drawinfo_base = _93.Load(68) >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_92.Load(44) >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _92.Load(48) >> uint(2); + uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _93.Load(48) >> uint(2); float4 mat; float2 translate; + float2 p0; + float2 p1; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -153,31 +155,31 @@ void comp_main() DrawMonoid param_9 = local[i_2 - 1u]; m = combine_draw_monoid(param_8, param_9); } - _284.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); - _284.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); - _284.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); - _284.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); + _285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); + _285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); + _285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); + _285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _102.Load(((drawtag_base + ix) + i_2) * 4 + 0); - if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) + tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0); + if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_92.Load(40) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_284.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_284.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_284.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_284.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_284.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); - if ((linewidth >= 0.0f) || (tag_word == 276u)) + if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u)) { - uint trans_ix = _284.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_92.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_284.Load(t * 4 + 8), _284.Load((t + 1u) * 4 + 8), _284.Load((t + 2u) * 4 + 8), _284.Load((t + 3u) * 4 + 8))); - if (tag_word == 276u) + uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8))); + if ((tag_word == 276u) || (tag_word == 732u)) { - translate = asfloat(uint2(_284.Load((t + 4u) * 4 + 8), _284.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) @@ -189,15 +191,14 @@ void comp_main() case 68u: case 72u: { - _284.Store(di * 4 + 8, asuint(linewidth)); + _285.Store(di * 4 + 8, asuint(linewidth)); break; } case 276u: { - _284.Store(di * 4 + 8, asuint(linewidth)); - uint index = _102.Load(dd * 4 + 0); - float2 p0 = asfloat(uint2(_102.Load((dd + 1u) * 4 + 0), _102.Load((dd + 2u) * 4 + 0))); - float2 p1 = asfloat(uint2(_102.Load((dd + 3u) * 4 + 0), _102.Load((dd + 4u) * 4 + 0))); + _285.Store(di * 4 + 8, asuint(linewidth)); + p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); + p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; @@ -205,9 +206,38 @@ void comp_main() float line_x = dxy.x * scale; float line_y = dxy.y * scale; float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _284.Store((di + 1u) * 4 + 8, asuint(line_x)); - _284.Store((di + 2u) * 4 + 8, asuint(line_y)); - _284.Store((di + 3u) * 4 + 8, asuint(line_c)); + _285.Store((di + 1u) * 4 + 8, asuint(line_x)); + _285.Store((di + 2u) * 4 + 8, asuint(line_y)); + _285.Store((di + 3u) * 4 + 8, asuint(line_c)); + break; + } + case 732u: + { + p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); + p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); + float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0)); + float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0)); + float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z)); + float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; + float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); + inv_tr += p0; + float2 center1 = p1 - p0; + float rr = r1 / (r1 - r0); + float rainv = rr / ((r1 * r1) - dot(center1, center1)); + float2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0f; + _285.Store(di * 4 + 8, asuint(linewidth)); + _285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x)); + _285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y)); + _285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z)); + _285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w)); + _285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x)); + _285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y)); + _285.Store((di + 7u) * 4 + 8, asuint(c1.x)); + _285.Store((di + 8u) * 4 + 8, asuint(c1.y)); + _285.Store((di + 9u) * 4 + 8, asuint(ra)); + _285.Store((di + 10u) * 4 + 8, asuint(roff)); break; } case 5u: @@ -223,7 +253,7 @@ void comp_main() { path_ix = m.path_ix; } - _284.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); + _285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } } } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index a8516ae..c11e21b 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -124,7 +124,7 @@ static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; } static inline __attribute__((always_inline)) @@ -144,19 +144,19 @@ DrawMonoid draw_monoid_identity() return DrawMonoid{ 0u, 0u, 0u, 0u }; } -kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 [[buffer(1)]], const device SceneBuf& _102 [[buffer(2)]], const device ParentBuf& _202 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _92.conf.drawtag_offset >> uint(2); - uint tag_word = _102.scene[drawtag_base + ix]; + uint drawtag_base = _93.conf.drawtag_offset >> uint(2); + uint tag_word = _103.scene[drawtag_base + ix]; uint param = tag_word; DrawMonoid agg = map_tag(param); spvUnsafeArray local; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - tag_word = _102.scene[(drawtag_base + ix) + i]; + tag_word = _103.scene[(drawtag_base + ix) + i]; uint param_1 = tag_word; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -181,11 +181,11 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _205 = gl_WorkGroupID.x - 1u; - row.path_ix = _202.parent[_205].path_ix; - row.clip_ix = _202.parent[_205].clip_ix; - row.scene_offset = _202.parent[_205].scene_offset; - row.info_offset = _202.parent[_205].info_offset; + uint _206 = gl_WorkGroupID.x - 1u; + row.path_ix = _203.parent[_206].path_ix; + row.clip_ix = _203.parent[_206].clip_ix; + row.scene_offset = _203.parent[_206].scene_offset; + row.info_offset = _203.parent[_206].info_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -193,13 +193,15 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_draw_monoid(param_6, param_7); } - uint drawdata_base = _92.conf.drawdata_offset >> uint(2); - uint drawinfo_base = _92.conf.drawinfo_alloc.offset >> uint(2); + uint drawdata_base = _93.conf.drawdata_offset >> uint(2); + uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_92.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _92.conf.clip_alloc.offset >> uint(2); + uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2); float4 mat; float2 translate; + float2 p0; + float2 p1; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -209,31 +211,31 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid param_9 = local[i_2 - 1u]; m = combine_draw_monoid(param_8, param_9); } - _284.memory[out_base + (i_2 * 4u)] = m.path_ix; - _284.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; - _284.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; - _284.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; + _285.memory[out_base + (i_2 * 4u)] = m.path_ix; + _285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; + _285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; + _285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _102.scene[(drawtag_base + ix) + i_2]; - if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) + tag_word = _103.scene[(drawtag_base + ix) + i_2]; + if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_92.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_284.memory[bbox_offset]) - 32768.0; - float bbox_t = float(_284.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(_284.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(_284.memory[bbox_offset + 3u]) - 32768.0; + uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_285.memory[bbox_offset]) - 32768.0; + float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(_284.memory[bbox_offset + 4u]); + float linewidth = as_type(_285.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); - if ((linewidth >= 0.0) || (tag_word == 276u)) + if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u)) { - uint trans_ix = _284.memory[bbox_offset + 5u]; - uint t = (_92.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(_284.memory[t], _284.memory[t + 1u], _284.memory[t + 2u], _284.memory[t + 3u])); - if (tag_word == 276u) + uint trans_ix = _285.memory[bbox_offset + 5u]; + uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u])); + if ((tag_word == 276u) || (tag_word == 732u)) { - translate = as_type(uint2(_284.memory[t + 4u], _284.memory[t + 5u])); + translate = as_type(uint2(_285.memory[t + 4u], _285.memory[t + 5u])); } } if (linewidth >= 0.0) @@ -245,15 +247,14 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 case 68u: case 72u: { - _284.memory[di] = as_type(linewidth); + _285.memory[di] = as_type(linewidth); break; } case 276u: { - _284.memory[di] = as_type(linewidth); - uint index = _102.scene[dd]; - float2 p0 = as_type(uint2(_102.scene[dd + 1u], _102.scene[dd + 2u])); - float2 p1 = as_type(uint2(_102.scene[dd + 3u], _102.scene[dd + 4u])); + _285.memory[di] = as_type(linewidth); + p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); + p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; @@ -261,9 +262,38 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 float line_x = dxy.x * scale; float line_y = dxy.y * scale; float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _284.memory[di + 1u] = as_type(line_x); - _284.memory[di + 2u] = as_type(line_y); - _284.memory[di + 3u] = as_type(line_c); + _285.memory[di + 1u] = as_type(line_x); + _285.memory[di + 2u] = as_type(line_y); + _285.memory[di + 3u] = as_type(line_c); + break; + } + case 732u: + { + p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); + p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); + float r0 = as_type(_103.scene[dd + 5u]); + float r1 = as_type(_103.scene[dd + 6u]); + float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z)); + float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; + float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); + inv_tr += p0; + float2 center1 = p1 - p0; + float rr = r1 / (r1 - r0); + float rainv = rr / ((r1 * r1) - dot(center1, center1)); + float2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0; + _285.memory[di] = as_type(linewidth); + _285.memory[di + 1u] = as_type(inv_mat.x); + _285.memory[di + 2u] = as_type(inv_mat.y); + _285.memory[di + 3u] = as_type(inv_mat.z); + _285.memory[di + 4u] = as_type(inv_mat.w); + _285.memory[di + 5u] = as_type(inv_tr.x); + _285.memory[di + 6u] = as_type(inv_tr.y); + _285.memory[di + 7u] = as_type(c1.x); + _285.memory[di + 8u] = as_type(c1.y); + _285.memory[di + 9u] = as_type(ra); + _285.memory[di + 10u] = as_type(roff); break; } case 5u: @@ -279,7 +309,7 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 { path_ix = m.path_ix; } - _284.memory[clip_out_base + m.clip_ix] = path_ix; + _285.memory[clip_out_base + m.clip_ix] = path_ix; } } } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index d18b287..58dde43 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.spv and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil index 4df0ec5..be69aad 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.dxil and b/piet-gpu/shader/gen/draw_reduce.dxil differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl index 7220b7e..8311155 100644 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -44,10 +44,10 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -ByteAddressBuffer _86 : register(t1, space0); -ByteAddressBuffer _96 : register(t2, space0); -RWByteAddressBuffer _187 : register(u3, space0); -RWByteAddressBuffer _205 : register(u0, space0); +ByteAddressBuffer _87 : register(t1, space0); +ByteAddressBuffer _97 : register(t2, space0); +RWByteAddressBuffer _188 : register(u3, space0); +RWByteAddressBuffer _206 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -64,8 +64,8 @@ groupshared DrawMonoid sh_scratch[256]; DrawMonoid map_tag(uint tag_word) { uint has_path = uint(tag_word != 0u); - DrawMonoid _69 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; - return _69; + DrawMonoid _70 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; + return _70; } DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) @@ -81,13 +81,13 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _86.Load(100) >> uint(2); - uint tag_word = _96.Load((drawtag_base + ix) * 4 + 0); + uint drawtag_base = _87.Load(100) >> uint(2); + uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0); uint param = tag_word; DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - uint tag_word_1 = _96.Load(((drawtag_base + ix) + i) * 4 + 0); + uint tag_word_1 = _97.Load(((drawtag_base + ix) + i) * 4 + 0); uint param_1 = tag_word_1; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -109,10 +109,10 @@ void comp_main() } if (gl_LocalInvocationID.x == 0u) { - _187.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); - _187.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); - _187.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); - _187.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); + _188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); + _188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); + _188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); + _188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); } } diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl index 8e409a8..759267c 100644 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -85,7 +85,7 @@ static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; } static inline __attribute__((always_inline)) @@ -99,17 +99,17 @@ DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMono return c; } -kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneBuf& _96 [[buffer(2)]], device OutBuf& _187 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _86.conf.drawtag_offset >> uint(2); - uint tag_word = _96.scene[drawtag_base + ix]; + uint drawtag_base = _87.conf.drawtag_offset >> uint(2); + uint tag_word = _97.scene[drawtag_base + ix]; uint param = tag_word; DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - uint tag_word_1 = _96.scene[(drawtag_base + ix) + i]; + uint tag_word_1 = _97.scene[(drawtag_base + ix) + i]; uint param_1 = tag_word_1; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -131,10 +131,10 @@ kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneB } if (gl_LocalInvocationID.x == 0u) { - _187.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _187.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; - _187.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; - _187.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; + _188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + _188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; + _188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; } } diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv index 4daf43a..d6c6fb7 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.spv and b/piet-gpu/shader/gen/draw_reduce.spv differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index c0c27c9..e6eccc1 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index f17b240..92fe05b 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -48,6 +48,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -146,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _278 : register(u0, space0); -ByteAddressBuffer _1521 : register(t1, space0); +RWByteAddressBuffer _291 : register(u0, space0); +ByteAddressBuffer _1666 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -174,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _291 = { a.offset + offset }; - return _291; + Alloc _304 = { a.offset + offset }; + return _304; } bool touch_mem(Alloc alloc, uint offset) @@ -191,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _278.Load(offset * 4 + 8); + uint v = _291.Load(offset * 4 + 8); return v; } @@ -200,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _525; + CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _663; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -221,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _542 = { ref.offset + 4u }; + CmdStrokeRef _679 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _542; + CmdStrokeRef param_1 = _679; return CmdStroke_read(param, param_1); } @@ -259,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _675 = { raw5 }; - s.next = _675; + TileSegRef _820 = { raw5 }; + s.next = _820; return s; } @@ -286,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _532 = { ref.offset + 4u }; + CmdFillRef _669 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _532; + CmdFillRef param_1 = _669; return CmdFill_read(param, param_1); } @@ -305,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _552 = { ref.offset + 4u }; + CmdAlphaRef _689 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _552; + CmdAlphaRef param_1 = _689; return CmdAlpha_read(param, param_1); } @@ -324,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _562 = { ref.offset + 4u }; + CmdColorRef _699 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _562; + CmdColorRef param_1 = _699; return CmdColor_read(param, param_1); } @@ -370,12 +385,66 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _572 = { ref.offset + 4u }; + CmdLinGradRef _709 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _572; + CmdLinGradRef param_1 = _709; return CmdLinGrad_read(param, param_1); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); + s.xlat = float2(asfloat(raw5), asfloat(raw6)); + s.c1 = float2(asfloat(raw7), asfloat(raw8)); + s.ra = asfloat(raw9); + s.roff = asfloat(raw10); + return s; +} + +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) +{ + CmdRadGradRef _719 = { ref.offset + 4u }; + Alloc param = a; + CmdRadGradRef param_1 = _719; + return CmdRadGrad_read(param, param_1); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> uint(2); @@ -393,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _582 = { ref.offset + 4u }; + CmdImageRef _729 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _582; + CmdImageRef param_1 = _729; return CmdImage_read(param, param_1); } @@ -408,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -445,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _592 = { ref.offset + 4u }; + CmdEndClipRef _739 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _592; + CmdEndClipRef param_1 = _739; return CmdEndClip_read(param, param_1); } @@ -637,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -726,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -738,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -877,24 +946,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _602 = { ref.offset + 4u }; + CmdJumpRef _749 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _602; + CmdJumpRef param_1 = _749; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; - Alloc _1536; - _1536.offset = _1521.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; + Alloc _1681; + _1681.offset = _1666.Load(24); Alloc param; - param.offset = _1536.offset; + param.offset = _1681.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1545 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1545; + CmdRef _1690 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1690; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -903,7 +972,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _278.Load(4) == 0u; + bool mem_ok = _291.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -928,8 +997,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1638 = { stroke.tile_ref }; - tile_seg_ref = _1638; + TileSegRef _1784 = { stroke.tile_ref }; + tile_seg_ref = _1784; do { uint param_7 = tile_seg_ref.offset; @@ -965,8 +1034,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1758 = { fill.tile_ref }; - tile_seg_ref = _1758; + TileSegRef _1904 = { fill.tile_ref }; + tile_seg_ref = _1904; do { uint param_15 = tile_seg_ref.offset; @@ -1055,11 +1124,12 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1068,74 +1138,100 @@ void comp_main() { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - float4 _2121[8]; - fillImage(_2121, param_32, param_33); - float4 img[8] = _2121; + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); + float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + float4 _2391[8]; + fillImage(_2391, param_36, param_37); + float4 img[8] = _2391; for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = 0.0f.xxxx; + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = 0.0f.xxxx; } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2299; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; + cmd_ref = _2569; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1143,9 +1239,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w); + uint param_52 = i_1; + float3 param_53 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_52))] = float4(tosRGB(param_53), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index c1f41af..6489563 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -94,6 +94,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -222,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) { Alloc param = alloc; uint param_1 = offset; @@ -230,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_278.memory[offset]; + uint v = v_291.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_278); + uint tag_and_flags = read_mem(param, param_1, v_291); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -260,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_278); + return CmdStroke_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -276,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_278); + uint raw4 = read_mem(param_8, param_9, v_291); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_278); + uint raw5 = read_mem(param_10, param_11, v_291); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -312,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -328,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_278); + return CmdFill_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_278); + return CmdAlpha_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_278); + return CmdColor_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -393,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -417,23 +432,78 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_278); + return CmdLinGrad_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5, v_291); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7, v_291); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_291); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11, v_291); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13, v_291); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15, v_291); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17, v_291); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19, v_291); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21, v_291); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); + s.xlat = float2(as_type(raw5), as_type(raw6)); + s.c1 = float2(as_type(raw7), as_type(raw8)); + s.ra = as_type(raw9); + s.roff = as_type(raw10); + return s; +} + +static inline __attribute__((always_inline)) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +{ + Alloc param = a; + CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; + return CmdRadGrad_read(param, param_1, v_291); +} + +static inline __attribute__((always_inline)) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_291); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3, v_291); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -441,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_278); + return CmdImage_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -458,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } return rgba; @@ -485,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_278); + return CmdEndClip_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -701,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } static inline __attribute__((always_inline)) @@ -791,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -803,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -931,30 +1001,30 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_278); + return CmdJump_read(param, param_1, v_291); } -kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1521.conf.ptcl_alloc.offset; + param.offset = _1666.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -967,7 +1037,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_278.mem_error == 0u; + bool mem_ok = v_291.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -976,7 +1046,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_278).tag; + uint tag = Cmd_tag(param_3, param_4, v_291).tag; if (tag == 0u) { break; @@ -987,7 +1057,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1000,7 +1070,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_278); + TileSeg seg = TileSeg_read(param_10, param_11, v_291); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1023,7 +1093,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1036,7 +1106,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1080,7 +1150,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_278); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1092,7 +1162,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_278); + CmdColor color = Cmd_Color_read(param_23, param_24, v_291); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1107,7 +1177,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1117,11 +1187,12 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1130,72 +1201,98 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - spvUnsafeArray img; - img = fillImage(param_32, param_33, image_atlas); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); + float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + spvUnsafeArray img; + img = fillImage(param_36, param_37, image_atlas); for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = float4(0.0); + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = float4(0.0); } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1203,9 +1300,9 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); + uint param_52 = i_1; + float3 param_53 = rgba[i_1].xyz; + image.write(float4(tosRGB(param_53), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); } } diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 91272da..7061263 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 18c4b7e..046045f 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index de95771..019a73c 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -48,6 +48,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -146,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _278 : register(u0, space0); -ByteAddressBuffer _1521 : register(t1, space0); +RWByteAddressBuffer _291 : register(u0, space0); +ByteAddressBuffer _1666 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -174,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _291 = { a.offset + offset }; - return _291; + Alloc _304 = { a.offset + offset }; + return _304; } bool touch_mem(Alloc alloc, uint offset) @@ -191,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _278.Load(offset * 4 + 8); + uint v = _291.Load(offset * 4 + 8); return v; } @@ -200,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _525; + CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _663; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -221,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _542 = { ref.offset + 4u }; + CmdStrokeRef _679 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _542; + CmdStrokeRef param_1 = _679; return CmdStroke_read(param, param_1); } @@ -259,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _675 = { raw5 }; - s.next = _675; + TileSegRef _820 = { raw5 }; + s.next = _820; return s; } @@ -286,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _532 = { ref.offset + 4u }; + CmdFillRef _669 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _532; + CmdFillRef param_1 = _669; return CmdFill_read(param, param_1); } @@ -305,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _552 = { ref.offset + 4u }; + CmdAlphaRef _689 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _552; + CmdAlphaRef param_1 = _689; return CmdAlpha_read(param, param_1); } @@ -324,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _562 = { ref.offset + 4u }; + CmdColorRef _699 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _562; + CmdColorRef param_1 = _699; return CmdColor_read(param, param_1); } @@ -370,12 +385,66 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _572 = { ref.offset + 4u }; + CmdLinGradRef _709 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _572; + CmdLinGradRef param_1 = _709; return CmdLinGrad_read(param, param_1); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); + s.xlat = float2(asfloat(raw5), asfloat(raw6)); + s.c1 = float2(asfloat(raw7), asfloat(raw8)); + s.ra = asfloat(raw9); + s.roff = asfloat(raw10); + return s; +} + +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) +{ + CmdRadGradRef _719 = { ref.offset + 4u }; + Alloc param = a; + CmdRadGradRef param_1 = _719; + return CmdRadGrad_read(param, param_1); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> uint(2); @@ -393,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _582 = { ref.offset + 4u }; + CmdImageRef _729 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _582; + CmdImageRef param_1 = _729; return CmdImage_read(param, param_1); } @@ -408,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -445,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _592 = { ref.offset + 4u }; + CmdEndClipRef _739 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _592; + CmdEndClipRef param_1 = _739; return CmdEndClip_read(param, param_1); } @@ -637,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -726,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -738,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -877,24 +946,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _602 = { ref.offset + 4u }; + CmdJumpRef _749 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _602; + CmdJumpRef param_1 = _749; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; - Alloc _1536; - _1536.offset = _1521.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; + Alloc _1681; + _1681.offset = _1666.Load(24); Alloc param; - param.offset = _1536.offset; + param.offset = _1681.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1545 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1545; + CmdRef _1690 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1690; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -903,7 +972,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _278.Load(4) == 0u; + bool mem_ok = _291.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -928,8 +997,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1638 = { stroke.tile_ref }; - tile_seg_ref = _1638; + TileSegRef _1784 = { stroke.tile_ref }; + tile_seg_ref = _1784; do { uint param_7 = tile_seg_ref.offset; @@ -965,8 +1034,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1758 = { fill.tile_ref }; - tile_seg_ref = _1758; + TileSegRef _1904 = { fill.tile_ref }; + tile_seg_ref = _1904; do { uint param_15 = tile_seg_ref.offset; @@ -1055,11 +1124,12 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1068,74 +1138,100 @@ void comp_main() { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - float4 _2121[8]; - fillImage(_2121, param_32, param_33); - float4 img[8] = _2121; + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); + float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + float4 _2391[8]; + fillImage(_2391, param_36, param_37); + float4 img[8] = _2391; for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = 0.0f.xxxx; + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = 0.0f.xxxx; } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2299; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; + cmd_ref = _2569; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1143,8 +1239,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; + uint param_52 = i_1; + image[int2(xy_uint + chunk_offset(param_52))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 5128e99..6402c6f 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -94,6 +94,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -222,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) { Alloc param = alloc; uint param_1 = offset; @@ -230,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_278.memory[offset]; + uint v = v_291.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_278); + uint tag_and_flags = read_mem(param, param_1, v_291); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -260,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_278); + return CmdStroke_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -276,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_278); + uint raw4 = read_mem(param_8, param_9, v_291); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_278); + uint raw5 = read_mem(param_10, param_11, v_291); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -312,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -328,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_278); + return CmdFill_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_278); + return CmdAlpha_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_278); + return CmdColor_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -393,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -417,23 +432,78 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_278); + return CmdLinGrad_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5, v_291); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7, v_291); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_291); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11, v_291); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13, v_291); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15, v_291); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17, v_291); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19, v_291); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21, v_291); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); + s.xlat = float2(as_type(raw5), as_type(raw6)); + s.c1 = float2(as_type(raw7), as_type(raw8)); + s.ra = as_type(raw9); + s.roff = as_type(raw10); + return s; +} + +static inline __attribute__((always_inline)) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +{ + Alloc param = a; + CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; + return CmdRadGrad_read(param, param_1, v_291); +} + +static inline __attribute__((always_inline)) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_291); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3, v_291); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -441,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_278); + return CmdImage_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -458,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } return rgba; @@ -485,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_278); + return CmdEndClip_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -701,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } static inline __attribute__((always_inline)) @@ -791,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -803,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -931,30 +1001,30 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_278); + return CmdJump_read(param, param_1, v_291); } -kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1521.conf.ptcl_alloc.offset; + param.offset = _1666.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -967,7 +1037,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_278.mem_error == 0u; + bool mem_ok = v_291.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -976,7 +1046,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_278).tag; + uint tag = Cmd_tag(param_3, param_4, v_291).tag; if (tag == 0u) { break; @@ -987,7 +1057,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1000,7 +1070,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_278); + TileSeg seg = TileSeg_read(param_10, param_11, v_291); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1023,7 +1093,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1036,7 +1106,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1080,7 +1150,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_278); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1092,7 +1162,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_278); + CmdColor color = Cmd_Color_read(param_23, param_24, v_291); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1107,7 +1177,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1117,11 +1187,12 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1130,72 +1201,98 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - spvUnsafeArray img; - img = fillImage(param_32, param_33, image_atlas); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); + float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + spvUnsafeArray img; + img = fillImage(param_36, param_37, image_atlas); for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = float4(0.0); + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = float4(0.0); } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1203,8 +1300,8 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); + uint param_52 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); } } diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 791b76c..4633401 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index a97715a..c49e2fa 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -192,10 +192,27 @@ void main() { int x = int(round(clamp(my_d, 0.0, 1.0) * float(GRADIENT_WIDTH - 1))); mediump vec4 fg_rgba = imageLoad(gradients, ivec2(x, int(lin.index))); fg_rgba.rgb = fromsRGB(fg_rgba.rgb); - rgba[k] = fg_rgba; + mediump vec4 fg_k = fg_rgba * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdLinGrad_size; break; + case Cmd_RadGrad: + CmdRadGrad rad = Cmd_RadGrad_read(cmd_alloc, cmd_ref); + for (uint k = 0; k < CHUNK; k++) { + vec2 my_xy = xy + vec2(chunk_offset(k)); + my_xy = rad.mat.xz * my_xy.x + rad.mat.yw * my_xy.y - rad.xlat; + float ba = dot(my_xy, rad.c1); + float ca = rad.ra * dot(my_xy, my_xy); + float t = sqrt(ba * ba + ca) - ba - rad.roff; + int x = int(round(clamp(t, 0.0, 1.0) * float(GRADIENT_WIDTH - 1))); + mediump vec4 fg_rgba = imageLoad(gradients, ivec2(x, int(rad.index))); + fg_rgba.rgb = fromsRGB(fg_rgba.rgb); + mediump vec4 fg_k = fg_rgba * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; + } + cmd_ref.offset += 4 + CmdRadGrad_size; + break; case Cmd_Image: CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref); mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img); diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 9b9b341..54dcc9e 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -18,6 +18,10 @@ struct CmdLinGradRef { uint offset; }; +struct CmdRadGradRef { + uint offset; +}; + struct CmdImageRef { uint offset; }; @@ -83,6 +87,21 @@ CmdLinGradRef CmdLinGrad_index(CmdLinGradRef ref, uint index) { return CmdLinGradRef(ref.offset + index * CmdLinGrad_size); } +struct CmdRadGrad { + uint index; + vec4 mat; + vec2 xlat; + vec2 c1; + float ra; + float roff; +}; + +#define CmdRadGrad_size 44 + +CmdRadGradRef CmdRadGrad_index(CmdRadGradRef ref, uint index) { + return CmdRadGradRef(ref.offset + index * CmdRadGrad_size); +} + struct CmdImage { uint index; ivec2 offset; @@ -131,11 +150,12 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) { #define Cmd_Alpha 4 #define Cmd_Color 5 #define Cmd_LinGrad 6 -#define Cmd_Image 7 -#define Cmd_BeginClip 8 -#define Cmd_EndClip 9 -#define Cmd_Jump 10 -#define Cmd_size 20 +#define Cmd_RadGrad 7 +#define Cmd_Image 8 +#define Cmd_BeginClip 9 +#define Cmd_EndClip 10 +#define Cmd_Jump 11 +#define Cmd_size 48 CmdRef Cmd_index(CmdRef ref, uint index) { return CmdRef(ref.offset + index * Cmd_size); @@ -213,6 +233,44 @@ void CmdLinGrad_write(Alloc a, CmdLinGradRef ref, CmdLinGrad s) { write_mem(a, ix + 3, floatBitsToUint(s.line_c)); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + uint raw2 = read_mem(a, ix + 2); + uint raw3 = read_mem(a, ix + 3); + uint raw4 = read_mem(a, ix + 4); + uint raw5 = read_mem(a, ix + 5); + uint raw6 = read_mem(a, ix + 6); + uint raw7 = read_mem(a, ix + 7); + uint raw8 = read_mem(a, ix + 8); + uint raw9 = read_mem(a, ix + 9); + uint raw10 = read_mem(a, ix + 10); + CmdRadGrad s; + s.index = raw0; + s.mat = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4)); + s.xlat = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6)); + s.c1 = vec2(uintBitsToFloat(raw7), uintBitsToFloat(raw8)); + s.ra = uintBitsToFloat(raw9); + s.roff = uintBitsToFloat(raw10); + return s; +} + +void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, s.index); + write_mem(a, ix + 1, floatBitsToUint(s.mat.x)); + write_mem(a, ix + 2, floatBitsToUint(s.mat.y)); + write_mem(a, ix + 3, floatBitsToUint(s.mat.z)); + write_mem(a, ix + 4, floatBitsToUint(s.mat.w)); + write_mem(a, ix + 5, floatBitsToUint(s.xlat.x)); + write_mem(a, ix + 6, floatBitsToUint(s.xlat.y)); + write_mem(a, ix + 7, floatBitsToUint(s.c1.x)); + write_mem(a, ix + 8, floatBitsToUint(s.c1.y)); + write_mem(a, ix + 9, floatBitsToUint(s.ra)); + write_mem(a, ix + 10, floatBitsToUint(s.roff)); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -293,6 +351,10 @@ CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { return CmdLinGrad_read(a, CmdLinGradRef(ref.offset + 4)); } +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { + return CmdRadGrad_read(a, CmdRadGradRef(ref.offset + 4)); +} + CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { return CmdImage_read(a, CmdImageRef(ref.offset + 4)); } @@ -338,6 +400,11 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) { CmdLinGrad_write(a, CmdLinGradRef(ref.offset + 4), s); } +void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) { + write_mem(a, ref.offset >> 2, Cmd_RadGrad); + CmdRadGrad_write(a, CmdRadGradRef(ref.offset + 4), s); +} + void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) { write_mem(a, ref.offset >> 2, Cmd_Image); CmdImage_write(a, CmdImageRef(ref.offset + 4), s); diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index 62c59c4..2f4b85e 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -62,6 +62,7 @@ const ANNOTATED_SIZE: usize = 40; // Tags for draw objects. See shader/drawtag.h for the authoritative source. const DRAWTAG_FILLCOLOR: u32 = 0x44; const DRAWTAG_FILLLINGRADIENT: u32 = 0x114; +const DRAWTAG_FILLRADGRADIENT: u32 = 0x2dc; const DRAWTAG_BEGINCLIP: u32 = 0x05; const DRAWTAG_ENDCLIP: u32 = 0x25; @@ -79,6 +80,16 @@ pub struct FillLinGradient { p1: [f32; 2], } +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct FillRadGradient { + index: u32, + p0: [f32; 2], + p1: [f32; 2], + r0: f32, + r1: f32, +} + #[allow(unused)] #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] @@ -123,6 +134,13 @@ impl Encoder { self.transform_stream.push(transform); } + // Swap the last two tags in the tag stream; used for transformed + // gradients. + pub fn swap_last_tags(&mut self) { + let len = self.tag_stream.len(); + self.tag_stream.swap(len - 1, len - 2); + } + // -1.0 means "fill" pub fn linewidth(&mut self, linewidth: f32) { self.tag_stream.push(0x40); @@ -147,6 +165,16 @@ impl Encoder { self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } + + /// Encode a fill radial gradient draw object. + /// + /// This should be encoded after a path. + pub fn fill_rad_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2], r0: f32, r1: f32) { + self.drawtag_stream.push(DRAWTAG_FILLRADGRADIENT); + let element = FillRadGradient { index, p0, p1, r0, r1 }; + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); + } + /// Start a clip. pub fn begin_clip(&mut self, blend: Option) { self.drawtag_stream.push(DRAWTAG_BEGINCLIP); @@ -220,7 +248,7 @@ impl Encoder { alloc += n_drawobj * DRAW_BBOX_SIZE; let drawinfo_alloc = alloc; // TODO: not optimized; it can be accumulated during encoding or summed from drawtags - const MAX_DRAWINFO_SIZE: usize = 16; + const MAX_DRAWINFO_SIZE: usize = 44; alloc += n_drawobj * MAX_DRAWINFO_SIZE; let config = Config { diff --git a/piet-gpu/src/gradient.rs b/piet-gpu/src/gradient.rs index 20982e9..e655908 100644 --- a/piet-gpu/src/gradient.rs +++ b/piet-gpu/src/gradient.rs @@ -18,15 +18,29 @@ use std::collections::hash_map::{Entry, HashMap}; -use piet::{Color, FixedLinearGradient, GradientStop}; +use piet::kurbo::Point; +use piet::{Color, FixedLinearGradient, GradientStop, FixedRadialGradient}; + +/// Radial gradient compatible with COLRv1 spec +#[derive(Debug, Clone)] +pub struct Colrv1RadialGradient { + /// The center of the iner circle. + pub center0: Point, + /// The offset of the origin relative to the center. + pub center1: Point, + /// The radius of the inner circle. + pub radius0: f64, + /// The radius of the outer circle. + pub radius1: f64, + /// The stops. + pub stops: Vec, +} #[derive(Clone)] pub struct BakedGradient { ramp: Vec, } -/// This is basically the same type as scene::FillLinGradient, so could -/// potentially use that directly. #[derive(Clone)] pub struct LinearGradient { pub(crate) start: [f32; 2], @@ -34,6 +48,15 @@ pub struct LinearGradient { pub(crate) ramp_id: u32, } +#[derive(Clone)] +pub struct RadialGradient { + pub(crate) start: [f32; 2], + pub(crate) end: [f32; 2], + pub(crate) r0: f32, + pub(crate) r1: f32, + pub(crate) ramp_id: u32, +} + #[derive(Default)] pub struct RampCache { ramps: Vec, @@ -154,6 +177,28 @@ impl RampCache { } } + pub fn add_radial_gradient(&mut self, rad: &FixedRadialGradient) -> RadialGradient { + let ramp_id = self.add_ramp(&rad.stops); + RadialGradient { + ramp_id: ramp_id as u32, + start: crate::render_ctx::to_f32_2(rad.center + rad.origin_offset), + end: crate::render_ctx::to_f32_2(rad.center), + r0: 0.0, + r1: rad.radius as f32, + } + } + + pub fn add_radial_gradient_colrv1(&mut self, rad: &Colrv1RadialGradient) -> RadialGradient { + let ramp_id = self.add_ramp(&rad.stops); + RadialGradient { + ramp_id: ramp_id as u32, + start: crate::render_ctx::to_f32_2(rad.center0), + end: crate::render_ctx::to_f32_2(rad.center1), + r0: rad.radius0 as f32, + r1: rad.radius1 as f32, + } + } + /// Dump the contents of a gradient. This is for debugging. #[allow(unused)] pub(crate) fn dump_gradient(&self, lin: &LinearGradient) { diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 249735a..45275a5 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -12,6 +12,7 @@ use std::convert::TryInto; pub use blend::{Blend, BlendMode, CompositionMode}; pub use render_ctx::PietGpuRenderContext; +pub use gradient::Colrv1RadialGradient; use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 024dd2b..dca03eb 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -13,7 +13,7 @@ use piet_gpu_hal::BufWrite; use piet_gpu_types::encoder::{Encode, Encoder}; use piet_gpu_types::scene::Element; -use crate::gradient::{LinearGradient, RampCache}; +use crate::gradient::{LinearGradient, RadialGradient, RampCache, Colrv1RadialGradient}; use crate::text::Font; pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder}; use crate::Blend; @@ -50,6 +50,7 @@ pub struct PietGpuRenderContext { pub enum PietGpuBrush { Solid(u32), LinGradient(LinearGradient), + RadGradient(RadialGradient), } #[derive(Default)] @@ -187,6 +188,10 @@ impl RenderContext for PietGpuRenderContext { let lin = self.ramp_cache.add_linear_gradient(&lin); Ok(PietGpuBrush::LinGradient(lin)) } + FixedGradient::Radial(rad) => { + let rad = self.ramp_cache.add_radial_gradient(&rad); + Ok(PietGpuBrush::RadGradient(rad)) + } _ => todo!("don't do radial gradients yet"), } } @@ -338,6 +343,20 @@ impl PietGpuRenderContext { } } + pub fn radial_gradient_colrv1(&mut self, rad: &Colrv1RadialGradient) -> PietGpuBrush { + PietGpuBrush::RadGradient(self.ramp_cache.add_radial_gradient_colrv1(rad)) + } + + pub fn fill_transform(&mut self, shape: impl Shape, brush: &PietGpuBrush, transform: Affine) { + let path = shape.path_elements(TOLERANCE); + self.encode_linewidth(-1.0); + self.encode_path(path, true); + self.encode_transform(Transform::from_kurbo(transform)); + self.new_encoder.swap_last_tags(); + self.encode_brush(&brush); + self.encode_transform(Transform::from_kurbo(transform.inverse())); + } + fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( @@ -420,6 +439,10 @@ impl PietGpuRenderContext { self.new_encoder .fill_lin_gradient(lin.ramp_id, lin.start, lin.end); } + PietGpuBrush::RadGradient(rad) => { + self.new_encoder + .fill_rad_gradient(rad.ramp_id, rad.start, rad.end, rad.r0, rad.r1); + } } } } diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs index 350b9dd..bfd2af2 100644 --- a/piet-gpu/src/test_scenes.rs +++ b/piet-gpu/src/test_scenes.rs @@ -2,10 +2,10 @@ use rand::{Rng, RngCore}; -use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext}; +use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext, Colrv1RadialGradient}; use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape}; use piet::{ - Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, + Color, FixedGradient, FixedRadialGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, }; use crate::{PicoSvg, RenderContext, Vec2}; @@ -27,7 +27,7 @@ pub fn render_svg(rc: &mut impl RenderContext, svg: &PicoSvg) { println!("flattening and encoding time: {:?}", start.elapsed()); } -pub fn render_scene(rc: &mut impl RenderContext) { +pub fn render_scene(rc: &mut PietGpuRenderContext) { const WIDTH: usize = 2048; const HEIGHT: usize = 1536; let mut rng = rand::thread_rng(); @@ -137,7 +137,7 @@ fn render_alpha_test(rc: &mut impl RenderContext) { } #[allow(unused)] -fn render_gradient_test(rc: &mut impl RenderContext) { +fn render_gradient_test(rc: &mut PietGpuRenderContext) { let stops = vec![ GradientStop { color: Color::rgb8(0, 255, 0), @@ -148,14 +148,18 @@ fn render_gradient_test(rc: &mut impl RenderContext) { pos: 1.0, }, ]; - let lin = FixedLinearGradient { - start: Point::new(0.0, 100.0), - end: Point::new(0.0, 300.0), + let rad = Colrv1RadialGradient { + center0: Point::new(200.0, 200.0), + center1: Point::new(250.0, 200.0), + radius0: 50.0, + radius1: 100.0, stops, }; - let brush = FixedGradient::Linear(lin); + let brush = rc.radial_gradient_colrv1(&rad); + //let brush = FixedGradient::Radial(rad); //let brush = Color::rgb8(0, 128, 0); - rc.fill(Rect::new(100.0, 100.0, 300.0, 300.0), &brush); + let transform = Affine::new([1.0, 0.0, 0.0, 0.5, 0.0, 100.0]); + rc.fill_transform(Rect::new(100.0, 100.0, 300.0, 300.0), &brush, transform); } fn diamond(origin: Point) -> impl Shape {