diff --git a/piet-gpu/shader/blend.h b/piet-gpu/shader/blend.h index 1ac4bd6..c0ae6af 100644 --- a/piet-gpu/shader/blend.h +++ b/piet-gpu/shader/blend.h @@ -122,6 +122,8 @@ vec3 set_sat(vec3 c, float s) { return c; } +// Blends two RGB colors together. The colors are assumed to be in sRGB +// color space, and this function does not take alpha into account. vec3 mix_blend(vec3 cb, vec3 cs, uint mode) { vec3 b = vec3(0.0); switch (mode) { @@ -190,9 +192,10 @@ vec3 mix_blend(vec3 cb, vec3 cs, uint mode) { #define Comp_DestAtop 10 #define Comp_Xor 11 #define Comp_Plus 12 -#define Comp_PlusDarker 13 -#define Comp_PlusLighter 14 +#define Comp_PlusLighter 13 +// Apply general compositing operation. +// Inputs are separated colors and alpha, output is premultiplied. vec4 mix_compose(vec3 cb, vec3 cs, float ab, float as, uint mode) { float fa = 0.0; float fb = 0.0; @@ -245,16 +248,41 @@ vec4 mix_compose(vec3 cb, vec3 cs, float ab, float as, uint mode) { fa = 1.0; fb = 1.0; break; - case Comp_PlusDarker: - return vec4(max(vec4(0.0), 1.0 - as * vec4(cs, as) + 1.0 - ab * vec4(cb, ab)).xyz, - max(0.0, 1.0 - as + 1.0 - ab)); case Comp_PlusLighter: - return vec4(min(vec4(1.0), as * vec4(cs, as) + ab * vec4(cb, ab)).xyz, - min(1.0, as + ab)); + return min(vec4(1.0), vec4(as * cs + ab * cb, as + ab)); default: break; } - return as * fa * vec4(cs, as) + ab * fb * vec4(cb, ab); + float as_fa = as * fa; + float ab_fb = ab * fb; + vec3 co = as_fa * cs + ab_fb * cb; + return vec4(co, as_fa + ab_fb); } #define BlendComp_default (Blend_Normal << 8 | Comp_SrcOver) + +// This is added to alpha to prevent divide-by-zero +#define EPSILON 1e-15 + +// Apply blending and composition. Both input and output colors are +// premultiplied RGB. +vec4 mix_blend_compose(vec4 backdrop, vec4 src, uint mode) { + if (mode == BlendComp_default) { + return backdrop * (1.0 - src.a) + src; + } + // Un-premultiply colors for blending + float inv_src_a = 1.0 / (src.a + EPSILON); + vec3 cs = src.rgb * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.a + EPSILON); + vec3 cb = backdrop.rgb * inv_backdrop_a; + uint blend_mode = mode >> 8; + vec3 blended = mix_blend(cs, cb, blend_mode); + cs = mix(cs, blended, backdrop.a); + uint comp_mode = mode * 0xff; + if (comp_mode == Comp_SrcOver) { + vec3 co = mix(backdrop.rgb, cs, src.a); + return vec4(co, src.a + backdrop.a * (1 - src.a)); + } else { + return mix_compose(cb, cs, backdrop.a, src.a, comp_mode); + } +} diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 60e5582..09b0683 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -58,7 +58,7 @@ build gen/kernel4.hlsl: hlsl gen/kernel4.spv build gen/kernel4.dxil: dxil gen/kernel4.hlsl build gen/kernel4.msl: msl gen/kernel4.spv -build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h mem.h +build gen/kernel4_gray.spv: glsl kernel4.comp | blend.h ptcl.h setup.h mem.h flags = -DGRAY build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil index 0fb9622..df2be88 100644 Binary files a/piet-gpu/shader/gen/backdrop.dxil and b/piet-gpu/shader/gen/backdrop.dxil differ diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil index e24a6d3..81f9b65 100644 Binary files a/piet-gpu/shader/gen/backdrop_lg.dxil and b/piet-gpu/shader/gen/backdrop_lg.dxil differ diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil index 6655b7f..6b3efaf 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.dxil and b/piet-gpu/shader/gen/bbox_clear.dxil differ diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil index 3050aa8..4a4f073 100644 Binary files a/piet-gpu/shader/gen/binning.dxil and b/piet-gpu/shader/gen/binning.dxil differ diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil index 29a158e..b681a65 100644 Binary files a/piet-gpu/shader/gen/clip_leaf.dxil and b/piet-gpu/shader/gen/clip_leaf.dxil differ diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil index 0dff71b..0ccaac9 100644 Binary files a/piet-gpu/shader/gen/clip_reduce.dxil and b/piet-gpu/shader/gen/clip_reduce.dxil differ diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index fdab444..910925d 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index 200f169..6353f19 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil index be69aad..c101fc8 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.dxil and b/piet-gpu/shader/gen/draw_reduce.dxil differ diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil index 4ea23f7..873fa29 100644 Binary files a/piet-gpu/shader/gen/draw_root.dxil and b/piet-gpu/shader/gen/draw_root.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index e6eccc1..da6c563 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 92fe05b..5d6f839 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -161,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _291 : register(u0, space0); -ByteAddressBuffer _1666 : register(t1, space0); +RWByteAddressBuffer _297 : register(u0, space0); +ByteAddressBuffer _1749 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -189,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _304 = { a.offset + offset }; - return _304; + Alloc _310 = { a.offset + offset }; + return _310; } bool touch_mem(Alloc alloc, uint offset) @@ -206,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _291.Load(offset * 4 + 8); + uint v = _297.Load(offset * 4 + 8); return v; } @@ -215,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _663; + CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _669; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -236,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _679 = { ref.offset + 4u }; + CmdStrokeRef _685 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _679; + CmdStrokeRef param_1 = _685; return CmdStroke_read(param, param_1); } @@ -274,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _820 = { raw5 }; - s.next = _820; + TileSegRef _826 = { raw5 }; + s.next = _826; return s; } @@ -301,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _669 = { ref.offset + 4u }; + CmdFillRef _675 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _669; + CmdFillRef param_1 = _675; return CmdFill_read(param, param_1); } @@ -320,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _689 = { ref.offset + 4u }; + CmdAlphaRef _695 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _689; + CmdAlphaRef param_1 = _695; return CmdAlpha_read(param, param_1); } @@ -339,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _699 = { ref.offset + 4u }; + CmdColorRef _705 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _699; + CmdColorRef param_1 = _705; return CmdColor_read(param, param_1); } @@ -385,9 +385,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _709 = { ref.offset + 4u }; + CmdLinGradRef _715 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _709; + CmdLinGradRef param_1 = _715; return CmdLinGrad_read(param, param_1); } @@ -439,9 +439,9 @@ CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { - CmdRadGradRef _719 = { ref.offset + 4u }; + CmdRadGradRef _725 = { ref.offset + 4u }; Alloc param = a; - CmdRadGradRef param_1 = _719; + CmdRadGradRef param_1 = _725; return CmdRadGrad_read(param, param_1); } @@ -462,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _729 = { ref.offset + 4u }; + CmdImageRef _735 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _729; + CmdImageRef param_1 = _735; return CmdImage_read(param, param_1); } @@ -477,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1721 = fromsRGB(param_1); + fg_rgba.x = _1721.x; + fg_rgba.y = _1721.y; + fg_rgba.z = _1721.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -514,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _739 = { ref.offset + 4u }; + CmdEndClipRef _745 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _739; + CmdEndClipRef param_1 = _745; return CmdEndClip_read(param, param_1); } @@ -706,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1052 = clip_color(param_1); + return _1052; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -795,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1343 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1343; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -807,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1357 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1357; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -919,18 +919,56 @@ float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) } case 13u: { - return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + float rev_as = 1.0f - as; + float rev_ab = 1.0f - ab; + return max(0.0f.xxxx, float4((cs * rev_as) + (cb * rev_ab), rev_as + rev_ab)); } case 14u: { - return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) +{ + if (mode == 3u) + { + return (backdrop * (1.0f - src.w)) + src; + } + float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cs; + float3 param_1 = cb; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = lerp(cs, blended, backdrop.w.xxx); + uint comp_mode = mode * 255u; + if (comp_mode == 3u) + { + float3 co = lerp(backdrop.xyz, cs, src.w.xxx); + return float4(co, src.w + (backdrop.w * (1.0f - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) @@ -946,24 +984,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _749 = { ref.offset + 4u }; + CmdJumpRef _755 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _749; + CmdJumpRef param_1 = _755; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; - Alloc _1681; - _1681.offset = _1666.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1749.Load(8)) + gl_WorkGroupID.x; + Alloc _1764; + _1764.offset = _1749.Load(24); Alloc param; - param.offset = _1681.offset; + param.offset = _1764.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1690 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1690; + CmdRef _1773 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1773; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -972,7 +1010,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _291.Load(4) == 0u; + bool mem_ok = _297.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -997,8 +1035,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1784 = { stroke.tile_ref }; - tile_seg_ref = _1784; + TileSegRef _1867 = { stroke.tile_ref }; + tile_seg_ref = _1867; do { uint param_7 = tile_seg_ref.offset; @@ -1034,8 +1072,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1904 = { fill.tile_ref }; - tile_seg_ref = _1904; + TileSegRef _1987 = { fill.tile_ref }; + tile_seg_ref = _1987; do { uint param_15 = tile_seg_ref.offset; @@ -1124,10 +1162,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2321 = fromsRGB(param_29); + fg_rgba.x = _2321.x; + fg_rgba.y = _2321.y; + fg_rgba.z = _2321.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } @@ -1150,10 +1188,10 @@ void comp_main() int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2431 = fromsRGB(param_33); + fg_rgba_1.x = _2431.x; + fg_rgba_1.y = _2431.y; + fg_rgba_1.z = _2431.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } @@ -1167,9 +1205,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_34, param_35); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; - float4 _2391[8]; - fillImage(_2391, param_36, param_37); - float4 img[8] = _2391; + float4 _2474[8]; + fillImage(_2474, param_36, param_37); + float4 img[8] = _2474; for (uint k_11 = 0u; k_11 < 8u; k_11++) { float4 fg_k_3 = img[k_11] * area[k_11]; @@ -1184,8 +1222,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2537 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2537; rgba[k_12] = 0.0f.xxxx; } clip_depth++; @@ -1206,32 +1244,20 @@ void comp_main() uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; - cmd_ref = _2569; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + CmdRef _2615 = { Cmd_Jump_read(param_45, param_46).new_ref }; + cmd_ref = _2615; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1239,9 +1265,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - float3 param_53 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_52))] = float4(tosRGB(param_53), rgba[i_1].w); + uint param_47 = i_1; + float3 param_48 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_47))] = float4(tosRGB(param_48), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index 6489563..796043b 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -237,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) { Alloc param = alloc; uint param_1 = offset; @@ -245,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_291.memory[offset]; + uint v = v_297.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_291); + uint tag_and_flags = read_mem(param, param_1, v_297); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -275,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_291); + return CmdStroke_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -291,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -327,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -343,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_291); + return CmdFill_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_291); + return CmdAlpha_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_291); + return CmdColor_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -408,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -432,50 +432,50 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_291); + return CmdLinGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_291); + uint raw6 = read_mem(param_12, param_13, v_297); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_291); + uint raw7 = read_mem(param_14, param_15, v_297); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_291); + uint raw8 = read_mem(param_16, param_17, v_297); Alloc param_18 = a; uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_291); + uint raw9 = read_mem(param_18, param_19, v_297); Alloc param_20 = a; uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_291); + uint raw10 = read_mem(param_20, param_21, v_297); CmdRadGrad s; s.index = raw0; s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); @@ -487,23 +487,23 @@ CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& re } static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_291); + return CmdRadGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -511,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_291); + return CmdImage_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -528,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1721 = fromsRGB(param_1); + fg_rgba.x = _1721.x; + fg_rgba.y = _1721.y; + fg_rgba.z = _1721.z; rgba[i] = fg_rgba; } return rgba; @@ -555,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_291); + return CmdEndClip_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -771,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1052 = clip_color(param_1); + return _1052; } static inline __attribute__((always_inline)) @@ -861,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1343 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1343; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -873,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1357 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1357; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -986,45 +986,84 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } case 13u: { - return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); + float rev_as = 1.0 - as; + float rev_ab = 1.0 - ab; + return fast::max(float4(0.0), float4((cs * rev_as) + (cb * rev_ab), rev_as + rev_ab)); } case 14u: { - return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) +float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) +{ + if (mode == 3u) + { + return (backdrop * (1.0 - src.w)) + src; + } + float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cs; + float3 param_1 = cb; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = mix(cs, blended, float3(backdrop.w)); + uint comp_mode = mode * 255u; + if (comp_mode == 3u) + { + float3 co = mix(backdrop.xyz, cs, float3(src.w)); + return float4(co, src.w + (backdrop.w * (1.0 - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } +} + +static inline __attribute__((always_inline)) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_291); + return CmdJump_read(param, param_1, v_297); } -kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1749 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1749.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1666.conf.ptcl_alloc.offset; + param.offset = _1749.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -1037,7 +1076,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_291.mem_error == 0u; + bool mem_ok = v_297.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -1046,7 +1085,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_291).tag; + uint tag = Cmd_tag(param_3, param_4, v_297).tag; if (tag == 0u) { break; @@ -1057,7 +1096,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1070,7 +1109,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_291); + TileSeg seg = TileSeg_read(param_10, param_11, v_297); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1093,7 +1132,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1106,7 +1145,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1150,7 +1189,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1162,7 +1201,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_291); + CmdColor color = Cmd_Color_read(param_23, param_24, v_297); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1177,7 +1216,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1187,10 +1226,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2321 = fromsRGB(param_29); + fg_rgba.x = _2321.x; + fg_rgba.y = _2321.y; + fg_rgba.z = _2321.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } @@ -1201,7 +1240,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); for (uint k_10 = 0u; k_10 < 8u; k_10++) { uint param_32 = k_10; @@ -1213,10 +1252,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2431 = fromsRGB(param_33); + fg_rgba_1.x = _2431.x; + fg_rgba_1.y = _2431.y; + fg_rgba_1.z = _2431.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } @@ -1227,7 +1266,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_34 = cmd_alloc; CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; spvUnsafeArray img; @@ -1246,8 +1285,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2537 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2537; rgba[k_12] = float4(0.0); } clip_depth++; @@ -1258,7 +1297,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; @@ -1268,31 +1307,19 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1300,9 +1327,9 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - float3 param_53 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_53), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); + uint param_47 = i_1; + float3 param_48 = rgba[i_1].xyz; + image.write(float4(tosRGB(param_48), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); } } diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 7061263..b145245 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 046045f..abe1d22 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 019a73c..f402268 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -161,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _291 : register(u0, space0); -ByteAddressBuffer _1666 : register(t1, space0); +RWByteAddressBuffer _297 : register(u0, space0); +ByteAddressBuffer _1749 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -189,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _304 = { a.offset + offset }; - return _304; + Alloc _310 = { a.offset + offset }; + return _310; } bool touch_mem(Alloc alloc, uint offset) @@ -206,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _291.Load(offset * 4 + 8); + uint v = _297.Load(offset * 4 + 8); return v; } @@ -215,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _663; + CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _669; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -236,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _679 = { ref.offset + 4u }; + CmdStrokeRef _685 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _679; + CmdStrokeRef param_1 = _685; return CmdStroke_read(param, param_1); } @@ -274,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _820 = { raw5 }; - s.next = _820; + TileSegRef _826 = { raw5 }; + s.next = _826; return s; } @@ -301,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _669 = { ref.offset + 4u }; + CmdFillRef _675 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _669; + CmdFillRef param_1 = _675; return CmdFill_read(param, param_1); } @@ -320,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _689 = { ref.offset + 4u }; + CmdAlphaRef _695 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _689; + CmdAlphaRef param_1 = _695; return CmdAlpha_read(param, param_1); } @@ -339,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _699 = { ref.offset + 4u }; + CmdColorRef _705 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _699; + CmdColorRef param_1 = _705; return CmdColor_read(param, param_1); } @@ -385,9 +385,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _709 = { ref.offset + 4u }; + CmdLinGradRef _715 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _709; + CmdLinGradRef param_1 = _715; return CmdLinGrad_read(param, param_1); } @@ -439,9 +439,9 @@ CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { - CmdRadGradRef _719 = { ref.offset + 4u }; + CmdRadGradRef _725 = { ref.offset + 4u }; Alloc param = a; - CmdRadGradRef param_1 = _719; + CmdRadGradRef param_1 = _725; return CmdRadGrad_read(param, param_1); } @@ -462,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _729 = { ref.offset + 4u }; + CmdImageRef _735 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _729; + CmdImageRef param_1 = _735; return CmdImage_read(param, param_1); } @@ -477,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1721 = fromsRGB(param_1); + fg_rgba.x = _1721.x; + fg_rgba.y = _1721.y; + fg_rgba.z = _1721.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -514,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _739 = { ref.offset + 4u }; + CmdEndClipRef _745 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _739; + CmdEndClipRef param_1 = _745; return CmdEndClip_read(param, param_1); } @@ -706,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1052 = clip_color(param_1); + return _1052; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -795,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1343 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1343; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -807,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1357 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1357; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -919,18 +919,56 @@ float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) } case 13u: { - return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + float rev_as = 1.0f - as; + float rev_ab = 1.0f - ab; + return max(0.0f.xxxx, float4((cs * rev_as) + (cb * rev_ab), rev_as + rev_ab)); } case 14u: { - return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); +} + +float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) +{ + if (mode == 3u) + { + return (backdrop * (1.0f - src.w)) + src; + } + float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cs; + float3 param_1 = cb; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = lerp(cs, blended, backdrop.w.xxx); + uint comp_mode = mode * 255u; + if (comp_mode == 3u) + { + float3 co = lerp(backdrop.xyz, cs, src.w.xxx); + return float4(co, src.w + (backdrop.w * (1.0f - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } } CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) @@ -946,24 +984,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _749 = { ref.offset + 4u }; + CmdJumpRef _755 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _749; + CmdJumpRef param_1 = _755; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; - Alloc _1681; - _1681.offset = _1666.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1749.Load(8)) + gl_WorkGroupID.x; + Alloc _1764; + _1764.offset = _1749.Load(24); Alloc param; - param.offset = _1681.offset; + param.offset = _1764.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1690 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1690; + CmdRef _1773 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1773; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -972,7 +1010,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _291.Load(4) == 0u; + bool mem_ok = _297.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -997,8 +1035,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1784 = { stroke.tile_ref }; - tile_seg_ref = _1784; + TileSegRef _1867 = { stroke.tile_ref }; + tile_seg_ref = _1867; do { uint param_7 = tile_seg_ref.offset; @@ -1034,8 +1072,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1904 = { fill.tile_ref }; - tile_seg_ref = _1904; + TileSegRef _1987 = { fill.tile_ref }; + tile_seg_ref = _1987; do { uint param_15 = tile_seg_ref.offset; @@ -1124,10 +1162,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2321 = fromsRGB(param_29); + fg_rgba.x = _2321.x; + fg_rgba.y = _2321.y; + fg_rgba.z = _2321.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } @@ -1150,10 +1188,10 @@ void comp_main() int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2431 = fromsRGB(param_33); + fg_rgba_1.x = _2431.x; + fg_rgba_1.y = _2431.y; + fg_rgba_1.z = _2431.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } @@ -1167,9 +1205,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_34, param_35); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; - float4 _2391[8]; - fillImage(_2391, param_36, param_37); - float4 img[8] = _2391; + float4 _2474[8]; + fillImage(_2474, param_36, param_37); + float4 img[8] = _2474; for (uint k_11 = 0u; k_11 < 8u; k_11++) { float4 fg_k_3 = img[k_11] * area[k_11]; @@ -1184,8 +1222,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2537 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2537; rgba[k_12] = 0.0f.xxxx; } clip_depth++; @@ -1206,32 +1244,20 @@ void comp_main() uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; - cmd_ref = _2569; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + CmdRef _2615 = { Cmd_Jump_read(param_45, param_46).new_ref }; + cmd_ref = _2615; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1239,8 +1265,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - image[int2(xy_uint + chunk_offset(param_52))] = rgba[i_1].w.x; + uint param_47 = i_1; + image[int2(xy_uint + chunk_offset(param_47))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 6402c6f..9647001 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -237,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) { Alloc param = alloc; uint param_1 = offset; @@ -245,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_291.memory[offset]; + uint v = v_297.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_291); + uint tag_and_flags = read_mem(param, param_1, v_297); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -275,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_291); + return CmdStroke_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -291,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -327,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -343,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_291); + return CmdFill_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_291); + return CmdAlpha_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_291); + return CmdColor_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -408,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -432,50 +432,50 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_291); + return CmdLinGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_291); + uint raw2 = read_mem(param_4, param_5, v_297); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_291); + uint raw3 = read_mem(param_6, param_7, v_297); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_291); + uint raw4 = read_mem(param_8, param_9, v_297); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_291); + uint raw5 = read_mem(param_10, param_11, v_297); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_291); + uint raw6 = read_mem(param_12, param_13, v_297); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_291); + uint raw7 = read_mem(param_14, param_15, v_297); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_291); + uint raw8 = read_mem(param_16, param_17, v_297); Alloc param_18 = a; uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_291); + uint raw9 = read_mem(param_18, param_19, v_297); Alloc param_20 = a; uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_291); + uint raw10 = read_mem(param_20, param_21, v_297); CmdRadGrad s; s.index = raw0; s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); @@ -487,23 +487,23 @@ CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& re } static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_291); + return CmdRadGrad_read(param, param_1, v_297); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_291); + uint raw1 = read_mem(param_2, param_3, v_297); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -511,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_291); + return CmdImage_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -528,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1638 = fromsRGB(param_1); - fg_rgba.x = _1638.x; - fg_rgba.y = _1638.y; - fg_rgba.z = _1638.z; + float3 _1721 = fromsRGB(param_1); + fg_rgba.x = _1721.x; + fg_rgba.y = _1721.y; + fg_rgba.z = _1721.z; rgba[i] = fg_rgba; } return rgba; @@ -555,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_291); + return CmdEndClip_read(param, param_1, v_297); } static inline __attribute__((always_inline)) @@ -771,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _1046 = clip_color(param_1); - return _1046; + float3 _1052 = clip_color(param_1); + return _1052; } static inline __attribute__((always_inline)) @@ -861,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1337 = set_sat(param_21, param_22); + float3 _1343 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1337; + float3 param_24 = _1343; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -873,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1351 = set_sat(param_27, param_28); + float3 _1357 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1351; + float3 param_30 = _1357; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -986,45 +986,84 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } case 13u: { - return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); + float rev_as = 1.0 - as; + float rev_ab = 1.0 - ab; + return fast::max(float4(0.0), float4((cs * rev_as) + (cb * rev_ab), rev_as + rev_ab)); } case 14u: { - return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); } default: { break; } } - return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); + float as_fa = as * fa; + float ab_fb = ab * fb; + float3 co = (cs * as_fa) + (cb * ab_fb); + return float4(co, as_fa + ab_fb); } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) +float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) +{ + if (mode == 3u) + { + return (backdrop * (1.0 - src.w)) + src; + } + float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); + float3 cs = src.xyz * inv_src_a; + float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); + float3 cb = backdrop.xyz * inv_backdrop_a; + uint blend_mode = mode >> uint(8); + float3 param = cs; + float3 param_1 = cb; + uint param_2 = blend_mode; + float3 blended = mix_blend(param, param_1, param_2); + cs = mix(cs, blended, float3(backdrop.w)); + uint comp_mode = mode * 255u; + if (comp_mode == 3u) + { + float3 co = mix(backdrop.xyz, cs, float3(src.w)); + return float4(co, src.w + (backdrop.w * (1.0 - src.w))); + } + else + { + float3 param_3 = cb; + float3 param_4 = cs; + float param_5 = backdrop.w; + float param_6 = src.w; + uint param_7 = comp_mode; + return mix_compose(param_3, param_4, param_5, param_6, param_7); + } +} + +static inline __attribute__((always_inline)) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_291); + uint raw0 = read_mem(param, param_1, v_297); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_291); + return CmdJump_read(param, param_1, v_297); } -kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1749 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1749.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1666.conf.ptcl_alloc.offset; + param.offset = _1749.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -1037,7 +1076,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_291.mem_error == 0u; + bool mem_ok = v_297.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -1046,7 +1085,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_291).tag; + uint tag = Cmd_tag(param_3, param_4, v_297).tag; if (tag == 0u) { break; @@ -1057,7 +1096,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1070,7 +1109,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_291); + TileSeg seg = TileSeg_read(param_10, param_11, v_297); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1093,7 +1132,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1106,7 +1145,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1150,7 +1189,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1162,7 +1201,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_291); + CmdColor color = Cmd_Color_read(param_23, param_24, v_297); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1177,7 +1216,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1187,10 +1226,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2238 = fromsRGB(param_29); - fg_rgba.x = _2238.x; - fg_rgba.y = _2238.y; - fg_rgba.z = _2238.z; + float3 _2321 = fromsRGB(param_29); + fg_rgba.x = _2321.x; + fg_rgba.y = _2321.y; + fg_rgba.z = _2321.z; float4 fg_k_1 = fg_rgba * area[k_9]; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } @@ -1201,7 +1240,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); for (uint k_10 = 0u; k_10 < 8u; k_10++) { uint param_32 = k_10; @@ -1213,10 +1252,10 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float3 param_33 = fg_rgba_1.xyz; - float3 _2348 = fromsRGB(param_33); - fg_rgba_1.x = _2348.x; - fg_rgba_1.y = _2348.y; - fg_rgba_1.z = _2348.z; + float3 _2431 = fromsRGB(param_33); + fg_rgba_1.x = _2431.x; + fg_rgba_1.y = _2431.y; + fg_rgba_1.z = _2431.z; float4 fg_k_2 = fg_rgba_1 * area[k_10]; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } @@ -1227,7 +1266,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_34 = cmd_alloc; CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); uint2 param_36 = xy_uint; CmdImage param_37 = fill_img; spvUnsafeArray img; @@ -1246,8 +1285,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { uint d_2 = min(clip_depth, 127u); float4 param_38 = float4(rgba[k_12]); - uint _2454 = packsRGB(param_38); - blend_stack[d_2][k_12] = _2454; + uint _2537 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2537; rgba[k_12] = float4(0.0); } clip_depth++; @@ -1258,7 +1297,7 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_297); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; @@ -1268,31 +1307,19 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 uint param_41 = blend_stack[d_3][k_13]; float4 bg = unpacksRGB(param_41); float4 fg_1 = rgba[k_13] * area[k_13]; - float3 param_42 = bg.xyz; - float3 param_43 = fg_1.xyz; - uint param_44 = blend_mode; - float3 blend = mix_blend(param_42, param_43, param_44); - float4 _2521 = fg_1; - float _2525 = fg_1.w; - float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); - fg_1.x = _2532.x; - fg_1.y = _2532.y; - fg_1.z = _2532.z; - float3 param_45 = bg.xyz; - float3 param_46 = fg_1.xyz; - float param_47 = bg.w; - float param_48 = fg_1.w; - uint param_49 = comp_mode; - rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); + float4 param_42 = bg; + float4 param_43 = fg_1; + uint param_44 = end_clip.blend; + rgba[k_13] = mix_blend_compose(param_42, param_43, param_44); } cmd_ref.offset += 8u; break; } case 11u: { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; + Alloc param_45 = cmd_alloc; + CmdRef param_46 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_45, param_46, v_297).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1300,8 +1327,8 @@ kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_52 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); + uint param_47 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_47)))); } } diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 4633401..2dd46c0 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil index 9fd593c..b6c9398 100644 Binary files a/piet-gpu/shader/gen/path_coarse.dxil and b/piet-gpu/shader/gen/path_coarse.dxil differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 6130712..7ce4684 100644 Binary files a/piet-gpu/shader/gen/pathseg.dxil and b/piet-gpu/shader/gen/pathseg.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 4c2bd23..ff544b8 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and b/piet-gpu/shader/gen/pathtag_reduce.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil index 77f12e6..48584bd 100644 Binary files a/piet-gpu/shader/gen/pathtag_root.dxil and b/piet-gpu/shader/gen/pathtag_root.dxil differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index 7759910..7b130e0 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and b/piet-gpu/shader/gen/tile_alloc.dxil differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil index f9f31e6..0c1e376 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and b/piet-gpu/shader/gen/transform_leaf.dxil differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil index 978dd98..fc3a311 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and b/piet-gpu/shader/gen/transform_reduce.dxil differ diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil index 5b4f059..a33ff7f 100644 Binary files a/piet-gpu/shader/gen/transform_root.dxil and b/piet-gpu/shader/gen/transform_root.dxil differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index c49e2fa..a0710d2 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -242,10 +242,7 @@ void main() { uint d = min(clip_depth, MAX_BLEND_STACK - 1); mediump vec4 bg = unpacksRGB(blend_stack[d][k]); mediump vec4 fg = rgba[k] * area[k]; - vec3 blend = mix_blend(bg.rgb, fg.rgb, blend_mode); - // Apply the blend color only where the foreground and background overlap. - fg.rgb = mix(fg.rgb, blend, float((fg.a * bg.a) > 0.0)); - rgba[k] = mix_compose(bg.rgb, fg.rgb, bg.a, fg.a, comp_mode); + rgba[k] = mix_blend_compose(bg, fg, end_clip.blend); } cmd_ref.offset += 4 + CmdEndClip_size; break; diff --git a/piet-gpu/src/blend.rs b/piet-gpu/src/blend.rs index aacf597..7edcb4e 100644 --- a/piet-gpu/src/blend.rs +++ b/piet-gpu/src/blend.rs @@ -51,8 +51,7 @@ pub enum CompositionMode { DestAtop = 10, Xor = 11, Plus = 12, - PlusDarker = 13, - PlusLighter = 14, + PlusLighter = 13, } #[derive(Copy, Clone, PartialEq, Eq, Debug)]