diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index f12a1cf..3ca0742 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -57,16 +57,27 @@ fn main() -> Result<(), Error> { let mut submitted: [Option; NUM_FRAMES] = Default::default(); let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?; + let mut mode = 0usize; event_loop.run(move |event, _, control_flow| { *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event match event { Event::WindowEvent { event, window_id } if window_id == window.id() => { + use winit::event::{ElementState, VirtualKeyCode}; match event { WindowEvent::CloseRequested => { *control_flow = ControlFlow::Exit; } + WindowEvent::KeyboardInput { input, .. } => { + if input.state == ElementState::Pressed { + match input.virtual_keycode { + Some(VirtualKeyCode::Left) => mode = mode.wrapping_sub(1), + Some(VirtualKeyCode::Right) => mode = mode.wrapping_add(1), + _ => {} + } + } + } _ => (), } } @@ -105,7 +116,41 @@ fn main() -> Result<(), Error> { } test_scenes::render_svg(&mut ctx, input, scale); } else { - test_scenes::render_anim_frame(&mut ctx, current_frame); + use piet_gpu::{Blend, BlendMode::*, CompositionMode::*}; + let blends = [ + Blend::new(Normal, SrcOver), + Blend::new(Multiply, SrcOver), + Blend::new(Screen, SrcOver), + Blend::new(Overlay, SrcOver), + Blend::new(Darken, SrcOver), + Blend::new(Lighten, SrcOver), + Blend::new(ColorDodge, SrcOver), + Blend::new(ColorBurn, SrcOver), + Blend::new(HardLight, SrcOver), + Blend::new(SoftLight, SrcOver), + Blend::new(Difference, SrcOver), + Blend::new(Exclusion, SrcOver), + Blend::new(Hue, SrcOver), + Blend::new(Saturation, SrcOver), + Blend::new(Color, SrcOver), + Blend::new(Luminosity, SrcOver), + Blend::new(Normal, Clear), + Blend::new(Normal, Copy), + Blend::new(Normal, Dest), + Blend::new(Normal, SrcOver), + Blend::new(Normal, DestOver), + Blend::new(Normal, SrcIn), + Blend::new(Normal, DestIn), + Blend::new(Normal, SrcOut), + Blend::new(Normal, DestOut), + Blend::new(Normal, SrcAtop), + Blend::new(Normal, DestAtop), + Blend::new(Normal, Xor), + Blend::new(Normal, Plus), + ]; + let blend = blends[mode % blends.len()]; + test_scenes::render_blend_test(&mut ctx, current_frame, blend); + info_string = format!("{:?}", blend); } render_info_string(&mut ctx, &info_string); if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { diff --git a/piet-gpu/shader/annotated.h b/piet-gpu/shader/annotated.h index b833574..5a35088 100644 --- a/piet-gpu/shader/annotated.h +++ b/piet-gpu/shader/annotated.h @@ -69,9 +69,10 @@ AnnoLinGradientRef AnnoLinGradient_index(AnnoLinGradientRef ref, uint index) { struct AnnoBeginClip { vec4 bbox; float linewidth; + uint blend; }; -#define AnnoBeginClip_size 20 +#define AnnoBeginClip_size 24 AnnoBeginClipRef AnnoBeginClip_index(AnnoBeginClipRef ref, uint index) { return AnnoBeginClipRef(ref.offset + index * AnnoBeginClip_size); @@ -79,9 +80,10 @@ AnnoBeginClipRef AnnoBeginClip_index(AnnoBeginClipRef ref, uint index) { struct AnnoEndClip { vec4 bbox; + uint blend; }; -#define AnnoEndClip_size 16 +#define AnnoEndClip_size 20 AnnoEndClipRef AnnoEndClip_index(AnnoEndClipRef ref, uint index) { return AnnoEndClipRef(ref.offset + index * AnnoEndClip_size); @@ -198,9 +200,11 @@ AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref) { uint raw2 = read_mem(a, ix + 2); uint raw3 = read_mem(a, ix + 3); uint raw4 = read_mem(a, ix + 4); + uint raw5 = read_mem(a, ix + 5); AnnoBeginClip s; s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.linewidth = uintBitsToFloat(raw4); + s.blend = raw5; return s; } @@ -211,6 +215,7 @@ void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) { write_mem(a, ix + 2, floatBitsToUint(s.bbox.z)); write_mem(a, ix + 3, floatBitsToUint(s.bbox.w)); write_mem(a, ix + 4, floatBitsToUint(s.linewidth)); + write_mem(a, ix + 5, s.blend); } AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) { @@ -219,8 +224,10 @@ AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) { uint raw1 = read_mem(a, ix + 1); uint raw2 = read_mem(a, ix + 2); uint raw3 = read_mem(a, ix + 3); + uint raw4 = read_mem(a, ix + 4); AnnoEndClip s; s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.blend = raw4; return s; } @@ -230,6 +237,7 @@ void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) { write_mem(a, ix + 1, floatBitsToUint(s.bbox.y)); write_mem(a, ix + 2, floatBitsToUint(s.bbox.z)); write_mem(a, ix + 3, floatBitsToUint(s.bbox.w)); + write_mem(a, ix + 4, s.blend); } AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) { @@ -281,8 +289,8 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC AnnoBeginClip_write(a, AnnoBeginClipRef(ref.offset + 4), s); } -void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) { - write_mem(a, ref.offset >> 2, Annotated_EndClip); +void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s) { + write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_EndClip); AnnoEndClip_write(a, AnnoEndClipRef(ref.offset + 4), s); } diff --git a/piet-gpu/shader/blend.h b/piet-gpu/shader/blend.h new file mode 100644 index 0000000..1ac4bd6 --- /dev/null +++ b/piet-gpu/shader/blend.h @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Mode definitions and functions for blending and composition. + +#define Blend_Normal 0 +#define Blend_Multiply 1 +#define Blend_Screen 2 +#define Blend_Overlay 3 +#define Blend_Darken 4 +#define Blend_Lighten 5 +#define Blend_ColorDodge 6 +#define Blend_ColorBurn 7 +#define Blend_HardLight 8 +#define Blend_SoftLight 9 +#define Blend_Difference 10 +#define Blend_Exclusion 11 +#define Blend_Hue 12 +#define Blend_Saturation 13 +#define Blend_Color 14 +#define Blend_Luminosity 15 + +vec3 screen(vec3 cb, vec3 cs) { + return cb + cs - (cb * cs); +} + +float color_dodge(float cb, float cs) { + if (cb == 0.0) + return 0.0; + else if (cs == 1.0) + return 1.0; + else + return min(1.0, cb / (1.0 - cs)); +} + +float color_burn(float cb, float cs) { + if (cb == 1.0) + return 1.0; + else if (cs == 0.0) + return 0.0; + else + return 1.0 - min(1.0, (1.0 - cb) / cs); +} + +vec3 hard_light(vec3 cb, vec3 cs) { + return mix( + screen(cb, 2.0 * cs - 1.0), + cb * 2.0 * cs, + vec3(lessThanEqual(cs, vec3(0.5))) + ); +} + +vec3 soft_light(vec3 cb, vec3 cs) { + vec3 d = mix( + sqrt(cb), + ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb, + vec3(lessThanEqual(cb, vec3(0.25))) + ); + return mix( + cb + (2.0 * cs - vec3(1.0)) * (d - cb), + cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb), + vec3(lessThanEqual(cs, vec3(0.5))) + ); +} + +float sat(vec3 c) { + return max(c.r, max(c.g, c.b)) - min(c.r, min(c.g, c.b)); +} + +float lum(vec3 c) { + vec3 f = vec3(0.3, 0.59, 0.11); + return dot(c, f); +} + +vec3 clip_color(vec3 c) { + float L = lum(c); + float n = min(c.r, min(c.g, c.b)); + float x = max(c.r, max(c.g, c.b)); + if (n < 0.0) + c = L + (((c - L) * L) / (L - n)); + if (x > 1.0) + c = L + (((c - L) * (1.0 - L)) / (x - L)); + return c; +} + +vec3 set_lum(vec3 c, float l) { + return clip_color(c + (l - lum(c))); +} + +void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) { + if (cmax > cmin) { + cmid = (((cmid - cmin) * s) / (cmax - cmin)); + cmax = s; + } else { + cmid = 0.0; + cmax = 0.0; + } + cmin = 0.0; +} + +vec3 set_sat(vec3 c, float s) { + if (c.r <= c.g) { + if (c.g <= c.b) { + set_sat_inner(c.r, c.g, c.b, s); + } else { + if (c.r <= c.b) { + set_sat_inner(c.r, c.b, c.g, s); + } else { + set_sat_inner(c.b, c.r, c.g, s); + } + } + } else { + if (c.r <= c.b) { + set_sat_inner(c.g, c.r, c.b, s); + } else { + if (c.g <= c.b) { + set_sat_inner(c.g, c.b, c.r, s); + } else { + set_sat_inner(c.b, c.g, c.r, s); + } + } + } + return c; +} + +vec3 mix_blend(vec3 cb, vec3 cs, uint mode) { + vec3 b = vec3(0.0); + switch (mode) { + case Blend_Multiply: + b = cb * cs; + break; + case Blend_Screen: + b = screen(cb, cs); + break; + case Blend_Overlay: + b = hard_light(cs, cb); + break; + case Blend_Darken: + b = min(cb, cs); + break; + case Blend_Lighten: + b = max(cb, cs); + break; + case Blend_ColorDodge: + b = vec3(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z)); + break; + case Blend_ColorBurn: + b = vec3(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z)); + break; + case Blend_HardLight: + b = hard_light(cb, cs); + break; + case Blend_SoftLight: + b = soft_light(cb, cs); + break; + case Blend_Difference: + b = abs(cb - cs); + break; + case Blend_Exclusion: + b = cb + cs - 2 * cb * cs; + break; + case Blend_Hue: + b = set_lum(set_sat(cs, sat(cb)), lum(cb)); + break; + case Blend_Saturation: + b = set_lum(set_sat(cb, sat(cs)), lum(cb)); + break; + case Blend_Color: + b = set_lum(cs, lum(cb)); + break; + case Blend_Luminosity: + b = set_lum(cb, lum(cs)); + break; + default: + b = cs; + break; + } + return b; +} + +#define Comp_Clear 0 +#define Comp_Copy 1 +#define Comp_Dest 2 +#define Comp_SrcOver 3 +#define Comp_DestOver 4 +#define Comp_SrcIn 5 +#define Comp_DestIn 6 +#define Comp_SrcOut 7 +#define Comp_DestOut 8 +#define Comp_SrcAtop 9 +#define Comp_DestAtop 10 +#define Comp_Xor 11 +#define Comp_Plus 12 +#define Comp_PlusDarker 13 +#define Comp_PlusLighter 14 + +vec4 mix_compose(vec3 cb, vec3 cs, float ab, float as, uint mode) { + float fa = 0.0; + float fb = 0.0; + switch (mode) { + case Comp_Copy: + fa = 1.0; + fb = 0.0; + break; + case Comp_Dest: + fa = 0.0; + fb = 1.0; + break; + case Comp_SrcOver: + fa = 1.0; + fb = 1.0 - as; + break; + case Comp_DestOver: + fa = 1.0 - ab; + fb = 1.0; + break; + case Comp_SrcIn: + fa = ab; + fb = 0.0; + break; + case Comp_DestIn: + fa = 0.0; + fb = as; + break; + case Comp_SrcOut: + fa = 1.0 - ab; + fb = 0.0; + break; + case Comp_DestOut: + fa = 0.0; + fb = 1.0 - as; + break; + case Comp_SrcAtop: + fa = ab; + fb = 1.0 - as; + break; + case Comp_DestAtop: + fa = 1.0 - ab; + fb = as; + break; + case Comp_Xor: + fa = 1.0 - ab; + fb = 1.0 - as; + break; + case Comp_Plus: + fa = 1.0; + fb = 1.0; + break; + case Comp_PlusDarker: + return vec4(max(vec4(0.0), 1.0 - as * vec4(cs, as) + 1.0 - ab * vec4(cb, ab)).xyz, + max(0.0, 1.0 - as + 1.0 - ab)); + case Comp_PlusLighter: + return vec4(min(vec4(1.0), as * vec4(cs, as) + ab * vec4(cb, ab)).xyz, + min(1.0, as + ab)); + default: + break; + } + return as * fa * vec4(cs, as) + ab * fb * vec4(cb, ab); +} + +#define BlendComp_default (Blend_Normal << 8 | Comp_SrcOver) diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 8b9998f..ac4f3d7 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -53,7 +53,7 @@ build gen/coarse.hlsl: hlsl gen/coarse.spv build gen/coarse.dxil: dxil gen/coarse.hlsl build gen/coarse.msl: msl gen/coarse.spv -build gen/kernel4.spv: glsl kernel4.comp | ptcl.h setup.h +build gen/kernel4.spv: glsl kernel4.comp | blend.h ptcl.h setup.h build gen/kernel4.hlsl: hlsl gen/kernel4.spv build gen/kernel4.dxil: dxil gen/kernel4.hlsl build gen/kernel4.msl: msl gen/kernel4.spv @@ -114,7 +114,7 @@ build gen/draw_root.hlsl: hlsl gen/draw_root.spv build gen/draw_root.dxil: dxil gen/draw_root.hlsl build gen/draw_root.msl: msl gen/draw_root.spv -build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h annotated.h setup.h mem.h +build gen/draw_leaf.spv: glsl draw_leaf.comp | blend.h scene.h drawtag.h annotated.h setup.h mem.h build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.msl: msl gen/draw_leaf.spv diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 98ab270..df306e0 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -273,7 +273,8 @@ void main() { } } AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size); - uint tag = Annotated_tag(conf.anno_alloc, ref).tag; + AnnotatedTag anno_tag = Annotated_tag(conf.anno_alloc, ref); + uint tag = anno_tag.tag; uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + seq_ix % width; @@ -287,7 +288,10 @@ void main() { // For draws, include the tile if it is solid. // For clips, include the tile if it is empty - this way, logic // below will suppress the drawing of inner elements. - include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip; + // For blends, include the tile if + // (blend_mode, composition_mode) != (Normal, SrcOver) + include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip + || (is_clip && (anno_tag.flags & 0x2) != 0); } if (include_tile) { uint el_slice = el_ix / 32; @@ -387,13 +391,14 @@ void main() { tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix] + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + AnnoEndClip end_clip = Annotated_EndClip_read(conf.anno_alloc, ref); clip_depth--; if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } write_fill(cmd_alloc, cmd_ref, MODE_NONZERO, tile, 0.0); - Cmd_EndClip_write(cmd_alloc, cmd_ref); - cmd_ref.offset += 4; + Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(end_clip.blend)); + cmd_ref.offset += 4 + CmdEndClip_size; break; } } else { diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index f236b7f..74fc2f8 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -28,6 +28,7 @@ layout(binding = 2) readonly buffer SceneBuf { #include "tile.h" #include "drawtag.h" #include "annotated.h" +#include "blend.h" #define Monoid DrawMonoid @@ -149,17 +150,23 @@ void main() { Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img); break; case Element_BeginClip: + Clip begin_clip = Element_BeginClip_read(this_ref); AnnoBeginClip anno_begin_clip; anno_begin_clip.bbox = bbox; anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke - Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip); + anno_begin_clip.blend = begin_clip.blend; + uint flags = uint(begin_clip.blend != BlendComp_default) << 1; + Annotated_BeginClip_write(conf.anno_alloc, out_ref, flags, anno_begin_clip); break; } } else if (tag_word == Element_EndClip) { + Clip end_clip = Element_BeginClip_read(this_ref); AnnoEndClip anno_end_clip; // The actual bbox will be reconstructed from clip stream output. anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9); - Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip); + anno_end_clip.blend = end_clip.blend; + uint flags = uint(end_clip.blend != BlendComp_default) << 1; + Annotated_EndClip_write(conf.anno_alloc, out_ref, flags, anno_end_clip); } // Generate clip stream. if (tag_word == Element_BeginClip || tag_word == Element_EndClip) { diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index 0599eb8..c7e1682 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index b5f3949..f00eabe 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -49,6 +49,17 @@ struct AnnoLinGradient float line_c; }; +struct AnnoEndClipRef +{ + uint offset; +}; + +struct AnnoEndClip +{ + float4 bbox; + uint blend; +}; + struct AnnotatedRef { uint offset; @@ -153,6 +164,16 @@ struct CmdImage int2 offset; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -197,8 +218,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _283 : register(u0, space0); -ByteAddressBuffer _1169 : register(t1, space0); +RWByteAddressBuffer _308 : register(u0, space0); +ByteAddressBuffer _1283 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -221,8 +242,8 @@ groupshared uint sh_tile_count[256]; Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _360 = { a.offset + offset }; - return _360; + Alloc _385 = { a.offset + offset }; + return _385; } bool touch_mem(Alloc alloc, uint offset) @@ -238,7 +259,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _283.Load(offset * 4 + 8); + uint v = _308.Load(offset * 4 + 8); return v; } @@ -251,8 +272,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { - BinInstanceRef _674 = { ref.offset + (index * 4u) }; - return _674; + BinInstanceRef _765 = { ref.offset + (index * 4u) }; + return _765; } BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) @@ -271,8 +292,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _636 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _636; + AnnotatedTag _717 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _717; } Path Path_read(Alloc a, PathRef ref) @@ -289,8 +310,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _734 = { raw2 }; - s.tiles = _734; + TileRef _825 = { raw2 }; + s.tiles = _825; return s; } @@ -300,11 +321,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _1055; - _283.GetDimensions(_1055); - _1055 = (_1055 - 8) / 4; + uint _1169; + _308.GetDimensions(_1169); + _1169 = (_1169 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_1055) * 4); + uint param_1 = uint(int(_1169) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -318,9 +339,9 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _759 = { raw0 }; + TileSegRef _850 = { raw0 }; Tile s; - s.tile = _759; + s.tile = _850; s.backdrop = int(raw1); return s; } @@ -355,30 +376,30 @@ AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) { - AnnoColorRef _642 = { ref.offset + 4u }; + AnnoColorRef _723 = { ref.offset + 4u }; Alloc param = a; - AnnoColorRef param_1 = _642; + AnnoColorRef param_1 = _723; return AnnoColor_read(param, param_1); } MallocResult malloc(uint size) { - uint _289; - _283.InterlockedAdd(0, size, _289); - uint offset = _289; - uint _296; - _283.GetDimensions(_296); - _296 = (_296 - 8) / 4; + uint _314; + _308.InterlockedAdd(0, size, _314); + uint offset = _314; + uint _321; + _308.GetDimensions(_321); + _321 = (_321 - 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_296) * 4); + r.failed = (offset + size) > uint(int(_321) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _318; - _283.InterlockedMax(4, 1u, _318); + uint _343; + _308.InterlockedMax(4, 1u, _343); return r; } return r; @@ -392,7 +413,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _283.Store(offset * 4 + 8, val); + _308.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -410,9 +431,9 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdJumpRef _1048 = { ref.offset + 4u }; + CmdJumpRef _1162 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _1048; + CmdJumpRef param_4 = _1162; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -424,21 +445,21 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _1076 = malloc(param); - MallocResult new_cmd = _1076; + MallocResult _1190 = malloc(param); + MallocResult new_cmd = _1190; if (new_cmd.failed) { return false; } - CmdJump _1086 = { new_cmd.alloc.offset }; - CmdJump jump = _1086; + CmdJump _1200 = { new_cmd.alloc.offset }; + CmdJump jump = _1200; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _1098 = { cmd_alloc.offset }; - cmd_ref = _1098; + CmdRef _1212 = { cmd_alloc.offset }; + cmd_ref = _1212; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; return true; } @@ -467,9 +488,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, param_2); - CmdFillRef _932 = { ref.offset + 4u }; + CmdFillRef _1036 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _932; + CmdFillRef param_4 = _1036; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -501,9 +522,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _950 = { ref.offset + 4u }; + CmdStrokeRef _1054 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _950; + CmdStrokeRef param_4 = _1054; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } @@ -515,8 +536,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float { if (tile.tile.offset != 0u) { - CmdFill _1122 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _1122; + CmdFill _1236 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _1236; Alloc param_1 = alloc; CmdRef param_2 = cmd_ref; CmdFill param_3 = cmd_fill; @@ -533,8 +554,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float } else { - CmdStroke _1152 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _1152; + CmdStroke _1266 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _1266; Alloc param_6 = alloc; CmdRef param_7 = cmd_ref; CmdStroke param_8 = cmd_stroke; @@ -558,9 +579,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _976 = { ref.offset + 4u }; + CmdColorRef _1080 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _976; + CmdColorRef param_4 = _1080; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } @@ -607,9 +628,9 @@ AnnoLinGradient AnnoLinGradient_read(Alloc a, AnnoLinGradientRef ref) AnnoLinGradient Annotated_LinGradient_read(Alloc a, AnnotatedRef ref) { - AnnoLinGradientRef _652 = { ref.offset + 4u }; + AnnoLinGradientRef _733 = { ref.offset + 4u }; Alloc param = a; - AnnoLinGradientRef param_1 = _652; + AnnoLinGradientRef param_1 = _733; return AnnoLinGradient_read(param, param_1); } @@ -640,9 +661,9 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _994 = { ref.offset + 4u }; + CmdLinGradRef _1098 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _994; + CmdLinGradRef param_4 = _1098; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } @@ -681,9 +702,9 @@ AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) { - AnnoImageRef _662 = { ref.offset + 4u }; + AnnoImageRef _743 = { ref.offset + 4u }; Alloc param = a; - AnnoImageRef param_1 = _662; + AnnoImageRef param_1 = _743; return AnnoImage_read(param, param_1); } @@ -706,9 +727,9 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; write_mem(param, param_1, param_2); - CmdImageRef _1012 = { ref.offset + 4u }; + CmdImageRef _1116 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _1012; + CmdImageRef param_4 = _1116; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } @@ -721,12 +742,58 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) write_mem(param, param_1, param_2); } -void Cmd_EndClip_write(Alloc a, CmdRef ref) +AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + AnnoEndClip s; + s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = raw4; + return s; +} + +AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) +{ + AnnoEndClipRef _753 = { ref.offset + 4u }; + Alloc param = a; + AnnoEndClipRef param_1 = _753; + return AnnoEndClip_read(param, param_1); +} + +void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.blend; + write_mem(param, param_1, param_2); +} + +void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; write_mem(param, param_1, param_2); + CmdEndClipRef _1143 = { ref.offset + 4u }; + Alloc param_3 = a; + CmdEndClipRef param_4 = _1143; + CmdEndClip param_5 = s; + CmdEndClip_write(param_3, param_4, param_5); } void Cmd_End_write(Alloc a, CmdRef ref) @@ -739,25 +806,25 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_1169.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_1283.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1169.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_1283.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.Load(8)) + bin_tile_x) + tile_x; - Alloc _1234; - _1234.offset = _1169.Load(24); + uint this_tile_ix = (((bin_tile_y + tile_y) * _1283.Load(8)) + bin_tile_x) + tile_x; + Alloc _1348; + _1348.offset = _1283.Load(24); Alloc param; - param.offset = _1234.offset; + param.offset = _1348.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1243 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1243; + CmdRef _1357 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1357; uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; uint clip_depth = 0u; uint clip_zero_depth = 0u; @@ -765,17 +832,17 @@ void comp_main() uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = _283.Load(4) == 0u; + bool mem_ok = _308.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1448; + uint _1562; uint element_ix; AnnotatedRef ref; Alloc param_14; Alloc param_16; uint tile_count; Alloc param_23; - uint _1770; + uint _1887; Alloc param_29; Tile tile_1; AnnoColor fill; @@ -783,40 +850,41 @@ void comp_main() Alloc param_52; CmdLinGrad cmd_lin; Alloc param_69; + Alloc param_95; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1500; + bool _1614; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1298 = th_ix < 256u; - bool _1306; - if (_1298) + bool _1412 = th_ix < 256u; + bool _1420; + if (_1412) { - _1306 = (partition_ix + th_ix) < n_partitions; + _1420 = (partition_ix + th_ix) < n_partitions; } else { - _1306 = _1298; + _1420 = _1412; } - if (_1306) + if (_1420) { - uint in_ix = (_1169.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1323; - _1323.offset = _1169.Load(20); - param_3.offset = _1323.offset; + uint in_ix = (_1283.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1437; + _1437.offset = _1283.Load(20); + param_3.offset = _1437.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1334; - _1334.offset = _1169.Load(20); - param_5.offset = _1334.offset; + Alloc _1448; + _1448.offset = _1283.Load(20); + param_5.offset = _1448.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -862,16 +930,16 @@ void comp_main() } if (part_ix > 0u) { - _1448 = sh_part_count[part_ix - 1u]; + _1562 = sh_part_count[part_ix - 1u]; } else { - _1448 = part_start_ix; + _1562 = part_start_ix; } - ix -= _1448; + ix -= _1562; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1467 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1467; + BinInstanceRef _1581 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1581; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -881,16 +949,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1490 = (wr_ix - rd_ix) < 256u; - if (_1490) + bool _1604 = (wr_ix - rd_ix) < 256u; + if (_1604) { - _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1614 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1500 = _1490; + _1614 = _1604; } - if (_1500) + if (_1614) { continue; } @@ -903,11 +971,11 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - AnnotatedRef _1521 = { _1169.Load(32) + (element_ix * 40u) }; - ref = _1521; - Alloc _1524; - _1524.offset = _1169.Load(32); - param_14.offset = _1524.offset; + AnnotatedRef _1635 = { _1283.Load(32) + (element_ix * 40u) }; + ref = _1635; + Alloc _1638; + _1638.offset = _1283.Load(32); + param_14.offset = _1638.offset; AnnotatedRef param_15 = ref; tag = Annotated_tag(param_14, param_15).tag; } @@ -919,13 +987,13 @@ void comp_main() case 4u: case 5u: { - uint drawmonoid_base = (_1169.Load(44) >> uint(2)) + (2u * element_ix); - uint path_ix = _283.Load(drawmonoid_base * 4 + 8); - PathRef _1553 = { _1169.Load(16) + (path_ix * 12u) }; - Alloc _1556; - _1556.offset = _1169.Load(16); - param_16.offset = _1556.offset; - PathRef param_17 = _1553; + uint drawmonoid_base = (_1283.Load(44) >> uint(2)) + (2u * element_ix); + uint path_ix = _308.Load(drawmonoid_base * 4 + 8); + PathRef _1667 = { _1283.Load(16) + (path_ix * 12u) }; + Alloc _1670; + _1670.offset = _1283.Load(16); + param_16.offset = _1670.offset; + PathRef param_17 = _1667; Path path = Path_read(param_16, param_17); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -980,22 +1048,23 @@ void comp_main() el_ix = probe_1; } } - AnnotatedRef _1755 = { _1169.Load(32) + (sh_elements[el_ix] * 40u) }; - AnnotatedRef ref_1 = _1755; - Alloc _1759; - _1759.offset = _1169.Load(32); - param_23.offset = _1759.offset; + AnnotatedRef _1869 = { _1283.Load(32) + (sh_elements[el_ix] * 40u) }; + AnnotatedRef ref_1 = _1869; + Alloc _1874; + _1874.offset = _1283.Load(32); + param_23.offset = _1874.offset; AnnotatedRef param_24 = ref_1; - uint tag_1 = Annotated_tag(param_23, param_24).tag; + AnnotatedTag anno_tag = Annotated_tag(param_23, param_24); + uint tag_1 = anno_tag.tag; if (el_ix > 0u) { - _1770 = sh_tile_count[el_ix - 1u]; + _1887 = sh_tile_count[el_ix - 1u]; } else { - _1770 = 0u; + _1887 = 0u; } - uint seq_ix = ix_1 - _1770; + uint seq_ix = ix_1 - _1887; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -1004,29 +1073,47 @@ void comp_main() { uint param_25 = el_ix; bool param_26 = mem_ok; - TileRef _1822 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + TileRef _1939 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1822; + TileRef param_28 = _1939; Tile tile = Tile_read(param_27, param_28); bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); - bool _1834 = tile.tile.offset != 0u; - bool _1843; - if (!_1834) + bool _1951 = tile.tile.offset != 0u; + bool _1960; + if (!_1951) { - _1843 = (tile.backdrop == 0) == is_clip; + _1960 = (tile.backdrop == 0) == is_clip; } else { - _1843 = _1834; + _1960 = _1951; } - include_tile = _1843; + bool _1972; + if (!_1960) + { + bool _1971; + if (is_clip) + { + _1971 = (anno_tag.flags & 2u) != 0u; + } + else + { + _1971 = is_clip; + } + _1972 = _1971; + } + else + { + _1972 = _1960; + } + include_tile = _1972; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1863; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1863); + uint _1992; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1992); } } GroupMemoryBarrierWithGroupSync(); @@ -1050,11 +1137,11 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - AnnotatedRef _1917 = { _1169.Load(32) + (element_ix_1 * 40u) }; - ref = _1917; - Alloc _1922; - _1922.offset = _1169.Load(32); - param_29.offset = _1922.offset; + AnnotatedRef _2046 = { _1283.Load(32) + (element_ix_1 * 40u) }; + ref = _2046; + Alloc _2050; + _2050.offset = _1283.Load(32); + param_29.offset = _2050.offset; AnnotatedRef param_30 = ref; AnnotatedTag tag_2 = Annotated_tag(param_29, param_30); if (clip_zero_depth == 0u) @@ -1065,23 +1152,23 @@ void comp_main() { uint param_31 = element_ref_ix; bool param_32 = mem_ok; - TileRef _1958 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2086 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_33 = read_tile_alloc(param_31, param_32); - TileRef param_34 = _1958; + TileRef param_34 = _2086; tile_1 = Tile_read(param_33, param_34); - Alloc _1965; - _1965.offset = _1169.Load(32); - param_35.offset = _1965.offset; + Alloc _2093; + _2093.offset = _1283.Load(32); + param_35.offset = _2093.offset; AnnotatedRef param_36 = ref; fill = Annotated_Color_read(param_35, param_36); Alloc param_37 = cmd_alloc; CmdRef param_38 = cmd_ref; uint param_39 = cmd_limit; - bool _1977 = alloc_cmd(param_37, param_38, param_39); + bool _2105 = alloc_cmd(param_37, param_38, param_39); cmd_alloc = param_37; cmd_ref = param_38; cmd_limit = param_39; - if (!_1977) + if (!_2105) { break; } @@ -1092,10 +1179,10 @@ void comp_main() float param_44 = fill.linewidth; write_fill(param_40, param_41, param_42, param_43, param_44); cmd_ref = param_41; - CmdColor _2001 = { fill.rgba_color }; + CmdColor _2129 = { fill.rgba_color }; Alloc param_45 = cmd_alloc; CmdRef param_46 = cmd_ref; - CmdColor param_47 = _2001; + CmdColor param_47 = _2129; Cmd_Color_write(param_45, param_46, param_47); cmd_ref.offset += 8u; break; @@ -1104,23 +1191,23 @@ void comp_main() { uint param_48 = element_ref_ix; bool param_49 = mem_ok; - TileRef _2030 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2158 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_50 = read_tile_alloc(param_48, param_49); - TileRef param_51 = _2030; + TileRef param_51 = _2158; tile_1 = Tile_read(param_50, param_51); - Alloc _2037; - _2037.offset = _1169.Load(32); - param_52.offset = _2037.offset; + Alloc _2165; + _2165.offset = _1283.Load(32); + param_52.offset = _2165.offset; AnnotatedRef param_53 = ref; AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53); Alloc param_54 = cmd_alloc; CmdRef param_55 = cmd_ref; uint param_56 = cmd_limit; - bool _2049 = alloc_cmd(param_54, param_55, param_56); + bool _2177 = alloc_cmd(param_54, param_55, param_56); cmd_alloc = param_54; cmd_ref = param_55; cmd_limit = param_56; - if (!_2049) + if (!_2177) { break; } @@ -1146,23 +1233,23 @@ void comp_main() { uint param_65 = element_ref_ix; bool param_66 = mem_ok; - TileRef _2114 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2242 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_67 = read_tile_alloc(param_65, param_66); - TileRef param_68 = _2114; + TileRef param_68 = _2242; tile_1 = Tile_read(param_67, param_68); - Alloc _2121; - _2121.offset = _1169.Load(32); - param_69.offset = _2121.offset; + Alloc _2249; + _2249.offset = _1283.Load(32); + param_69.offset = _2249.offset; AnnotatedRef param_70 = ref; AnnoImage fill_img = Annotated_Image_read(param_69, param_70); Alloc param_71 = cmd_alloc; CmdRef param_72 = cmd_ref; uint param_73 = cmd_limit; - bool _2133 = alloc_cmd(param_71, param_72, param_73); + bool _2261 = alloc_cmd(param_71, param_72, param_73); cmd_alloc = param_71; cmd_ref = param_72; cmd_limit = param_73; - if (!_2133) + if (!_2261) { break; } @@ -1173,10 +1260,10 @@ void comp_main() float param_78 = fill_img.linewidth; write_fill(param_74, param_75, param_76, param_77, param_78); cmd_ref = param_75; - CmdImage _2159 = { fill_img.index, fill_img.offset }; + CmdImage _2287 = { fill_img.index, fill_img.offset }; Alloc param_79 = cmd_alloc; CmdRef param_80 = cmd_ref; - CmdImage param_81 = _2159; + CmdImage param_81 = _2287; Cmd_Image_write(param_79, param_80, param_81); cmd_ref.offset += 12u; break; @@ -1185,21 +1272,21 @@ void comp_main() { uint param_82 = element_ref_ix; bool param_83 = mem_ok; - TileRef _2188 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2316 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_84 = read_tile_alloc(param_82, param_83); - TileRef param_85 = _2188; + TileRef param_85 = _2316; tile_1 = Tile_read(param_84, param_85); - bool _2194 = tile_1.tile.offset == 0u; - bool _2200; - if (_2194) + bool _2322 = tile_1.tile.offset == 0u; + bool _2328; + if (_2322) { - _2200 = tile_1.backdrop == 0; + _2328 = tile_1.backdrop == 0; } else { - _2200 = _2194; + _2328 = _2322; } - if (_2200) + if (_2328) { clip_zero_depth = clip_depth + 1u; } @@ -1208,11 +1295,11 @@ void comp_main() Alloc param_86 = cmd_alloc; CmdRef param_87 = cmd_ref; uint param_88 = cmd_limit; - bool _2212 = alloc_cmd(param_86, param_87, param_88); + bool _2340 = alloc_cmd(param_86, param_87, param_88); cmd_alloc = param_86; cmd_ref = param_87; cmd_limit = param_88; - if (!_2212) + if (!_2340) { break; } @@ -1228,33 +1315,40 @@ void comp_main() { uint param_91 = element_ref_ix; bool param_92 = mem_ok; - TileRef _2249 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _2377 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_93 = read_tile_alloc(param_91, param_92); - TileRef param_94 = _2249; + TileRef param_94 = _2377; tile_1 = Tile_read(param_93, param_94); + Alloc _2384; + _2384.offset = _1283.Load(32); + param_95.offset = _2384.offset; + AnnotatedRef param_96 = ref; + AnnoEndClip end_clip = Annotated_EndClip_read(param_95, param_96); clip_depth--; - Alloc param_95 = cmd_alloc; - CmdRef param_96 = cmd_ref; - uint param_97 = cmd_limit; - bool _2261 = alloc_cmd(param_95, param_96, param_97); - cmd_alloc = param_95; - cmd_ref = param_96; - cmd_limit = param_97; - if (!_2261) + Alloc param_97 = cmd_alloc; + CmdRef param_98 = cmd_ref; + uint param_99 = cmd_limit; + bool _2398 = alloc_cmd(param_97, param_98, param_99); + cmd_alloc = param_97; + cmd_ref = param_98; + cmd_limit = param_99; + if (!_2398) { break; } - Alloc param_98 = cmd_alloc; - CmdRef param_99 = cmd_ref; - uint param_100 = 0u; - Tile param_101 = tile_1; - float param_102 = 0.0f; - write_fill(param_98, param_99, param_100, param_101, param_102); - cmd_ref = param_99; - Alloc param_103 = cmd_alloc; - CmdRef param_104 = cmd_ref; - Cmd_EndClip_write(param_103, param_104); - cmd_ref.offset += 4u; + Alloc param_100 = cmd_alloc; + CmdRef param_101 = cmd_ref; + uint param_102 = 0u; + Tile param_103 = tile_1; + float param_104 = 0.0f; + write_fill(param_100, param_101, param_102, param_103, param_104); + cmd_ref = param_101; + CmdEndClip _2419 = { end_clip.blend }; + Alloc param_105 = cmd_alloc; + CmdRef param_106 = cmd_ref; + CmdEndClip param_107 = _2419; + Cmd_EndClip_write(param_105, param_106, param_107); + cmd_ref.offset += 8u; break; } } @@ -1287,21 +1381,21 @@ void comp_main() break; } } - bool _2326 = (bin_tile_x + tile_x) < _1169.Load(8); - bool _2335; - if (_2326) + bool _2467 = (bin_tile_x + tile_x) < _1283.Load(8); + bool _2476; + if (_2467) { - _2335 = (bin_tile_y + tile_y) < _1169.Load(12); + _2476 = (bin_tile_y + tile_y) < _1283.Load(12); } else { - _2335 = _2326; + _2476 = _2467; } - if (_2335) + if (_2476) { - Alloc param_105 = cmd_alloc; - CmdRef param_106 = cmd_ref; - Cmd_End_write(param_105, param_106); + Alloc param_108 = cmd_alloc; + CmdRef param_109 = cmd_ref; + Cmd_End_write(param_108, param_109); } } diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index e5a0f0d..1422ff1 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -65,6 +65,17 @@ struct AnnoLinGradient float line_c; }; +struct AnnoEndClipRef +{ + uint offset; +}; + +struct AnnoEndClip +{ + float4 bbox; + uint blend; +}; + struct AnnotatedRef { uint offset; @@ -169,6 +180,16 @@ struct CmdImage int2 offset; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -243,7 +264,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_283, constant uint& v_283BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -251,7 +272,7 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_283.memory[offset]; + uint v = v_308.memory[offset]; return v; } @@ -270,39 +291,39 @@ BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const } static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); BinInstance s; s.element_ix = raw0; return s; } static inline __attribute__((always_inline)) -AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnotatedTag Annotated_tag(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_283, v_283BufferSize); + uint tag_and_flags = read_mem(param, param_1, v_308, v_308BufferSize); return AnnotatedTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); + uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -315,24 +336,24 @@ void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) } static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_283, constant uint& v_283BufferSize) +Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_308, constant uint& v_308BufferSize) { uint param = 0u; - uint param_1 = uint(int((v_283BufferSize - 8) / 4) * 4); + uint param_1 = uint(int((v_308BufferSize - 8) / 4) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); Tile s; s.tile = TileSegRef{ raw0 }; s.backdrop = int(raw1); @@ -340,27 +361,27 @@ Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& } static inline __attribute__((always_inline)) -AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); + uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); + uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); + uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); + uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); AnnoColor s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -369,34 +390,34 @@ AnnoColor AnnoColor_read(thread const Alloc& a, thread const AnnoColorRef& ref, } static inline __attribute__((always_inline)) -AnnoColor Annotated_Color_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoColor Annotated_Color_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; AnnoColorRef param_1 = AnnoColorRef{ ref.offset + 4u }; - return AnnoColor_read(param, param_1, v_283, v_283BufferSize); + return AnnoColor_read(param, param_1, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_283, constant uint& v_283BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_308, constant uint& v_308BufferSize) { - uint _289 = atomic_fetch_add_explicit((device atomic_uint*)&v_283.mem_offset, size, memory_order_relaxed); - uint offset = _289; + uint _314 = atomic_fetch_add_explicit((device atomic_uint*)&v_308.mem_offset, size, memory_order_relaxed); + uint offset = _314; MallocResult r; - r.failed = (offset + size) > uint(int((v_283BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > uint(int((v_308BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _318 = atomic_fetch_max_explicit((device atomic_uint*)&v_283.mem_error, 1u, memory_order_relaxed); + uint _343 = atomic_fetch_max_explicit((device atomic_uint*)&v_308.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_283, constant uint& v_283BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -404,42 +425,42 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_283.memory[offset] = val; + v_308.memory[offset] = val; } static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 10u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdJump_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_283, constant uint& v_283BufferSize) +bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_308, constant uint& v_308BufferSize) { if (cmd_ref.offset < cmd_limit) { return true; } uint param = 1024u; - MallocResult _1076 = malloc(param, v_283, v_283BufferSize); - MallocResult new_cmd = _1076; + MallocResult _1190 = malloc(param, v_308, v_308BufferSize); + MallocResult new_cmd = _1190; if (new_cmd.failed) { return false; @@ -448,7 +469,7 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; - Cmd_Jump_write(param_1, param_2, param_3, v_283, v_283BufferSize); + Cmd_Jump_write(param_1, param_2, param_3, v_308, v_308BufferSize); cmd_alloc = new_cmd.alloc; cmd_ref = CmdRef{ cmd_alloc.offset }; cmd_limit = (cmd_alloc.offset + 1024u) - 60u; @@ -462,70 +483,70 @@ uint fill_mode_from_flags(thread const uint& flags) } static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); + write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdFill_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 3u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); + write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdStroke_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const uint& flags, thread const Tile& tile, thread const float& linewidth, device Memory& v_283, constant uint& v_283BufferSize) +void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const uint& flags, thread const Tile& tile, thread const float& linewidth, device Memory& v_308, constant uint& v_308BufferSize) { uint param = flags; if (fill_mode_from_flags(param) == 0u) @@ -536,14 +557,14 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_1 = alloc; CmdRef param_2 = cmd_ref; CmdFill param_3 = cmd_fill; - Cmd_Fill_write(param_1, param_2, param_3, v_283, v_283BufferSize); + Cmd_Fill_write(param_1, param_2, param_3, v_308, v_308BufferSize); cmd_ref.offset += 12u; } else { Alloc param_4 = alloc; CmdRef param_5 = cmd_ref; - Cmd_Solid_write(param_4, param_5, v_283, v_283BufferSize); + Cmd_Solid_write(param_4, param_5, v_308, v_308BufferSize); cmd_ref.offset += 4u; } } @@ -553,65 +574,65 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_6 = alloc; CmdRef param_7 = cmd_ref; CmdStroke param_8 = cmd_stroke; - Cmd_Stroke_write(param_6, param_7, param_8, v_283, v_283BufferSize); + Cmd_Stroke_write(param_6, param_7, param_8, v_308, v_308BufferSize); cmd_ref.offset += 12u; } } static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdColor_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLinGradientRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLinGradientRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); + uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); + uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); + uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); + uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_283, v_283BufferSize); + uint raw6 = read_mem(param_12, param_13, v_308, v_308BufferSize); Alloc param_14 = a; uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_283, v_283BufferSize); + uint raw7 = read_mem(param_14, param_15, v_308, v_308BufferSize); Alloc param_16 = a; uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_283, v_283BufferSize); + uint raw8 = read_mem(param_16, param_17, v_308, v_308BufferSize); AnnoLinGradient s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -623,73 +644,73 @@ AnnoLinGradient AnnoLinGradient_read(thread const Alloc& a, thread const AnnoLin } static inline __attribute__((always_inline)) -AnnoLinGradient Annotated_LinGradient_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoLinGradient Annotated_LinGradient_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; AnnoLinGradientRef param_1 = AnnoLinGradientRef{ ref.offset + 4u }; - return AnnoLinGradient_read(param, param_1, v_283, v_283BufferSize); + return AnnoLinGradient_read(param, param_1, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); + write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_283, v_283BufferSize); + write_mem(param_6, param_7, param_8, v_308, v_308BufferSize); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_283, v_283BufferSize); + write_mem(param_9, param_10, param_11, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdLinGrad_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_283, v_283BufferSize); + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_283, v_283BufferSize); + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_283, v_283BufferSize); + uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_283, v_283BufferSize); + uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_283, v_283BufferSize); + uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_283, v_283BufferSize); + uint raw5 = read_mem(param_10, param_11, v_308, v_308BufferSize); Alloc param_12 = a; uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_283, v_283BufferSize); + uint raw6 = read_mem(param_12, param_13, v_308, v_308BufferSize); AnnoImage s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); s.linewidth = as_type(raw4); @@ -699,68 +720,115 @@ AnnoImage AnnoImage_read(thread const Alloc& a, thread const AnnoImageRef& ref, } static inline __attribute__((always_inline)) -AnnoImage Annotated_Image_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoImage Annotated_Image_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; AnnoImageRef param_1 = AnnoImageRef{ ref.offset + 4u }; - return AnnoImage_read(param, param_1, v_283, v_283BufferSize); + return AnnoImage_read(param, param_1, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_283, constant uint& v_283BufferSize) +void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_308, constant uint& v_308BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_283, v_283BufferSize); + write_mem(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); Alloc param_3 = a; CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_283, v_283BufferSize); + CmdImage_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 8u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_308, v_308BufferSize); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3, v_308, v_308BufferSize); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5, v_308, v_308BufferSize); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7, v_308, v_308BufferSize); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_308, v_308BufferSize); + AnnoEndClip s; + s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); + s.blend = raw4; + return s; +} + +static inline __attribute__((always_inline)) +AnnoEndClip Annotated_EndClip_read(thread const Alloc& a, thread const AnnotatedRef& ref, device Memory& v_308, constant uint& v_308BufferSize) +{ + Alloc param = a; + AnnoEndClipRef param_1 = AnnoEndClipRef{ ref.offset + 4u }; + return AnnoEndClip_read(param, param_1, v_308, v_308BufferSize); +} + +static inline __attribute__((always_inline)) +void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_308, constant uint& v_308BufferSize) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.blend; + write_mem(param, param_1, param_2, v_308, v_308BufferSize); +} + +static inline __attribute__((always_inline)) +void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 9u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); + Alloc param_3 = a; + CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; + CmdEndClip param_5 = s; + CmdEndClip_write(param_3, param_4, param_5, v_308, v_308BufferSize); } static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_283, constant uint& v_283BufferSize) +void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_308, constant uint& v_308BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 0u; - write_mem(param, param_1, param_2, v_283, v_283BufferSize); + write_mem(param, param_1, param_2, v_308, v_308BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_283 [[buffer(0)]], const device ConfigBuf& _1169 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_308 [[buffer(0)]], const device ConfigBuf& _1283 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_bitmaps[8][256]; threadgroup Alloc sh_part_elements[256]; @@ -772,19 +840,19 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M threadgroup uint sh_tile_y0[256]; threadgroup uint sh_tile_base[256]; threadgroup uint sh_tile_count[256]; - constant uint& v_283BufferSize = spvBufferSizeConstants[0]; - uint width_in_bins = ((_1169.conf.width_in_tiles + 16u) - 1u) / 16u; + constant uint& v_308BufferSize = spvBufferSizeConstants[0]; + uint width_in_bins = ((_1283.conf.width_in_tiles + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_1169.conf.n_elements + 256u) - 1u) / 256u; + uint n_partitions = ((_1283.conf.n_elements + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _1169.conf.width_in_tiles) + bin_tile_x) + tile_x; + uint this_tile_ix = (((bin_tile_y + tile_y) * _1283.conf.width_in_tiles) + bin_tile_x) + tile_x; Alloc param; - param.offset = _1169.conf.ptcl_alloc.offset; + param.offset = _1283.conf.ptcl_alloc.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -796,17 +864,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - bool mem_ok = v_283.mem_error == 0u; + bool mem_ok = v_308.mem_error == 0u; Alloc param_3; Alloc param_5; - uint _1448; + uint _1562; uint element_ix; AnnotatedRef ref; Alloc param_14; Alloc param_16; uint tile_count; Alloc param_23; - uint _1770; + uint _1887; Alloc param_29; Tile tile_1; AnnoColor fill; @@ -814,38 +882,39 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_52; CmdLinGrad cmd_lin; Alloc param_69; + Alloc param_95; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1500; + bool _1614; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1298 = th_ix < 256u; - bool _1306; - if (_1298) + bool _1412 = th_ix < 256u; + bool _1420; + if (_1412) { - _1306 = (partition_ix + th_ix) < n_partitions; + _1420 = (partition_ix + th_ix) < n_partitions; } else { - _1306 = _1298; + _1420 = _1412; } - if (_1306) + if (_1420) { - uint in_ix = (_1169.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_3.offset = _1169.conf.bin_alloc.offset; + uint in_ix = (_1283.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + param_3.offset = _1283.conf.bin_alloc.offset; uint param_4 = in_ix; - count = read_mem(param_3, param_4, v_283, v_283BufferSize); - param_5.offset = _1169.conf.bin_alloc.offset; + count = read_mem(param_3, param_4, v_308, v_308BufferSize); + param_5.offset = _1283.conf.bin_alloc.offset; uint param_6 = in_ix + 1u; - uint offset = read_mem(param_5, param_6, v_283, v_283BufferSize); + uint offset = read_mem(param_5, param_6, v_308, v_308BufferSize); uint param_7 = offset; uint param_8 = count * 4u; bool param_9 = mem_ok; @@ -889,34 +958,34 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (part_ix > 0u) { - _1448 = sh_part_count[part_ix - 1u]; + _1562 = sh_part_count[part_ix - 1u]; } else { - _1448 = part_start_ix; + _1562 = part_start_ix; } - ix -= _1448; + ix -= _1562; Alloc bin_alloc = sh_part_elements[part_ix]; BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; BinInstanceRef param_13 = BinInstance_index(param_10, param_11); - BinInstance inst = BinInstance_read(param_12, param_13, v_283, v_283BufferSize); + BinInstance inst = BinInstance_read(param_12, param_13, v_308, v_308BufferSize); sh_elements[th_ix] = inst.element_ix; } threadgroup_barrier(mem_flags::mem_threadgroup); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1490 = (wr_ix - rd_ix) < 256u; - if (_1490) + bool _1604 = (wr_ix - rd_ix) < 256u; + if (_1604) { - _1500 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1614 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1500 = _1490; + _1614 = _1604; } - if (_1500) + if (_1614) { continue; } @@ -929,10 +998,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - ref = AnnotatedRef{ _1169.conf.anno_alloc.offset + (element_ix * 40u) }; - param_14.offset = _1169.conf.anno_alloc.offset; + ref = AnnotatedRef{ _1283.conf.anno_alloc.offset + (element_ix * 40u) }; + param_14.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_15 = ref; - tag = Annotated_tag(param_14, param_15, v_283, v_283BufferSize).tag; + tag = Annotated_tag(param_14, param_15, v_308, v_308BufferSize).tag; } switch (tag) { @@ -942,11 +1011,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 4u: case 5u: { - uint drawmonoid_base = (_1169.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); - uint path_ix = v_283.memory[drawmonoid_base]; - param_16.offset = _1169.conf.tile_alloc.offset; - PathRef param_17 = PathRef{ _1169.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_16, param_17, v_283, v_283BufferSize); + uint drawmonoid_base = (_1283.conf.drawmonoid_alloc.offset >> uint(2)) + (2u * element_ix); + uint path_ix = v_308.memory[drawmonoid_base]; + param_16.offset = _1283.conf.tile_alloc.offset; + PathRef param_17 = PathRef{ _1283.conf.tile_alloc.offset + (path_ix * 12u) }; + Path path = Path_read(param_16, param_17, v_308, v_308BufferSize); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -1000,19 +1069,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M el_ix = probe_1; } } - AnnotatedRef ref_1 = AnnotatedRef{ _1169.conf.anno_alloc.offset + (sh_elements[el_ix] * 40u) }; - param_23.offset = _1169.conf.anno_alloc.offset; + AnnotatedRef ref_1 = AnnotatedRef{ _1283.conf.anno_alloc.offset + (sh_elements[el_ix] * 40u) }; + param_23.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_24 = ref_1; - uint tag_1 = Annotated_tag(param_23, param_24, v_283, v_283BufferSize).tag; + AnnotatedTag anno_tag = Annotated_tag(param_23, param_24, v_308, v_308BufferSize); + uint tag_1 = anno_tag.tag; if (el_ix > 0u) { - _1770 = sh_tile_count[el_ix - 1u]; + _1887 = sh_tile_count[el_ix - 1u]; } else { - _1770 = 0u; + _1887 = 0u; } - uint seq_ix = ix_1 - _1770; + uint seq_ix = ix_1 - _1887; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -1021,27 +1091,45 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_25 = el_ix; bool param_26 = mem_ok; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_283, v_283BufferSize); + Alloc param_27 = read_tile_alloc(param_25, param_26, v_308, v_308BufferSize); TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_27, param_28, v_283, v_283BufferSize); + Tile tile = Tile_read(param_27, param_28, v_308, v_308BufferSize); bool is_clip = (tag_1 == 4u) || (tag_1 == 5u); - bool _1834 = tile.tile.offset != 0u; - bool _1843; - if (!_1834) + bool _1951 = tile.tile.offset != 0u; + bool _1960; + if (!_1951) { - _1843 = (tile.backdrop == 0) == is_clip; + _1960 = (tile.backdrop == 0) == is_clip; } else { - _1843 = _1834; + _1960 = _1951; } - include_tile = _1843; + bool _1972; + if (!_1960) + { + bool _1971; + if (is_clip) + { + _1971 = (anno_tag.flags & 2u) != 0u; + } + else + { + _1971 = is_clip; + } + _1972 = _1971; + } + else + { + _1972 = _1960; + } + include_tile = _1972; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1863 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1992 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -1065,10 +1153,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); uint element_ix_1 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - ref = AnnotatedRef{ _1169.conf.anno_alloc.offset + (element_ix_1 * 40u) }; - param_29.offset = _1169.conf.anno_alloc.offset; + ref = AnnotatedRef{ _1283.conf.anno_alloc.offset + (element_ix_1 * 40u) }; + param_29.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_30 = ref; - AnnotatedTag tag_2 = Annotated_tag(param_29, param_30, v_283, v_283BufferSize); + AnnotatedTag tag_2 = Annotated_tag(param_29, param_30, v_308, v_308BufferSize); if (clip_zero_depth == 0u) { switch (tag_2.tag) @@ -1077,20 +1165,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_31 = element_ref_ix; bool param_32 = mem_ok; - Alloc param_33 = read_tile_alloc(param_31, param_32, v_283, v_283BufferSize); + Alloc param_33 = read_tile_alloc(param_31, param_32, v_308, v_308BufferSize); TileRef param_34 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_33, param_34, v_283, v_283BufferSize); - param_35.offset = _1169.conf.anno_alloc.offset; + tile_1 = Tile_read(param_33, param_34, v_308, v_308BufferSize); + param_35.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_36 = ref; - fill = Annotated_Color_read(param_35, param_36, v_283, v_283BufferSize); + fill = Annotated_Color_read(param_35, param_36, v_308, v_308BufferSize); Alloc param_37 = cmd_alloc; CmdRef param_38 = cmd_ref; uint param_39 = cmd_limit; - bool _1977 = alloc_cmd(param_37, param_38, param_39, v_283, v_283BufferSize); + bool _2105 = alloc_cmd(param_37, param_38, param_39, v_308, v_308BufferSize); cmd_alloc = param_37; cmd_ref = param_38; cmd_limit = param_39; - if (!_1977) + if (!_2105) { break; } @@ -1099,12 +1187,12 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_42 = tag_2.flags; Tile param_43 = tile_1; float param_44 = fill.linewidth; - write_fill(param_40, param_41, param_42, param_43, param_44, v_283, v_283BufferSize); + write_fill(param_40, param_41, param_42, param_43, param_44, v_308, v_308BufferSize); cmd_ref = param_41; Alloc param_45 = cmd_alloc; CmdRef param_46 = cmd_ref; CmdColor param_47 = CmdColor{ fill.rgba_color }; - Cmd_Color_write(param_45, param_46, param_47, v_283, v_283BufferSize); + Cmd_Color_write(param_45, param_46, param_47, v_308, v_308BufferSize); cmd_ref.offset += 8u; break; } @@ -1112,20 +1200,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_48 = element_ref_ix; bool param_49 = mem_ok; - Alloc param_50 = read_tile_alloc(param_48, param_49, v_283, v_283BufferSize); + Alloc param_50 = read_tile_alloc(param_48, param_49, v_308, v_308BufferSize); TileRef param_51 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_50, param_51, v_283, v_283BufferSize); - param_52.offset = _1169.conf.anno_alloc.offset; + tile_1 = Tile_read(param_50, param_51, v_308, v_308BufferSize); + param_52.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_53 = ref; - AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53, v_283, v_283BufferSize); + AnnoLinGradient lin = Annotated_LinGradient_read(param_52, param_53, v_308, v_308BufferSize); Alloc param_54 = cmd_alloc; CmdRef param_55 = cmd_ref; uint param_56 = cmd_limit; - bool _2049 = alloc_cmd(param_54, param_55, param_56, v_283, v_283BufferSize); + bool _2177 = alloc_cmd(param_54, param_55, param_56, v_308, v_308BufferSize); cmd_alloc = param_54; cmd_ref = param_55; cmd_limit = param_56; - if (!_2049) + if (!_2177) { break; } @@ -1134,7 +1222,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_59 = tag_2.flags; Tile param_60 = tile_1; float param_61 = fill.linewidth; - write_fill(param_57, param_58, param_59, param_60, param_61, v_283, v_283BufferSize); + write_fill(param_57, param_58, param_59, param_60, param_61, v_308, v_308BufferSize); cmd_ref = param_58; cmd_lin.index = lin.index; cmd_lin.line_x = lin.line_x; @@ -1143,7 +1231,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_62 = cmd_alloc; CmdRef param_63 = cmd_ref; CmdLinGrad param_64 = cmd_lin; - Cmd_LinGrad_write(param_62, param_63, param_64, v_283, v_283BufferSize); + Cmd_LinGrad_write(param_62, param_63, param_64, v_308, v_308BufferSize); cmd_ref.offset += 20u; break; } @@ -1151,20 +1239,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_65 = element_ref_ix; bool param_66 = mem_ok; - Alloc param_67 = read_tile_alloc(param_65, param_66, v_283, v_283BufferSize); + Alloc param_67 = read_tile_alloc(param_65, param_66, v_308, v_308BufferSize); TileRef param_68 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_67, param_68, v_283, v_283BufferSize); - param_69.offset = _1169.conf.anno_alloc.offset; + tile_1 = Tile_read(param_67, param_68, v_308, v_308BufferSize); + param_69.offset = _1283.conf.anno_alloc.offset; AnnotatedRef param_70 = ref; - AnnoImage fill_img = Annotated_Image_read(param_69, param_70, v_283, v_283BufferSize); + AnnoImage fill_img = Annotated_Image_read(param_69, param_70, v_308, v_308BufferSize); Alloc param_71 = cmd_alloc; CmdRef param_72 = cmd_ref; uint param_73 = cmd_limit; - bool _2133 = alloc_cmd(param_71, param_72, param_73, v_283, v_283BufferSize); + bool _2261 = alloc_cmd(param_71, param_72, param_73, v_308, v_308BufferSize); cmd_alloc = param_71; cmd_ref = param_72; cmd_limit = param_73; - if (!_2133) + if (!_2261) { break; } @@ -1173,12 +1261,12 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint param_76 = tag_2.flags; Tile param_77 = tile_1; float param_78 = fill_img.linewidth; - write_fill(param_74, param_75, param_76, param_77, param_78, v_283, v_283BufferSize); + write_fill(param_74, param_75, param_76, param_77, param_78, v_308, v_308BufferSize); cmd_ref = param_75; Alloc param_79 = cmd_alloc; CmdRef param_80 = cmd_ref; CmdImage param_81 = CmdImage{ fill_img.index, fill_img.offset }; - Cmd_Image_write(param_79, param_80, param_81, v_283, v_283BufferSize); + Cmd_Image_write(param_79, param_80, param_81, v_308, v_308BufferSize); cmd_ref.offset += 12u; break; } @@ -1186,20 +1274,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_82 = element_ref_ix; bool param_83 = mem_ok; - Alloc param_84 = read_tile_alloc(param_82, param_83, v_283, v_283BufferSize); + Alloc param_84 = read_tile_alloc(param_82, param_83, v_308, v_308BufferSize); TileRef param_85 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_84, param_85, v_283, v_283BufferSize); - bool _2194 = tile_1.tile.offset == 0u; - bool _2200; - if (_2194) + tile_1 = Tile_read(param_84, param_85, v_308, v_308BufferSize); + bool _2322 = tile_1.tile.offset == 0u; + bool _2328; + if (_2322) { - _2200 = tile_1.backdrop == 0; + _2328 = tile_1.backdrop == 0; } else { - _2200 = _2194; + _2328 = _2322; } - if (_2200) + if (_2328) { clip_zero_depth = clip_depth + 1u; } @@ -1208,17 +1296,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_86 = cmd_alloc; CmdRef param_87 = cmd_ref; uint param_88 = cmd_limit; - bool _2212 = alloc_cmd(param_86, param_87, param_88, v_283, v_283BufferSize); + bool _2340 = alloc_cmd(param_86, param_87, param_88, v_308, v_308BufferSize); cmd_alloc = param_86; cmd_ref = param_87; cmd_limit = param_88; - if (!_2212) + if (!_2340) { break; } Alloc param_89 = cmd_alloc; CmdRef param_90 = cmd_ref; - Cmd_BeginClip_write(param_89, param_90, v_283, v_283BufferSize); + Cmd_BeginClip_write(param_89, param_90, v_308, v_308BufferSize); cmd_ref.offset += 4u; } clip_depth++; @@ -1228,32 +1316,36 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_91 = element_ref_ix; bool param_92 = mem_ok; - Alloc param_93 = read_tile_alloc(param_91, param_92, v_283, v_283BufferSize); + Alloc param_93 = read_tile_alloc(param_91, param_92, v_308, v_308BufferSize); TileRef param_94 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - tile_1 = Tile_read(param_93, param_94, v_283, v_283BufferSize); + tile_1 = Tile_read(param_93, param_94, v_308, v_308BufferSize); + param_95.offset = _1283.conf.anno_alloc.offset; + AnnotatedRef param_96 = ref; + AnnoEndClip end_clip = Annotated_EndClip_read(param_95, param_96, v_308, v_308BufferSize); clip_depth--; - Alloc param_95 = cmd_alloc; - CmdRef param_96 = cmd_ref; - uint param_97 = cmd_limit; - bool _2261 = alloc_cmd(param_95, param_96, param_97, v_283, v_283BufferSize); - cmd_alloc = param_95; - cmd_ref = param_96; - cmd_limit = param_97; - if (!_2261) + Alloc param_97 = cmd_alloc; + CmdRef param_98 = cmd_ref; + uint param_99 = cmd_limit; + bool _2398 = alloc_cmd(param_97, param_98, param_99, v_308, v_308BufferSize); + cmd_alloc = param_97; + cmd_ref = param_98; + cmd_limit = param_99; + if (!_2398) { break; } - Alloc param_98 = cmd_alloc; - CmdRef param_99 = cmd_ref; - uint param_100 = 0u; - Tile param_101 = tile_1; - float param_102 = 0.0; - write_fill(param_98, param_99, param_100, param_101, param_102, v_283, v_283BufferSize); - cmd_ref = param_99; - Alloc param_103 = cmd_alloc; - CmdRef param_104 = cmd_ref; - Cmd_EndClip_write(param_103, param_104, v_283, v_283BufferSize); - cmd_ref.offset += 4u; + Alloc param_100 = cmd_alloc; + CmdRef param_101 = cmd_ref; + uint param_102 = 0u; + Tile param_103 = tile_1; + float param_104 = 0.0; + write_fill(param_100, param_101, param_102, param_103, param_104, v_308, v_308BufferSize); + cmd_ref = param_101; + Alloc param_105 = cmd_alloc; + CmdRef param_106 = cmd_ref; + CmdEndClip param_107 = CmdEndClip{ end_clip.blend }; + Cmd_EndClip_write(param_105, param_106, param_107, v_308, v_308BufferSize); + cmd_ref.offset += 8u; break; } } @@ -1286,21 +1378,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _2326 = (bin_tile_x + tile_x) < _1169.conf.width_in_tiles; - bool _2335; - if (_2326) + bool _2467 = (bin_tile_x + tile_x) < _1283.conf.width_in_tiles; + bool _2476; + if (_2467) { - _2335 = (bin_tile_y + tile_y) < _1169.conf.height_in_tiles; + _2476 = (bin_tile_y + tile_y) < _1283.conf.height_in_tiles; } else { - _2335 = _2326; + _2476 = _2467; } - if (_2335) + if (_2476) { - Alloc param_105 = cmd_alloc; - CmdRef param_106 = cmd_ref; - Cmd_End_write(param_105, param_106, v_283, v_283BufferSize); + Alloc param_108 = cmd_alloc; + CmdRef param_109 = cmd_ref; + Cmd_End_write(param_108, param_109, v_308, v_308BufferSize); } } diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index b30e2d8..1fef2d7 100644 Binary files a/piet-gpu/shader/gen/coarse.spv and b/piet-gpu/shader/gen/coarse.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index d901a80..d1567c9 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index 0ca5843..1f2f78b 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -41,6 +41,17 @@ struct FillImage int2 offset; }; +struct ClipRef +{ + uint offset; +}; + +struct Clip +{ + float4 bbox; + uint blend; +}; + struct ElementTag { uint tag; @@ -102,6 +113,7 @@ struct AnnoBeginClip { float4 bbox; float linewidth; + uint blend; }; struct AnnoEndClipRef @@ -112,6 +124,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -148,14 +161,14 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -static const DrawMonoid _348 = { 0u, 0u }; -static const DrawMonoid _372 = { 1u, 0u }; -static const DrawMonoid _374 = { 1u, 1u }; +static const DrawMonoid _413 = { 0u, 0u }; +static const DrawMonoid _437 = { 1u, 0u }; +static const DrawMonoid _439 = { 1u, 1u }; -RWByteAddressBuffer _187 : register(u0, space0); -ByteAddressBuffer _211 : register(t2, space0); -ByteAddressBuffer _934 : register(t3, space0); -ByteAddressBuffer _968 : register(t1, space0); +RWByteAddressBuffer _199 : register(u0, space0); +ByteAddressBuffer _223 : register(t2, space0); +ByteAddressBuffer _1020 : register(t3, space0); +ByteAddressBuffer _1054 : register(t1, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -171,9 +184,9 @@ groupshared DrawMonoid sh_scratch[256]; ElementTag Element_tag(ElementRef ref) { - uint tag_and_flags = _211.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _321 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _321; + uint tag_and_flags = _223.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _378 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _378; } DrawMonoid map_tag(uint tag_word) @@ -184,24 +197,24 @@ DrawMonoid map_tag(uint tag_word) case 5u: case 6u: { - return _372; + return _437; } case 9u: case 10u: { - return _374; + return _439; } default: { - return _348; + return _413; } } } ElementRef Element_index(ElementRef ref, uint index) { - ElementRef _200 = { ref.offset + (index * 36u) }; - return _200; + ElementRef _212 = { ref.offset + (index * 36u) }; + return _212; } DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) @@ -214,13 +227,13 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid tag_monoid_identity() { - return _348; + return _413; } FillColor FillColor_read(FillColorRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); FillColor s; s.rgba_color = raw0; return s; @@ -228,8 +241,8 @@ FillColor FillColor_read(FillColorRef ref) FillColor Element_FillColor_read(ElementRef ref) { - FillColorRef _327 = { ref.offset + 4u }; - FillColorRef param = _327; + FillColorRef _384 = { ref.offset + 4u }; + FillColorRef param = _384; return FillColor_read(param); } @@ -246,7 +259,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _187.Store(offset * 4 + 8, val); + _199.Store(offset * 4 + 8, val); } void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) @@ -284,9 +297,9 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 1u; write_mem(param, param_1, param_2); - AnnoColorRef _735 = { ref.offset + 4u }; + AnnoColorRef _818 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoColorRef param_4 = _735; + AnnoColorRef param_4 = _818; AnnoColor param_5 = s; AnnoColor_write(param_3, param_4, param_5); } @@ -294,11 +307,11 @@ void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) FillLinGradient FillLinGradient_read(FillLinGradientRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); - uint raw1 = _211.Load((ix + 1u) * 4 + 0); - uint raw2 = _211.Load((ix + 2u) * 4 + 0); - uint raw3 = _211.Load((ix + 3u) * 4 + 0); - uint raw4 = _211.Load((ix + 4u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); + uint raw2 = _223.Load((ix + 2u) * 4 + 0); + uint raw3 = _223.Load((ix + 3u) * 4 + 0); + uint raw4 = _223.Load((ix + 4u) * 4 + 0); FillLinGradient s; s.index = raw0; s.p0 = float2(asfloat(raw1), asfloat(raw2)); @@ -308,8 +321,8 @@ FillLinGradient FillLinGradient_read(FillLinGradientRef ref) FillLinGradient Element_FillLinGradient_read(ElementRef ref) { - FillLinGradientRef _335 = { ref.offset + 4u }; - FillLinGradientRef param = _335; + FillLinGradientRef _392 = { ref.offset + 4u }; + FillLinGradientRef param = _392; return FillLinGradient_read(param); } @@ -360,9 +373,9 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 2u; write_mem(param, param_1, param_2); - AnnoLinGradientRef _756 = { ref.offset + 4u }; + AnnoLinGradientRef _839 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoLinGradientRef param_4 = _756; + AnnoLinGradientRef param_4 = _839; AnnoLinGradient param_5 = s; AnnoLinGradient_write(param_3, param_4, param_5); } @@ -370,8 +383,8 @@ void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinG FillImage FillImage_read(FillImageRef ref) { uint ix = ref.offset >> uint(2); - uint raw0 = _211.Load((ix + 0u) * 4 + 0); - uint raw1 = _211.Load((ix + 1u) * 4 + 0); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); FillImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -380,8 +393,8 @@ FillImage FillImage_read(FillImageRef ref) FillImage Element_FillImage_read(ElementRef ref) { - FillImageRef _343 = { ref.offset + 4u }; - FillImageRef param = _343; + FillImageRef _400 = { ref.offset + 4u }; + FillImageRef param = _400; return FillImage_read(param); } @@ -424,13 +437,33 @@ void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 3u; write_mem(param, param_1, param_2); - AnnoImageRef _777 = { ref.offset + 4u }; + AnnoImageRef _860 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoImageRef param_4 = _777; + AnnoImageRef param_4 = _860; AnnoImage param_5 = s; AnnoImage_write(param_3, param_4, param_5); } +Clip Clip_read(ClipRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _223.Load((ix + 0u) * 4 + 0); + uint raw1 = _223.Load((ix + 1u) * 4 + 0); + uint raw2 = _223.Load((ix + 2u) * 4 + 0); + uint raw3 = _223.Load((ix + 3u) * 4 + 0); + Clip s; + s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = _223.Load((ix + 4u) * 4 + 0); + return s; +} + +Clip Element_BeginClip_read(ElementRef ref) +{ + ClipRef _408 = { ref.offset + 4u }; + ClipRef param = _408; + return Clip_read(param); +} + void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) { uint ix = ref.offset >> uint(2); @@ -454,6 +487,10 @@ void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) uint param_13 = ix + 4u; uint param_14 = asuint(s.linewidth); write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.blend; + write_mem(param_15, param_16, param_17); } void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) @@ -462,9 +499,9 @@ void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginC uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 4u; write_mem(param, param_1, param_2); - AnnoBeginClipRef _798 = { ref.offset + 4u }; + AnnoBeginClipRef _881 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoBeginClipRef param_4 = _798; + AnnoBeginClipRef param_4 = _881; AnnoBeginClip param_5 = s; AnnoBeginClip_write(param_3, param_4, param_5); } @@ -488,17 +525,21 @@ void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) uint param_10 = ix + 3u; uint param_11 = asuint(s.bbox.w); write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = s.blend; + write_mem(param_12, param_13, param_14); } -void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) +void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; + uint param_2 = (flags << uint(16)) | 5u; write_mem(param, param_1, param_2); - AnnoEndClipRef _816 = { ref.offset + 4u }; + AnnoEndClipRef _902 = { ref.offset + 4u }; Alloc param_3 = a; - AnnoEndClipRef param_4 = _816; + AnnoEndClipRef param_4 = _902; AnnoEndClip param_5 = s; AnnoEndClip_write(param_3, param_4, param_5); } @@ -506,8 +547,8 @@ void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _834 = { ix * 36u }; - ElementRef ref = _834; + ElementRef _920 = { ix * 36u }; + ElementRef ref = _920; ElementRef param = ref; uint tag_word = Element_tag(param).tag; uint param_1 = tag_word; @@ -544,11 +585,11 @@ void comp_main() DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _940; - _940.path_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 0); - _940.clip_ix = _934.Load((gl_WorkGroupID.x - 1u) * 8 + 4); - row.path_ix = _940.path_ix; - row.clip_ix = _940.clip_ix; + DrawMonoid _1026; + _1026.path_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _1026.clip_ix = _1020.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _1026.path_ix; + row.clip_ix = _1026.clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -557,10 +598,10 @@ void comp_main() row = combine_tag_monoid(param_10, param_11); } uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_968.Load(44) >> uint(2)) + (out_ix * 2u); - uint clip_out_base = _968.Load(48) >> uint(2); - AnnotatedRef _989 = { _968.Load(32) + (out_ix * 40u) }; - AnnotatedRef out_ref = _989; + uint out_base = (_1054.Load(44) >> uint(2)) + (out_ix * 2u); + uint clip_out_base = _1054.Load(48) >> uint(2); + AnnotatedRef _1075 = { _1054.Load(32) + (out_ix * 40u) }; + AnnotatedRef out_ref = _1075; float4 mat; float2 translate; AnnoColor anno_fill; @@ -570,9 +611,9 @@ void comp_main() AnnoImage anno_img; Alloc param_28; AnnoBeginClip anno_begin_clip; - Alloc param_32; + Alloc param_33; AnnoEndClip anno_end_clip; - Alloc param_36; + Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -582,8 +623,8 @@ void comp_main() DrawMonoid param_13 = local[i_2 - 1u]; m = combine_tag_monoid(param_12, param_13); } - _187.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); - _187.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + _199.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _199.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); ElementRef param_14 = ref; uint param_15 = i_2; ElementRef this_ref = Element_index(param_14, param_15); @@ -591,22 +632,22 @@ void comp_main() tag_word = Element_tag(param_16).tag; if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) { - uint bbox_offset = (_968.Load(40) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_187.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_187.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_187.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_187.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_1054.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_199.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_199.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_199.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_199.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_187.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_199.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); if ((linewidth >= 0.0f) || (tag_word == 5u)) { - uint trans_ix = _187.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_968.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_187.Load(t * 4 + 8), _187.Load((t + 1u) * 4 + 8), _187.Load((t + 2u) * 4 + 8), _187.Load((t + 3u) * 4 + 8))); + uint trans_ix = _199.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_1054.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_199.Load(t * 4 + 8), _199.Load((t + 1u) * 4 + 8), _199.Load((t + 2u) * 4 + 8), _199.Load((t + 3u) * 4 + 8))); if (tag_word == 5u) { - translate = asfloat(uint2(_187.Load((t + 4u) * 4 + 8), _187.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_199.Load((t + 4u) * 4 + 8), _199.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) @@ -623,9 +664,9 @@ void comp_main() anno_fill.bbox = bbox; anno_fill.linewidth = linewidth; anno_fill.rgba_color = fill.rgba_color; - Alloc _1203; - _1203.offset = _968.Load(32); - param_18.offset = _1203.offset; + Alloc _1288; + _1288.offset = _1054.Load(32); + param_18.offset = _1288.offset; AnnotatedRef param_19 = out_ref; uint param_20 = fill_mode; AnnoColor param_21 = anno_fill; @@ -648,9 +689,9 @@ void comp_main() anno_lin.line_x = line_x; anno_lin.line_y = line_y; anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - Alloc _1299; - _1299.offset = _968.Load(32); - param_23.offset = _1299.offset; + Alloc _1384; + _1384.offset = _1054.Load(32); + param_23.offset = _1384.offset; AnnotatedRef param_24 = out_ref; uint param_25 = fill_mode; AnnoLinGradient param_26 = anno_lin; @@ -665,9 +706,9 @@ void comp_main() anno_img.linewidth = linewidth; anno_img.index = fill_img.index; anno_img.offset = fill_img.offset; - Alloc _1327; - _1327.offset = _968.Load(32); - param_28.offset = _1327.offset; + Alloc _1412; + _1412.offset = _1054.Load(32); + param_28.offset = _1412.offset; AnnotatedRef param_29 = out_ref; uint param_30 = fill_mode; AnnoImage param_31 = anno_img; @@ -676,15 +717,19 @@ void comp_main() } case 9u: { + ElementRef param_32 = this_ref; + Clip begin_clip = Element_BeginClip_read(param_32); anno_begin_clip.bbox = bbox; anno_begin_clip.linewidth = 0.0f; - Alloc _1344; - _1344.offset = _968.Load(32); - param_32.offset = _1344.offset; - AnnotatedRef param_33 = out_ref; - uint param_34 = 0u; - AnnoBeginClip param_35 = anno_begin_clip; - Annotated_BeginClip_write(param_32, param_33, param_34, param_35); + anno_begin_clip.blend = begin_clip.blend; + uint flags = uint(begin_clip.blend != 3u) << uint(1); + Alloc _1442; + _1442.offset = _1054.Load(32); + param_33.offset = _1442.offset; + AnnotatedRef param_34 = out_ref; + uint param_35 = flags; + AnnoBeginClip param_36 = anno_begin_clip; + Annotated_BeginClip_write(param_33, param_34, param_35, param_36); break; } } @@ -693,13 +738,18 @@ void comp_main() { if (tag_word == 10u) { + ElementRef param_37 = this_ref; + Clip end_clip = Element_BeginClip_read(param_37); anno_end_clip.bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - Alloc _1368; - _1368.offset = _968.Load(32); - param_36.offset = _1368.offset; - AnnotatedRef param_37 = out_ref; - AnnoEndClip param_38 = anno_end_clip; - Annotated_EndClip_write(param_36, param_37, param_38); + anno_end_clip.blend = end_clip.blend; + uint flags_1 = uint(end_clip.blend != 3u) << uint(1); + Alloc _1480; + _1480.offset = _1054.Load(32); + param_38.offset = _1480.offset; + AnnotatedRef param_39 = out_ref; + uint param_40 = flags_1; + AnnoEndClip param_41 = anno_end_clip; + Annotated_EndClip_write(param_38, param_39, param_40, param_41); } } if ((tag_word == 9u) || (tag_word == 10u)) @@ -709,7 +759,7 @@ void comp_main() { path_ix = m.path_ix; } - _187.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); + _199.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } out_ref.offset += 40u; } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index 8de5379..5b9ecc6 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -87,6 +87,17 @@ struct FillImage int2 offset; }; +struct ClipRef +{ + uint offset; +}; + +struct Clip +{ + float4 bbox; + uint blend; +}; + struct ElementTag { uint tag; @@ -148,6 +159,7 @@ struct AnnoBeginClip { float4 bbox; float linewidth; + uint blend; }; struct AnnoEndClipRef @@ -158,6 +170,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -228,9 +241,9 @@ struct ConfigBuf constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); static inline __attribute__((always_inline)) -ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_211) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_223) { - uint tag_and_flags = v_211.scene[ref.offset >> uint(2)]; + uint tag_and_flags = v_223.scene[ref.offset >> uint(2)]; return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } @@ -279,20 +292,20 @@ DrawMonoid tag_monoid_identity() } static inline __attribute__((always_inline)) -FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_211) +FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_223) { uint ix = ref.offset >> uint(2); - uint raw0 = v_211.scene[ix + 0u]; + uint raw0 = v_223.scene[ix + 0u]; FillColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_211) +FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_223) { FillColorRef param = FillColorRef{ ref.offset + 4u }; - return FillColor_read(param, v_211); + return FillColor_read(param, v_223); } static inline __attribute__((always_inline)) @@ -302,7 +315,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_187) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_199) { Alloc param = alloc; uint param_1 = offset; @@ -310,61 +323,61 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_187.memory[offset] = val; + v_199.memory[offset] = val; } static inline __attribute__((always_inline)) -void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_187) +void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_199) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_187); + write_mem(param_3, param_4, param_5, v_199); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_187); + write_mem(param_6, param_7, param_8, v_199); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_187); + write_mem(param_9, param_10, param_11, v_199); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_187); + write_mem(param_12, param_13, param_14, v_199); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.rgba_color; - write_mem(param_15, param_16, param_17, v_187); + write_mem(param_15, param_16, param_17, v_199); } static inline __attribute__((always_inline)) -void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_187) +void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_199) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u }; AnnoColor param_5 = s; - AnnoColor_write(param_3, param_4, param_5, v_187); + AnnoColor_write(param_3, param_4, param_5, v_199); } static inline __attribute__((always_inline)) -FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_211) +FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_223) { uint ix = ref.offset >> uint(2); - uint raw0 = v_211.scene[ix + 0u]; - uint raw1 = v_211.scene[ix + 1u]; - uint raw2 = v_211.scene[ix + 2u]; - uint raw3 = v_211.scene[ix + 3u]; - uint raw4 = v_211.scene[ix + 4u]; + uint raw0 = v_223.scene[ix + 0u]; + uint raw1 = v_223.scene[ix + 1u]; + uint raw2 = v_223.scene[ix + 2u]; + uint raw3 = v_223.scene[ix + 3u]; + uint raw4 = v_223.scene[ix + 4u]; FillLinGradient s; s.index = raw0; s.p0 = float2(as_type(raw1), as_type(raw2)); @@ -373,73 +386,73 @@ FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const } static inline __attribute__((always_inline)) -FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_211) +FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_223) { FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u }; - return FillLinGradient_read(param, v_211); + return FillLinGradient_read(param, v_223); } static inline __attribute__((always_inline)) -void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_187) +void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_199) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_187); + write_mem(param_3, param_4, param_5, v_199); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_187); + write_mem(param_6, param_7, param_8, v_199); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_187); + write_mem(param_9, param_10, param_11, v_199); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_187); + write_mem(param_12, param_13, param_14, v_199); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_187); + write_mem(param_15, param_16, param_17, v_199); Alloc param_18 = a; uint param_19 = ix + 6u; uint param_20 = as_type(s.line_x); - write_mem(param_18, param_19, param_20, v_187); + write_mem(param_18, param_19, param_20, v_199); Alloc param_21 = a; uint param_22 = ix + 7u; uint param_23 = as_type(s.line_y); - write_mem(param_21, param_22, param_23, v_187); + write_mem(param_21, param_22, param_23, v_199); Alloc param_24 = a; uint param_25 = ix + 8u; uint param_26 = as_type(s.line_c); - write_mem(param_24, param_25, param_26, v_187); + write_mem(param_24, param_25, param_26, v_199); } static inline __attribute__((always_inline)) -void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_187) +void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_199) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 2u; - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u }; AnnoLinGradient param_5 = s; - AnnoLinGradient_write(param_3, param_4, param_5, v_187); + AnnoLinGradient_write(param_3, param_4, param_5, v_199); } static inline __attribute__((always_inline)) -FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_211) +FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_223) { uint ix = ref.offset >> uint(2); - uint raw0 = v_211.scene[ix + 0u]; - uint raw1 = v_211.scene[ix + 1u]; + uint raw0 = v_223.scene[ix + 0u]; + uint raw1 = v_223.scene[ix + 1u]; FillImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -447,140 +460,169 @@ FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& } static inline __attribute__((always_inline)) -FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_211) +FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_223) { FillImageRef param = FillImageRef{ ref.offset + 4u }; - return FillImage_read(param, v_211); + return FillImage_read(param, v_223); } static inline __attribute__((always_inline)) -void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_187) +void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_199) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_187); + write_mem(param_3, param_4, param_5, v_199); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_187); + write_mem(param_6, param_7, param_8, v_199); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_187); + write_mem(param_9, param_10, param_11, v_199); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_187); + write_mem(param_12, param_13, param_14, v_199); Alloc param_15 = a; uint param_16 = ix + 5u; uint param_17 = s.index; - write_mem(param_15, param_16, param_17, v_187); + write_mem(param_15, param_16, param_17, v_199); Alloc param_18 = a; uint param_19 = ix + 6u; uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_18, param_19, param_20, v_187); + write_mem(param_18, param_19, param_20, v_199); } static inline __attribute__((always_inline)) -void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_187) +void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_199) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 3u; - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u }; AnnoImage param_5 = s; - AnnoImage_write(param_3, param_4, param_5, v_187); + AnnoImage_write(param_3, param_4, param_5, v_199); } static inline __attribute__((always_inline)) -void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_187) +Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_223) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = v_223.scene[ix + 0u]; + uint raw1 = v_223.scene[ix + 1u]; + uint raw2 = v_223.scene[ix + 2u]; + uint raw3 = v_223.scene[ix + 3u]; + Clip s; + s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); + s.blend = v_223.scene[ix + 4u]; + return s; +} + +static inline __attribute__((always_inline)) +Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_223) +{ + ClipRef param = ClipRef{ ref.offset + 4u }; + return Clip_read(param, v_223); +} + +static inline __attribute__((always_inline)) +void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_199) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_187); + write_mem(param_3, param_4, param_5, v_199); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_187); + write_mem(param_6, param_7, param_8, v_199); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_187); + write_mem(param_9, param_10, param_11, v_199); Alloc param_12 = a; uint param_13 = ix + 4u; uint param_14 = as_type(s.linewidth); - write_mem(param_12, param_13, param_14, v_187); + write_mem(param_12, param_13, param_14, v_199); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.blend; + write_mem(param_15, param_16, param_17, v_199); } static inline __attribute__((always_inline)) -void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_187) +void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_199) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = (flags << uint(16)) | 4u; - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u }; AnnoBeginClip param_5 = s; - AnnoBeginClip_write(param_3, param_4, param_5, v_187); + AnnoBeginClip_write(param_3, param_4, param_5, v_199); } static inline __attribute__((always_inline)) -void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_187) +void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_199) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = as_type(s.bbox.x); - write_mem(param, param_1, param_2, v_187); + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.bbox.y); - write_mem(param_3, param_4, param_5, v_187); + write_mem(param_3, param_4, param_5, v_199); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.bbox.z); - write_mem(param_6, param_7, param_8, v_187); + write_mem(param_6, param_7, param_8, v_199); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.bbox.w); - write_mem(param_9, param_10, param_11, v_187); + write_mem(param_9, param_10, param_11, v_199); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = s.blend; + write_mem(param_12, param_13, param_14, v_199); } static inline __attribute__((always_inline)) -void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_187) +void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoEndClip& s, device Memory& v_199) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; - write_mem(param, param_1, param_2, v_187); + uint param_2 = (flags << uint(16)) | 5u; + write_mem(param, param_1, param_2, v_199); Alloc param_3 = a; AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u }; AnnoEndClip param_5 = s; - AnnoEndClip_write(param_3, param_4, param_5, v_187); + AnnoEndClip_write(param_3, param_4, param_5, v_199); } -kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _968 [[buffer(1)]], const device SceneBuf& v_211 [[buffer(2)]], const device ParentBuf& _934 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& v_199 [[buffer(0)]], const device ConfigBuf& _1054 [[buffer(1)]], const device SceneBuf& v_223 [[buffer(2)]], const device ParentBuf& _1020 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; ElementRef ref = ElementRef{ ix * 36u }; ElementRef param = ref; - uint tag_word = Element_tag(param, v_211).tag; + uint tag_word = Element_tag(param, v_223).tag; uint param_1 = tag_word; DrawMonoid agg = map_tag(param_1); spvUnsafeArray local; @@ -590,7 +632,7 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 ElementRef param_2 = ref; uint param_3 = i; ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4, v_211).tag; + tag_word = Element_tag(param_4, v_223).tag; uint param_5 = tag_word; DrawMonoid param_6 = agg; DrawMonoid param_7 = map_tag(param_5); @@ -615,9 +657,9 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _937 = gl_WorkGroupID.x - 1u; - row.path_ix = _934.parent[_937].path_ix; - row.clip_ix = _934.parent[_937].clip_ix; + uint _1023 = gl_WorkGroupID.x - 1u; + row.path_ix = _1020.parent[_1023].path_ix; + row.clip_ix = _1020.parent[_1023].clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -626,9 +668,9 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 row = combine_tag_monoid(param_10, param_11); } uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_968.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); - uint clip_out_base = _968.conf.clip_alloc.offset >> uint(2); - AnnotatedRef out_ref = AnnotatedRef{ _968.conf.anno_alloc.offset + (out_ix * 40u) }; + uint out_base = (_1054.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); + uint clip_out_base = _1054.conf.clip_alloc.offset >> uint(2); + AnnotatedRef out_ref = AnnotatedRef{ _1054.conf.anno_alloc.offset + (out_ix * 40u) }; float4 mat; float2 translate; AnnoColor anno_fill; @@ -638,9 +680,9 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 AnnoImage anno_img; Alloc param_28; AnnoBeginClip anno_begin_clip; - Alloc param_32; + Alloc param_33; AnnoEndClip anno_end_clip; - Alloc param_36; + Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -650,31 +692,31 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 DrawMonoid param_13 = local[i_2 - 1u]; m = combine_tag_monoid(param_12, param_13); } - v_187.memory[out_base + (i_2 * 2u)] = m.path_ix; - v_187.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + v_199.memory[out_base + (i_2 * 2u)] = m.path_ix; + v_199.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; ElementRef param_14 = ref; uint param_15 = i_2; ElementRef this_ref = Element_index(param_14, param_15); ElementRef param_16 = this_ref; - tag_word = Element_tag(param_16, v_211).tag; + tag_word = Element_tag(param_16, v_223).tag; if ((((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) || (tag_word == 9u)) { - uint bbox_offset = (_968.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(v_187.memory[bbox_offset]) - 32768.0; - float bbox_t = float(v_187.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(v_187.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(v_187.memory[bbox_offset + 3u]) - 32768.0; + uint bbox_offset = (_1054.conf.bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(v_199.memory[bbox_offset]) - 32768.0; + float bbox_t = float(v_199.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(v_199.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(v_199.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(v_187.memory[bbox_offset + 4u]); + float linewidth = as_type(v_199.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); if ((linewidth >= 0.0) || (tag_word == 5u)) { - uint trans_ix = v_187.memory[bbox_offset + 5u]; - uint t = (_968.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(v_187.memory[t], v_187.memory[t + 1u], v_187.memory[t + 2u], v_187.memory[t + 3u])); + uint trans_ix = v_199.memory[bbox_offset + 5u]; + uint t = (_1054.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(v_199.memory[t], v_199.memory[t + 1u], v_199.memory[t + 2u], v_199.memory[t + 3u])); if (tag_word == 5u) { - translate = as_type(uint2(v_187.memory[t + 4u], v_187.memory[t + 5u])); + translate = as_type(uint2(v_199.memory[t + 4u], v_199.memory[t + 5u])); } } if (linewidth >= 0.0) @@ -687,21 +729,21 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 case 4u: { ElementRef param_17 = this_ref; - FillColor fill = Element_FillColor_read(param_17, v_211); + FillColor fill = Element_FillColor_read(param_17, v_223); anno_fill.bbox = bbox; anno_fill.linewidth = linewidth; anno_fill.rgba_color = fill.rgba_color; - param_18.offset = _968.conf.anno_alloc.offset; + param_18.offset = _1054.conf.anno_alloc.offset; AnnotatedRef param_19 = out_ref; uint param_20 = fill_mode; AnnoColor param_21 = anno_fill; - Annotated_Color_write(param_18, param_19, param_20, param_21, v_187); + Annotated_Color_write(param_18, param_19, param_20, param_21, v_199); break; } case 5u: { ElementRef param_22 = this_ref; - FillLinGradient lin = Element_FillLinGradient_read(param_22, v_211); + FillLinGradient lin = Element_FillLinGradient_read(param_22, v_223); anno_lin.bbox = bbox; anno_lin.linewidth = linewidth; anno_lin.index = lin.index; @@ -714,37 +756,41 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 anno_lin.line_x = line_x; anno_lin.line_y = line_y; anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); - param_23.offset = _968.conf.anno_alloc.offset; + param_23.offset = _1054.conf.anno_alloc.offset; AnnotatedRef param_24 = out_ref; uint param_25 = fill_mode; AnnoLinGradient param_26 = anno_lin; - Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_187); + Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_199); break; } case 6u: { ElementRef param_27 = this_ref; - FillImage fill_img = Element_FillImage_read(param_27, v_211); + FillImage fill_img = Element_FillImage_read(param_27, v_223); anno_img.bbox = bbox; anno_img.linewidth = linewidth; anno_img.index = fill_img.index; anno_img.offset = fill_img.offset; - param_28.offset = _968.conf.anno_alloc.offset; + param_28.offset = _1054.conf.anno_alloc.offset; AnnotatedRef param_29 = out_ref; uint param_30 = fill_mode; AnnoImage param_31 = anno_img; - Annotated_Image_write(param_28, param_29, param_30, param_31, v_187); + Annotated_Image_write(param_28, param_29, param_30, param_31, v_199); break; } case 9u: { + ElementRef param_32 = this_ref; + Clip begin_clip = Element_BeginClip_read(param_32, v_223); anno_begin_clip.bbox = bbox; anno_begin_clip.linewidth = 0.0; - param_32.offset = _968.conf.anno_alloc.offset; - AnnotatedRef param_33 = out_ref; - uint param_34 = 0u; - AnnoBeginClip param_35 = anno_begin_clip; - Annotated_BeginClip_write(param_32, param_33, param_34, param_35, v_187); + anno_begin_clip.blend = begin_clip.blend; + uint flags = uint(begin_clip.blend != 3u) << uint(1); + param_33.offset = _1054.conf.anno_alloc.offset; + AnnotatedRef param_34 = out_ref; + uint param_35 = flags; + AnnoBeginClip param_36 = anno_begin_clip; + Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_199); break; } } @@ -753,11 +799,16 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 { if (tag_word == 10u) { + ElementRef param_37 = this_ref; + Clip end_clip = Element_BeginClip_read(param_37, v_223); anno_end_clip.bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - param_36.offset = _968.conf.anno_alloc.offset; - AnnotatedRef param_37 = out_ref; - AnnoEndClip param_38 = anno_end_clip; - Annotated_EndClip_write(param_36, param_37, param_38, v_187); + anno_end_clip.blend = end_clip.blend; + uint flags_1 = uint(end_clip.blend != 3u) << uint(1); + param_38.offset = _1054.conf.anno_alloc.offset; + AnnotatedRef param_39 = out_ref; + uint param_40 = flags_1; + AnnoEndClip param_41 = anno_end_clip; + Annotated_EndClip_write(param_38, param_39, param_40, param_41, v_199); } } if ((tag_word == 9u) || (tag_word == 10u)) @@ -767,7 +818,7 @@ kernel void main0(device Memory& v_187 [[buffer(0)]], const device ConfigBuf& _9 { path_ix = m.path_ix; } - v_187.memory[clip_out_base + m.clip_ix] = path_ix; + v_199.memory[clip_out_base + m.clip_ix] = path_ix; } out_ref.offset += 40u; } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index d5e9136..bdbdb0c 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.spv and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index 0a14cfa..c0c27c9 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index 9457d14..21bd083 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ b/piet-gpu/shader/gen/kernel4.hlsl @@ -69,6 +69,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -132,8 +142,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _202 : register(u0, space0); -ByteAddressBuffer _723 : register(t1, space0); +RWByteAddressBuffer _278 : register(u0, space0); +ByteAddressBuffer _1521 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -160,8 +170,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _215 = { a.offset + offset }; - return _215; + Alloc _291 = { a.offset + offset }; + return _291; } bool touch_mem(Alloc alloc, uint offset) @@ -177,7 +187,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _202.Load(offset * 4 + 8); + uint v = _278.Load(offset * 4 + 8); return v; } @@ -186,8 +196,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _432 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _432; + CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _525; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -207,9 +217,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _449 = { ref.offset + 4u }; + CmdStrokeRef _542 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _449; + CmdStrokeRef param_1 = _542; return CmdStroke_read(param, param_1); } @@ -245,8 +255,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _572 = { raw5 }; - s.next = _572; + TileSegRef _675 = { raw5 }; + s.next = _675; return s; } @@ -272,9 +282,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _439 = { ref.offset + 4u }; + CmdFillRef _532 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _439; + CmdFillRef param_1 = _532; return CmdFill_read(param, param_1); } @@ -291,9 +301,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _459 = { ref.offset + 4u }; + CmdAlphaRef _552 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _459; + CmdAlphaRef param_1 = _552; return CmdAlpha_read(param, param_1); } @@ -310,9 +320,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _469 = { ref.offset + 4u }; + CmdColorRef _562 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _469; + CmdColorRef param_1 = _562; return CmdColor_read(param, param_1); } @@ -356,9 +366,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _479 = { ref.offset + 4u }; + CmdLinGradRef _572 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _479; + CmdLinGradRef param_1 = _572; return CmdLinGrad_read(param, param_1); } @@ -379,9 +389,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _489 = { ref.offset + 4u }; + CmdImageRef _582 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _489; + CmdImageRef param_1 = _582; return CmdImage_read(param, param_1); } @@ -394,10 +404,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -418,6 +428,438 @@ uint packsRGB(inout float4 rgba) return spvPackUnorm4x8(rgba.wzyx); } +CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdEndClip s; + s.blend = raw0; + return s; +} + +CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) +{ + CmdEndClipRef _592 = { ref.offset + 4u }; + Alloc param = a; + CmdEndClipRef param_1 = _592; + return CmdEndClip_read(param, param_1); +} + +float3 screen(float3 cb, float3 cs) +{ + return (cb + cs) - (cb * cs); +} + +float3 hard_light(float3 cb, float3 cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0f) - 1.0f.xxx; + return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float color_dodge(float cb, float cs) +{ + if (cb == 0.0f) + { + return 0.0f; + } + else + { + if (cs == 1.0f) + { + return 1.0f; + } + else + { + return min(1.0f, cb / (1.0f - cs)); + } + } +} + +float color_burn(float cb, float cs) +{ + if (cb == 1.0f) + { + return 1.0f; + } + else + { + if (cs == 0.0f) + { + return 0.0f; + } + else + { + return 1.0f - min(1.0f, (1.0f - cb) / cs); + } + } +} + +float3 soft_light(float3 cb, float3 cs) +{ + float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); + return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float sat(float3 c) +{ + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0f; + cmax = 0.0f; + } + cmin = 0.0f; +} + +float3 set_sat(inout float3 c, float s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +float lum(float3 c) +{ + float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); + return dot(c, f); +} + +float3 clip_color(inout float3 c) +{ + float3 param = c; + float L = lum(param); + float n = min(c.x, min(c.y, c.z)); + float x = max(c.x, max(c.y, c.z)); + if (n < 0.0f) + { + c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); + } + if (x > 1.0f) + { + c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); + } + return c; +} + +float3 set_lum(float3 c, float l) +{ + float3 param = c; + float3 param_1 = c + (l - lum(param)).xxx; + float3 _901 = clip_color(param_1); + return _901; +} + +float3 mix_blend(float3 cb, float3 cs, uint mode) +{ + float3 b = 0.0f.xxx; + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = min(cb, cs); + break; + } + case 5u: + { + b = max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0f) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) +{ + float fa = 0.0f; + float fb = 0.0f; + switch (mode) + { + case 1u: + { + fa = 1.0f; + fb = 0.0f; + break; + } + case 2u: + { + fa = 0.0f; + fb = 1.0f; + break; + } + case 3u: + { + fa = 1.0f; + fb = 1.0f - as; + break; + } + case 4u: + { + fa = 1.0f - ab; + fb = 1.0f; + break; + } + case 5u: + { + fa = ab; + fb = 0.0f; + break; + } + case 6u: + { + fa = 0.0f; + fb = as; + break; + } + case 7u: + { + fa = 1.0f - ab; + fb = 0.0f; + break; + } + case 8u: + { + fa = 0.0f; + fb = 1.0f - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0f - as; + break; + } + case 10u: + { + fa = 1.0f - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0f - ab; + fb = 1.0f - as; + break; + } + case 12u: + { + fa = 1.0f; + fb = 1.0f; + break; + } + case 13u: + { + return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + } + case 14u: + { + return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> uint(2); @@ -431,24 +873,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _499 = { ref.offset + 4u }; + CmdJumpRef _602 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _499; + CmdJumpRef param_1 = _602; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _723.Load(8)) + gl_WorkGroupID.x; - Alloc _738; - _738.offset = _723.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; + Alloc _1536; + _1536.offset = _1521.Load(24); Alloc param; - param.offset = _738.offset; + param.offset = _1536.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _747 = { cmd_alloc.offset }; - CmdRef cmd_ref = _747; + CmdRef _1545 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1545; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -457,7 +899,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _202.Load(4) == 0u; + bool mem_ok = _278.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -482,8 +924,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _842 = { stroke.tile_ref }; - tile_seg_ref = _842; + TileSegRef _1638 = { stroke.tile_ref }; + tile_seg_ref = _1638; do { uint param_7 = tile_seg_ref.offset; @@ -519,8 +961,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _964 = { fill.tile_ref }; - tile_seg_ref = _964; + TileSegRef _1758 = { fill.tile_ref }; + tile_seg_ref = _1758; do { uint param_15 = tile_seg_ref.offset; @@ -609,10 +1051,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -625,9 +1067,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_30, param_31); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; - float4 _1327[8]; - fillImage(_1327, param_32, param_33); - float4 img[8] = _1327; + float4 _2121[8]; + fillImage(_2121, param_32, param_33); + float4 img[8] = _2121; for (uint k_10 = 0u; k_10 < 8u; k_10++) { float4 fg_k_1 = img[k_10] * area[k_10]; @@ -642,8 +1084,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -652,24 +1094,44 @@ void comp_main() } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1453; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; + cmd_ref = _2299; cmd_alloc.offset = cmd_ref.offset; break; } @@ -677,9 +1139,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - float3 param_39 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_38))] = float4(tosRGB(param_39), rgba[i_1].w); + uint param_48 = i_1; + float3 param_49 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index 3dc7517..9a8fbd0 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -115,6 +115,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -208,7 +218,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_202) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) { Alloc param = alloc; uint param_1 = offset; @@ -216,29 +226,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_202.memory[offset]; + uint v = v_278.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_202); + uint tag_and_flags = read_mem(param, param_1, v_278); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_202) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -246,11 +256,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_202); + return CmdStroke_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -262,27 +272,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_202) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_202); + uint raw2 = read_mem(param_4, param_5, v_278); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_202); + uint raw3 = read_mem(param_6, param_7, v_278); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_202); + uint raw4 = read_mem(param_8, param_9, v_278); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_202); + uint raw5 = read_mem(param_10, param_11, v_278); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -298,15 +308,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_202) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -314,51 +324,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_202); + return CmdFill_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_202) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_202); + return CmdAlpha_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_202) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_202); + return CmdColor_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -379,21 +389,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_202) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_202); + uint raw2 = read_mem(param_4, param_5, v_278); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_202); + uint raw3 = read_mem(param_6, param_7, v_278); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -403,23 +413,23 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_202); + return CmdLinGrad_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_202) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -427,11 +437,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_202); + return CmdImage_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -444,10 +454,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } return rgba; @@ -471,30 +481,476 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_202) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); + CmdEndClip s; + s.blend = raw0; + return s; +} + +static inline __attribute__((always_inline)) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +{ + Alloc param = a; + CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; + return CmdEndClip_read(param, param_1, v_278); +} + +static inline __attribute__((always_inline)) +float3 screen(thread const float3& cb, thread const float3& cs) +{ + return (cb + cs) - (cb * cs); +} + +static inline __attribute__((always_inline)) +float3 hard_light(thread const float3& cb, thread const float3& cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0) - float3(1.0); + return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); +} + +static inline __attribute__((always_inline)) +float color_dodge(thread const float& cb, thread const float& cs) +{ + if (cb == 0.0) + { + return 0.0; + } + else + { + if (cs == 1.0) + { + return 1.0; + } + else + { + return fast::min(1.0, cb / (1.0 - cs)); + } + } +} + +static inline __attribute__((always_inline)) +float color_burn(thread const float& cb, thread const float& cs) +{ + if (cb == 1.0) + { + return 1.0; + } + else + { + if (cs == 0.0) + { + return 0.0; + } + else + { + return 1.0 - fast::min(1.0, (1.0 - cb) / cs); + } + } +} + +static inline __attribute__((always_inline)) +float3 soft_light(thread const float3& cb, thread const float3& cs) +{ + float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); + return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); +} + +static inline __attribute__((always_inline)) +float sat(thread const float3& c) +{ + return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); +} + +static inline __attribute__((always_inline)) +void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0; + cmax = 0.0; + } + cmin = 0.0; +} + +static inline __attribute__((always_inline)) +float3 set_sat(thread float3& c, thread const float& s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +static inline __attribute__((always_inline)) +float lum(thread const float3& c) +{ + float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); + return dot(c, f); +} + +static inline __attribute__((always_inline)) +float3 clip_color(thread float3& c) +{ + float3 param = c; + float L = lum(param); + float n = fast::min(c.x, fast::min(c.y, c.z)); + float x = fast::max(c.x, fast::max(c.y, c.z)); + if (n < 0.0) + { + c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); + } + if (x > 1.0) + { + c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); + } + return c; +} + +static inline __attribute__((always_inline)) +float3 set_lum(thread const float3& c, thread const float& l) +{ + float3 param = c; + float3 param_1 = c + float3(l - lum(param)); + float3 _901 = clip_color(param_1); + return _901; +} + +static inline __attribute__((always_inline)) +float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) +{ + float3 b = float3(0.0); + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = fast::min(cb, cs); + break; + } + case 5u: + { + b = fast::max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +static inline __attribute__((always_inline)) +float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) +{ + float fa = 0.0; + float fb = 0.0; + switch (mode) + { + case 1u: + { + fa = 1.0; + fb = 0.0; + break; + } + case 2u: + { + fa = 0.0; + fb = 1.0; + break; + } + case 3u: + { + fa = 1.0; + fb = 1.0 - as; + break; + } + case 4u: + { + fa = 1.0 - ab; + fb = 1.0; + break; + } + case 5u: + { + fa = ab; + fb = 0.0; + break; + } + case 6u: + { + fa = 0.0; + fb = as; + break; + } + case 7u: + { + fa = 1.0 - ab; + fb = 0.0; + break; + } + case 8u: + { + fa = 0.0; + fb = 1.0 - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0 - as; + break; + } + case 10u: + { + fa = 1.0 - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0 - ab; + fb = 1.0 - as; + break; + } + case 12u: + { + fa = 1.0; + fb = 1.0; + break; + } + case 13u: + { + return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); + } + case 14u: + { + return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + +static inline __attribute__((always_inline)) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_278); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_202); + return CmdJump_read(param, param_1, v_278); } -kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _723 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _723.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _723.conf.ptcl_alloc.offset; + param.offset = _1521.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -507,7 +963,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_202.mem_error == 0u; + bool mem_ok = v_278.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -516,7 +972,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_202).tag; + uint tag = Cmd_tag(param_3, param_4, v_278).tag; if (tag == 0u) { break; @@ -527,7 +983,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_202); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -540,7 +996,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_202); + TileSeg seg = TileSeg_read(param_10, param_11, v_278); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -563,7 +1019,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_202); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -576,7 +1032,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_202); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -620,7 +1076,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_202); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_278); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -632,7 +1088,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_202); + CmdColor color = Cmd_Color_read(param_23, param_24, v_278); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -647,7 +1103,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_202); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -657,10 +1113,10 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -670,7 +1126,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_202); + CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; spvUnsafeArray img; @@ -689,8 +1145,8 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = float4(0.0); } clip_depth++; @@ -699,23 +1155,43 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_36, param_37, v_202).new_ref }; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -723,9 +1199,9 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - float3 param_39 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_39), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_38)))); + uint param_48 = i_1; + float3 param_49 = rgba[i_1].xyz; + image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); } } diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 31f11c9..4d205ce 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index f3bd028..18c4b7e 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index 5ff97fb..7dc2e01 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -69,6 +69,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -132,8 +142,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _202 : register(u0, space0); -ByteAddressBuffer _723 : register(t1, space0); +RWByteAddressBuffer _278 : register(u0, space0); +ByteAddressBuffer _1521 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -160,8 +170,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _215 = { a.offset + offset }; - return _215; + Alloc _291 = { a.offset + offset }; + return _291; } bool touch_mem(Alloc alloc, uint offset) @@ -177,7 +187,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _202.Load(offset * 4 + 8); + uint v = _278.Load(offset * 4 + 8); return v; } @@ -186,8 +196,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _432 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _432; + CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _525; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -207,9 +217,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _449 = { ref.offset + 4u }; + CmdStrokeRef _542 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _449; + CmdStrokeRef param_1 = _542; return CmdStroke_read(param, param_1); } @@ -245,8 +255,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _572 = { raw5 }; - s.next = _572; + TileSegRef _675 = { raw5 }; + s.next = _675; return s; } @@ -272,9 +282,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _439 = { ref.offset + 4u }; + CmdFillRef _532 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _439; + CmdFillRef param_1 = _532; return CmdFill_read(param, param_1); } @@ -291,9 +301,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _459 = { ref.offset + 4u }; + CmdAlphaRef _552 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _459; + CmdAlphaRef param_1 = _552; return CmdAlpha_read(param, param_1); } @@ -310,9 +320,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _469 = { ref.offset + 4u }; + CmdColorRef _562 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _469; + CmdColorRef param_1 = _562; return CmdColor_read(param, param_1); } @@ -356,9 +366,9 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _479 = { ref.offset + 4u }; + CmdLinGradRef _572 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _479; + CmdLinGradRef param_1 = _572; return CmdLinGrad_read(param, param_1); } @@ -379,9 +389,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _489 = { ref.offset + 4u }; + CmdImageRef _582 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _489; + CmdImageRef param_1 = _582; return CmdImage_read(param, param_1); } @@ -394,10 +404,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -418,6 +428,438 @@ uint packsRGB(inout float4 rgba) return spvPackUnorm4x8(rgba.wzyx); } +CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdEndClip s; + s.blend = raw0; + return s; +} + +CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) +{ + CmdEndClipRef _592 = { ref.offset + 4u }; + Alloc param = a; + CmdEndClipRef param_1 = _592; + return CmdEndClip_read(param, param_1); +} + +float3 screen(float3 cb, float3 cs) +{ + return (cb + cs) - (cb * cs); +} + +float3 hard_light(float3 cb, float3 cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0f) - 1.0f.xxx; + return lerp(screen(param, param_1), (cb * 2.0f) * cs, float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float color_dodge(float cb, float cs) +{ + if (cb == 0.0f) + { + return 0.0f; + } + else + { + if (cs == 1.0f) + { + return 1.0f; + } + else + { + return min(1.0f, cb / (1.0f - cs)); + } + } +} + +float color_burn(float cb, float cs) +{ + if (cb == 1.0f) + { + return 1.0f; + } + else + { + if (cs == 0.0f) + { + return 0.0f; + } + else + { + return 1.0f - min(1.0f, (1.0f - cb) / cs); + } + } +} + +float3 soft_light(float3 cb, float3 cs) +{ + float3 d = lerp(sqrt(cb), ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb, float3(bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z))); + return lerp(cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)), cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)), float3(bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z))); +} + +float sat(float3 c) +{ + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0f; + cmax = 0.0f; + } + cmin = 0.0f; +} + +float3 set_sat(inout float3 c, float s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +float lum(float3 c) +{ + float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); + return dot(c, f); +} + +float3 clip_color(inout float3 c) +{ + float3 param = c; + float L = lum(param); + float n = min(c.x, min(c.y, c.z)); + float x = max(c.x, max(c.y, c.z)); + if (n < 0.0f) + { + c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); + } + if (x > 1.0f) + { + c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); + } + return c; +} + +float3 set_lum(float3 c, float l) +{ + float3 param = c; + float3 param_1 = c + (l - lum(param)).xxx; + float3 _901 = clip_color(param_1); + return _901; +} + +float3 mix_blend(float3 cb, float3 cs, uint mode) +{ + float3 b = 0.0f.xxx; + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = min(cb, cs); + break; + } + case 5u: + { + b = max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0f) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) +{ + float fa = 0.0f; + float fb = 0.0f; + switch (mode) + { + case 1u: + { + fa = 1.0f; + fb = 0.0f; + break; + } + case 2u: + { + fa = 0.0f; + fb = 1.0f; + break; + } + case 3u: + { + fa = 1.0f; + fb = 1.0f - as; + break; + } + case 4u: + { + fa = 1.0f - ab; + fb = 1.0f; + break; + } + case 5u: + { + fa = ab; + fb = 0.0f; + break; + } + case 6u: + { + fa = 0.0f; + fb = as; + break; + } + case 7u: + { + fa = 1.0f - ab; + fb = 0.0f; + break; + } + case 8u: + { + fa = 0.0f; + fb = 1.0f - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0f - as; + break; + } + case 10u: + { + fa = 1.0f - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0f - ab; + fb = 1.0f - as; + break; + } + case 12u: + { + fa = 1.0f; + fb = 1.0f; + break; + } + case 13u: + { + return float4(max(0.0f.xxxx, ((1.0f.xxxx - (float4(cs, as) * as)) + 1.0f.xxxx) - (float4(cb, ab) * ab)).xyz, max(0.0f, ((1.0f - as) + 1.0f) - ab)); + } + case 14u: + { + return float4(min(1.0f.xxxx, (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, min(1.0f, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> uint(2); @@ -431,24 +873,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _499 = { ref.offset + 4u }; + CmdJumpRef _602 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _499; + CmdJumpRef param_1 = _602; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _723.Load(8)) + gl_WorkGroupID.x; - Alloc _738; - _738.offset = _723.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; + Alloc _1536; + _1536.offset = _1521.Load(24); Alloc param; - param.offset = _738.offset; + param.offset = _1536.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _747 = { cmd_alloc.offset }; - CmdRef cmd_ref = _747; + CmdRef _1545 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1545; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -457,7 +899,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _202.Load(4) == 0u; + bool mem_ok = _278.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -482,8 +924,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _842 = { stroke.tile_ref }; - tile_seg_ref = _842; + TileSegRef _1638 = { stroke.tile_ref }; + tile_seg_ref = _1638; do { uint param_7 = tile_seg_ref.offset; @@ -519,8 +961,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _964 = { fill.tile_ref }; - tile_seg_ref = _964; + TileSegRef _1758 = { fill.tile_ref }; + tile_seg_ref = _1758; do { uint param_15 = tile_seg_ref.offset; @@ -609,10 +1051,10 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -625,9 +1067,9 @@ void comp_main() CmdImage fill_img = Cmd_Image_read(param_30, param_31); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; - float4 _1327[8]; - fillImage(_1327, param_32, param_33); - float4 img[8] = _1327; + float4 _2121[8]; + fillImage(_2121, param_32, param_33); + float4 img[8] = _2121; for (uint k_10 = 0u; k_10 < 8u; k_10++) { float4 fg_k_1 = img[k_10] * area[k_10]; @@ -642,8 +1084,8 @@ void comp_main() { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = 0.0f.xxxx; } clip_depth++; @@ -652,24 +1094,44 @@ void comp_main() } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - CmdRef _1453 = { Cmd_Jump_read(param_36, param_37).new_ref }; - cmd_ref = _1453; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; + cmd_ref = _2299; cmd_alloc.offset = cmd_ref.offset; break; } @@ -677,8 +1139,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - image[int2(xy_uint + chunk_offset(param_38))] = rgba[i_1].w.x; + uint param_48 = i_1; + image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 15351a0..38506dd 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -115,6 +115,16 @@ struct CmdAlpha float alpha; }; +struct CmdEndClipRef +{ + uint offset; +}; + +struct CmdEndClip +{ + uint blend; +}; + struct CmdJumpRef { uint offset; @@ -208,7 +218,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_202) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) { Alloc param = alloc; uint param_1 = offset; @@ -216,29 +226,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_202.memory[offset]; + uint v = v_278.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_202); + uint tag_and_flags = read_mem(param, param_1, v_278); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_202) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -246,11 +256,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_202); + return CmdStroke_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -262,27 +272,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_202) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_202); + uint raw2 = read_mem(param_4, param_5, v_278); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_202); + uint raw3 = read_mem(param_6, param_7, v_278); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_202); + uint raw4 = read_mem(param_8, param_9, v_278); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_202); + uint raw5 = read_mem(param_10, param_11, v_278); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -298,15 +308,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_202) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -314,51 +324,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_202); + return CmdFill_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_202) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_202); + return CmdAlpha_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_202) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_202); + return CmdColor_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -379,21 +389,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_202) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_202); + uint raw2 = read_mem(param_4, param_5, v_278); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_202); + uint raw3 = read_mem(param_6, param_7, v_278); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -403,23 +413,23 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_202); + return CmdLinGrad_read(param, param_1, v_278); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_202) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_202); + uint raw1 = read_mem(param_2, param_3, v_278); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -427,11 +437,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_202); + return CmdImage_read(param, param_1, v_278); } static inline __attribute__((always_inline)) @@ -444,10 +454,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _695 = fromsRGB(param_1); - fg_rgba.x = _695.x; - fg_rgba.y = _695.y; - fg_rgba.z = _695.z; + float3 _1493 = fromsRGB(param_1); + fg_rgba.x = _1493.x; + fg_rgba.y = _1493.y; + fg_rgba.z = _1493.z; rgba[i] = fg_rgba; } return rgba; @@ -471,30 +481,476 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_202) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_202); + uint raw0 = read_mem(param, param_1, v_278); + CmdEndClip s; + s.blend = raw0; + return s; +} + +static inline __attribute__((always_inline)) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +{ + Alloc param = a; + CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; + return CmdEndClip_read(param, param_1, v_278); +} + +static inline __attribute__((always_inline)) +float3 screen(thread const float3& cb, thread const float3& cs) +{ + return (cb + cs) - (cb * cs); +} + +static inline __attribute__((always_inline)) +float3 hard_light(thread const float3& cb, thread const float3& cs) +{ + float3 param = cb; + float3 param_1 = (cs * 2.0) - float3(1.0); + return mix(screen(param, param_1), (cb * 2.0) * cs, float3(cs <= float3(0.5))); +} + +static inline __attribute__((always_inline)) +float color_dodge(thread const float& cb, thread const float& cs) +{ + if (cb == 0.0) + { + return 0.0; + } + else + { + if (cs == 1.0) + { + return 1.0; + } + else + { + return fast::min(1.0, cb / (1.0 - cs)); + } + } +} + +static inline __attribute__((always_inline)) +float color_burn(thread const float& cb, thread const float& cs) +{ + if (cb == 1.0) + { + return 1.0; + } + else + { + if (cs == 0.0) + { + return 0.0; + } + else + { + return 1.0 - fast::min(1.0, (1.0 - cb) / cs); + } + } +} + +static inline __attribute__((always_inline)) +float3 soft_light(thread const float3& cb, thread const float3& cs) +{ + float3 d = mix(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, float3(cb <= float3(0.25))); + return mix(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), float3(cs <= float3(0.5))); +} + +static inline __attribute__((always_inline)) +float sat(thread const float3& c) +{ + return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); +} + +static inline __attribute__((always_inline)) +void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) +{ + if (cmax > cmin) + { + cmid = ((cmid - cmin) * s) / (cmax - cmin); + cmax = s; + } + else + { + cmid = 0.0; + cmax = 0.0; + } + cmin = 0.0; +} + +static inline __attribute__((always_inline)) +float3 set_sat(thread float3& c, thread const float& s) +{ + if (c.x <= c.y) + { + if (c.y <= c.z) + { + float param = c.x; + float param_1 = c.y; + float param_2 = c.z; + float param_3 = s; + set_sat_inner(param, param_1, param_2, param_3); + c.x = param; + c.y = param_1; + c.z = param_2; + } + else + { + if (c.x <= c.z) + { + float param_4 = c.x; + float param_5 = c.z; + float param_6 = c.y; + float param_7 = s; + set_sat_inner(param_4, param_5, param_6, param_7); + c.x = param_4; + c.z = param_5; + c.y = param_6; + } + else + { + float param_8 = c.z; + float param_9 = c.x; + float param_10 = c.y; + float param_11 = s; + set_sat_inner(param_8, param_9, param_10, param_11); + c.z = param_8; + c.x = param_9; + c.y = param_10; + } + } + } + else + { + if (c.x <= c.z) + { + float param_12 = c.y; + float param_13 = c.x; + float param_14 = c.z; + float param_15 = s; + set_sat_inner(param_12, param_13, param_14, param_15); + c.y = param_12; + c.x = param_13; + c.z = param_14; + } + else + { + if (c.y <= c.z) + { + float param_16 = c.y; + float param_17 = c.z; + float param_18 = c.x; + float param_19 = s; + set_sat_inner(param_16, param_17, param_18, param_19); + c.y = param_16; + c.z = param_17; + c.x = param_18; + } + else + { + float param_20 = c.z; + float param_21 = c.y; + float param_22 = c.x; + float param_23 = s; + set_sat_inner(param_20, param_21, param_22, param_23); + c.z = param_20; + c.y = param_21; + c.x = param_22; + } + } + } + return c; +} + +static inline __attribute__((always_inline)) +float lum(thread const float3& c) +{ + float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); + return dot(c, f); +} + +static inline __attribute__((always_inline)) +float3 clip_color(thread float3& c) +{ + float3 param = c; + float L = lum(param); + float n = fast::min(c.x, fast::min(c.y, c.z)); + float x = fast::max(c.x, fast::max(c.y, c.z)); + if (n < 0.0) + { + c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); + } + if (x > 1.0) + { + c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); + } + return c; +} + +static inline __attribute__((always_inline)) +float3 set_lum(thread const float3& c, thread const float& l) +{ + float3 param = c; + float3 param_1 = c + float3(l - lum(param)); + float3 _901 = clip_color(param_1); + return _901; +} + +static inline __attribute__((always_inline)) +float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) +{ + float3 b = float3(0.0); + switch (mode) + { + case 1u: + { + b = cb * cs; + break; + } + case 2u: + { + float3 param = cb; + float3 param_1 = cs; + b = screen(param, param_1); + break; + } + case 3u: + { + float3 param_2 = cs; + float3 param_3 = cb; + b = hard_light(param_2, param_3); + break; + } + case 4u: + { + b = fast::min(cb, cs); + break; + } + case 5u: + { + b = fast::max(cb, cs); + break; + } + case 6u: + { + float param_4 = cb.x; + float param_5 = cs.x; + float param_6 = cb.y; + float param_7 = cs.y; + float param_8 = cb.z; + float param_9 = cs.z; + b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); + break; + } + case 7u: + { + float param_10 = cb.x; + float param_11 = cs.x; + float param_12 = cb.y; + float param_13 = cs.y; + float param_14 = cb.z; + float param_15 = cs.z; + b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); + break; + } + case 8u: + { + float3 param_16 = cb; + float3 param_17 = cs; + b = hard_light(param_16, param_17); + break; + } + case 9u: + { + float3 param_18 = cb; + float3 param_19 = cs; + b = soft_light(param_18, param_19); + break; + } + case 10u: + { + b = abs(cb - cs); + break; + } + case 11u: + { + b = (cb + cs) - ((cb * 2.0) * cs); + break; + } + case 12u: + { + float3 param_20 = cb; + float3 param_21 = cs; + float param_22 = sat(param_20); + float3 _1192 = set_sat(param_21, param_22); + float3 param_23 = cb; + float3 param_24 = _1192; + float param_25 = lum(param_23); + b = set_lum(param_24, param_25); + break; + } + case 13u: + { + float3 param_26 = cs; + float3 param_27 = cb; + float param_28 = sat(param_26); + float3 _1206 = set_sat(param_27, param_28); + float3 param_29 = cb; + float3 param_30 = _1206; + float param_31 = lum(param_29); + b = set_lum(param_30, param_31); + break; + } + case 14u: + { + float3 param_32 = cb; + float3 param_33 = cs; + float param_34 = lum(param_32); + b = set_lum(param_33, param_34); + break; + } + case 15u: + { + float3 param_35 = cs; + float3 param_36 = cb; + float param_37 = lum(param_35); + b = set_lum(param_36, param_37); + break; + } + default: + { + b = cs; + break; + } + } + return b; +} + +static inline __attribute__((always_inline)) +float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) +{ + float fa = 0.0; + float fb = 0.0; + switch (mode) + { + case 1u: + { + fa = 1.0; + fb = 0.0; + break; + } + case 2u: + { + fa = 0.0; + fb = 1.0; + break; + } + case 3u: + { + fa = 1.0; + fb = 1.0 - as; + break; + } + case 4u: + { + fa = 1.0 - ab; + fb = 1.0; + break; + } + case 5u: + { + fa = ab; + fb = 0.0; + break; + } + case 6u: + { + fa = 0.0; + fb = as; + break; + } + case 7u: + { + fa = 1.0 - ab; + fb = 0.0; + break; + } + case 8u: + { + fa = 0.0; + fb = 1.0 - as; + break; + } + case 9u: + { + fa = ab; + fb = 1.0 - as; + break; + } + case 10u: + { + fa = 1.0 - ab; + fb = as; + break; + } + case 11u: + { + fa = 1.0 - ab; + fb = 1.0 - as; + break; + } + case 12u: + { + fa = 1.0; + fb = 1.0; + break; + } + case 13u: + { + return float4(fast::max(float4(0.0), ((float4(1.0) - (float4(cs, as) * as)) + float4(1.0)) - (float4(cb, ab) * ab)).xyz, fast::max(0.0, ((1.0 - as) + 1.0) - ab)); + } + case 14u: + { + return float4(fast::min(float4(1.0), (float4(cs, as) * as) + (float4(cb, ab) * ab)).xyz, fast::min(1.0, as + ab)); + } + default: + { + break; + } + } + return (float4(cs, as) * (as * fa)) + (float4(cb, ab) * (ab * fb)); +} + +static inline __attribute__((always_inline)) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_278); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_202); + return CmdJump_read(param, param_1, v_278); } -kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _723 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _723.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _723.conf.ptcl_alloc.offset; + param.offset = _1521.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -507,7 +963,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_202.mem_error == 0u; + bool mem_ok = v_278.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -516,7 +972,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_202).tag; + uint tag = Cmd_tag(param_3, param_4, v_278).tag; if (tag == 0u) { break; @@ -527,7 +983,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_202); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -540,7 +996,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_202); + TileSeg seg = TileSeg_read(param_10, param_11, v_278); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -563,7 +1019,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_202); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -576,7 +1032,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_202); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -620,7 +1076,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_202); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_278); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -632,7 +1088,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_202); + CmdColor color = Cmd_Color_read(param_23, param_24, v_278); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -647,7 +1103,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_202); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -657,10 +1113,10 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _1298 = fromsRGB(param_29); - fg_rgba.x = _1298.x; - fg_rgba.y = _1298.y; - fg_rgba.z = _1298.z; + float3 _2092 = fromsRGB(param_29); + fg_rgba.x = _2092.x; + fg_rgba.y = _2092.y; + fg_rgba.z = _2092.z; rgba[k_9] = fg_rgba; } cmd_ref.offset += 20u; @@ -670,7 +1126,7 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_202); + CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); uint2 param_32 = xy_uint; CmdImage param_33 = fill_img; spvUnsafeArray img; @@ -689,8 +1145,8 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 { uint d_2 = min(clip_depth, 127u); float4 param_34 = float4(rgba[k_11]); - uint _1390 = packsRGB(param_34); - blend_stack[d_2][k_11] = _1390; + uint _2184 = packsRGB(param_34); + blend_stack[d_2][k_11] = _2184; rgba[k_11] = float4(0.0); } clip_depth++; @@ -699,23 +1155,43 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 } case 9u: { + Alloc param_35 = cmd_alloc; + CmdRef param_36 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + uint blend_mode = end_clip.blend >> uint(8); + uint comp_mode = end_clip.blend & 255u; clip_depth--; for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_3 = min(clip_depth, 127u); - uint param_35 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_35); + uint param_37 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_37); float4 fg_1 = rgba[k_12] * area[k_12]; - rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; + float3 param_38 = bg.xyz; + float3 param_39 = fg_1.xyz; + uint param_40 = blend_mode; + float3 blend = mix_blend(param_38, param_39, param_40); + float4 _2251 = fg_1; + float _2255 = fg_1.w; + float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); + fg_1.x = _2262.x; + fg_1.y = _2262.y; + fg_1.z = _2262.z; + float3 param_41 = bg.xyz; + float3 param_42 = fg_1.xyz; + float param_43 = bg.w; + float param_44 = fg_1.w; + uint param_45 = comp_mode; + rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); } - cmd_ref.offset += 4u; + cmd_ref.offset += 8u; break; } case 10u: { - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_36, param_37, v_202).new_ref }; + Alloc param_46 = cmd_alloc; + CmdRef param_47 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -723,8 +1199,8 @@ kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _7 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_38 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_38)))); + uint param_48 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); } } diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 42964c8..305facd 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index d69db16..fdc60a1 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and b/piet-gpu/shader/gen/tile_alloc.dxil differ diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl index 97e1c23..1c9d04b 100644 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ b/piet-gpu/shader/gen/tile_alloc.hlsl @@ -17,6 +17,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -76,7 +77,7 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); RWByteAddressBuffer _92 : register(u0, space0); -ByteAddressBuffer _305 : register(t1, space0); +ByteAddressBuffer _314 : register(t1, space0); static uint3 gl_LocalInvocationID; static uint3 gl_GlobalInvocationID; @@ -111,8 +112,8 @@ AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - AnnotatedTag _236 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _236; + AnnotatedTag _246 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _246; } AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) @@ -130,16 +131,20 @@ AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) Alloc param_6 = a; uint param_7 = ix + 3u; uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); AnnoEndClip s; s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + s.blend = raw4; return s; } AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) { - AnnoEndClipRef _243 = { ref.offset + 4u }; + AnnoEndClipRef _252 = { ref.offset + 4u }; Alloc param = a; - AnnoEndClipRef param_1 = _243; + AnnoEndClipRef param_1 = _252; return AnnoEndClip_read(param, param_1); } @@ -211,17 +216,17 @@ void comp_main() { uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; - PathRef _312 = { _305.Load(16) + (element_ix * 12u) }; - PathRef path_ref = _312; - AnnotatedRef _321 = { _305.Load(32) + (element_ix * 40u) }; - AnnotatedRef ref = _321; + PathRef _321 = { _314.Load(16) + (element_ix * 12u) }; + PathRef path_ref = _321; + AnnotatedRef _330 = { _314.Load(32) + (element_ix * 40u) }; + AnnotatedRef ref = _330; uint tag = 0u; - if (element_ix < _305.Load(0)) + if (element_ix < _314.Load(0)) { - Alloc _332; - _332.offset = _305.Load(32); + Alloc _341; + _341.offset = _314.Load(32); Alloc param; - param.offset = _332.offset; + param.offset = _341.offset; AnnotatedRef param_1 = ref; tag = Annotated_tag(param, param_1).tag; } @@ -237,10 +242,10 @@ void comp_main() case 4u: case 5u: { - Alloc _350; - _350.offset = _305.Load(32); + Alloc _359; + _359.offset = _314.Load(32); Alloc param_2; - param_2.offset = _350.offset; + param_2.offset = _359.offset; AnnotatedRef param_3 = ref; AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3); x0 = int(floor(clip.bbox.x * 0.0625f)); @@ -250,10 +255,10 @@ void comp_main() break; } } - x0 = clamp(x0, 0, int(_305.Load(8))); - y0 = clamp(y0, 0, int(_305.Load(12))); - x1 = clamp(x1, 0, int(_305.Load(8))); - y1 = clamp(y1, 0, int(_305.Load(12))); + x0 = clamp(x0, 0, int(_314.Load(8))); + y0 = clamp(y0, 0, int(_314.Load(12))); + x1 = clamp(x1, 0, int(_314.Load(8))); + y1 = clamp(y1, 0, int(_314.Load(12))); Path path; path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); uint tile_count = uint((x1 - x0) * (y1 - y0)); @@ -276,46 +281,46 @@ void comp_main() if (th_ix == 255u) { uint param_4 = total_tile_count * 8u; - MallocResult _476 = malloc(param_4); - sh_tile_alloc = _476; + MallocResult _485 = malloc(param_4); + sh_tile_alloc = _485; } GroupMemoryBarrierWithGroupSync(); MallocResult alloc_start = sh_tile_alloc; - bool _487; + bool _496; if (!alloc_start.failed) { - _487 = _92.Load(4) != 0u; + _496 = _92.Load(4) != 0u; } else { - _487 = alloc_start.failed; + _496 = alloc_start.failed; } - if (_487) + if (_496) { return; } - if (element_ix < _305.Load(0)) + if (element_ix < _314.Load(0)) { - uint _500; + uint _509; if (th_ix > 0u) { - _500 = sh_tile_count[th_ix - 1u]; + _509 = sh_tile_count[th_ix - 1u]; } else { - _500 = 0u; + _509 = 0u; } - uint tile_subix = _500; + uint tile_subix = _509; Alloc param_5 = alloc_start.alloc; uint param_6 = 8u * tile_subix; uint param_7 = 8u * tile_count; Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); - TileRef _522 = { tiles_alloc.offset }; - path.tiles = _522; - Alloc _527; - _527.offset = _305.Load(16); + TileRef _531 = { tiles_alloc.offset }; + path.tiles = _531; + Alloc _536; + _536.offset = _314.Load(16); Alloc param_8; - param_8.offset = _527.offset; + param_8.offset = _536.offset; PathRef param_9 = path_ref; Path param_10 = path; Path_write(param_8, param_9, param_10); diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl index bb10cf0..c03e830 100644 --- a/piet-gpu/shader/gen/tile_alloc.msl +++ b/piet-gpu/shader/gen/tile_alloc.msl @@ -26,6 +26,7 @@ struct AnnoEndClipRef struct AnnoEndClip { float4 bbox; + uint blend; }; struct AnnotatedRef @@ -145,8 +146,12 @@ AnnoEndClip AnnoEndClip_read(thread const Alloc& a, thread const AnnoEndClipRef& Alloc param_6 = a; uint param_7 = ix + 3u; uint raw3 = read_mem(param_6, param_7, v_92, v_92BufferSize); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_92, v_92BufferSize); AnnoEndClip s; s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); + s.blend = raw4; return s; } @@ -221,20 +226,20 @@ void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const P write_mem(param_6, param_7, param_8, v_92, v_92BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_92 [[buffer(0)]], const device ConfigBuf& _305 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_92 [[buffer(0)]], const device ConfigBuf& _314 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { threadgroup uint sh_tile_count[256]; threadgroup MallocResult sh_tile_alloc; constant uint& v_92BufferSize = spvBufferSizeConstants[0]; uint th_ix = gl_LocalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x; - PathRef path_ref = PathRef{ _305.conf.tile_alloc.offset + (element_ix * 12u) }; - AnnotatedRef ref = AnnotatedRef{ _305.conf.anno_alloc.offset + (element_ix * 40u) }; + PathRef path_ref = PathRef{ _314.conf.tile_alloc.offset + (element_ix * 12u) }; + AnnotatedRef ref = AnnotatedRef{ _314.conf.anno_alloc.offset + (element_ix * 40u) }; uint tag = 0u; - if (element_ix < _305.conf.n_elements) + if (element_ix < _314.conf.n_elements) { Alloc param; - param.offset = _305.conf.anno_alloc.offset; + param.offset = _314.conf.anno_alloc.offset; AnnotatedRef param_1 = ref; tag = Annotated_tag(param, param_1, v_92, v_92BufferSize).tag; } @@ -251,7 +256,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 5u: { Alloc param_2; - param_2.offset = _305.conf.anno_alloc.offset; + param_2.offset = _314.conf.anno_alloc.offset; AnnotatedRef param_3 = ref; AnnoEndClip clip = Annotated_EndClip_read(param_2, param_3, v_92, v_92BufferSize); x0 = int(floor(clip.bbox.x * 0.0625)); @@ -261,10 +266,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - x0 = clamp(x0, 0, int(_305.conf.width_in_tiles)); - y0 = clamp(y0, 0, int(_305.conf.height_in_tiles)); - x1 = clamp(x1, 0, int(_305.conf.width_in_tiles)); - y1 = clamp(y1, 0, int(_305.conf.height_in_tiles)); + x0 = clamp(x0, 0, int(_314.conf.width_in_tiles)); + y0 = clamp(y0, 0, int(_314.conf.height_in_tiles)); + x1 = clamp(x1, 0, int(_314.conf.width_in_tiles)); + y1 = clamp(y1, 0, int(_314.conf.height_in_tiles)); Path path; path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); uint tile_count = uint((x1 - x0) * (y1 - y0)); @@ -287,43 +292,43 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if (th_ix == 255u) { uint param_4 = total_tile_count * 8u; - MallocResult _476 = malloc(param_4, v_92, v_92BufferSize); - sh_tile_alloc = _476; + MallocResult _485 = malloc(param_4, v_92, v_92BufferSize); + sh_tile_alloc = _485; } threadgroup_barrier(mem_flags::mem_threadgroup); MallocResult alloc_start = sh_tile_alloc; - bool _487; + bool _496; if (!alloc_start.failed) { - _487 = v_92.mem_error != 0u; + _496 = v_92.mem_error != 0u; } else { - _487 = alloc_start.failed; + _496 = alloc_start.failed; } - if (_487) + if (_496) { return; } - if (element_ix < _305.conf.n_elements) + if (element_ix < _314.conf.n_elements) { - uint _500; + uint _509; if (th_ix > 0u) { - _500 = sh_tile_count[th_ix - 1u]; + _509 = sh_tile_count[th_ix - 1u]; } else { - _500 = 0u; + _509 = 0u; } - uint tile_subix = _500; + uint tile_subix = _509; Alloc param_5 = alloc_start.alloc; uint param_6 = 8u * tile_subix; uint param_7 = 8u * tile_count; Alloc tiles_alloc = slice_mem(param_5, param_6, param_7); path.tiles = TileRef{ tiles_alloc.offset }; Alloc param_8; - param_8.offset = _305.conf.tile_alloc.offset; + param_8.offset = _314.conf.tile_alloc.offset; PathRef param_9 = path_ref; Path param_10 = path; Path_write(param_8, param_9, param_10, v_92, v_92BufferSize); diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv index 12277f1..cf2f01c 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.spv and b/piet-gpu/shader/gen/tile_alloc.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index dd4a855..a97715a 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -35,6 +35,7 @@ layout(rgba8, set = 0, binding = 4) uniform restrict readonly image2D gradients; #include "ptcl.h" #include "tile.h" +#include "blend.h" #define MAX_BLEND_STACK 128 mediump vec3 tosRGB(mediump vec3 rgb) { @@ -216,14 +217,20 @@ void main() { cmd_ref.offset += 4; break; case Cmd_EndClip: + CmdEndClip end_clip = Cmd_EndClip_read(cmd_alloc, cmd_ref); + uint blend_mode = uint(end_clip.blend >> 8); + uint comp_mode = uint(end_clip.blend & 0xFF); clip_depth--; for (uint k = 0; k < CHUNK; k++) { uint d = min(clip_depth, MAX_BLEND_STACK - 1); mediump vec4 bg = unpacksRGB(blend_stack[d][k]); mediump vec4 fg = rgba[k] * area[k]; - rgba[k] = bg * (1.0 - fg.a) + fg; + vec3 blend = mix_blend(bg.rgb, fg.rgb, blend_mode); + // Apply the blend color only where the foreground and background overlap. + fg.rgb = mix(fg.rgb, blend, float((fg.a * bg.a) > 0.0)); + rgba[k] = mix_compose(bg.rgb, fg.rgb, bg.a, fg.a, comp_mode); } - cmd_ref.offset += 4; + cmd_ref.offset += 4 + CmdEndClip_size; break; case Cmd_Jump: cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref); diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 936c431..9b9b341 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -26,6 +26,10 @@ struct CmdAlphaRef { uint offset; }; +struct CmdEndClipRef { + uint offset; +}; + struct CmdJumpRef { uint offset; }; @@ -100,6 +104,16 @@ CmdAlphaRef CmdAlpha_index(CmdAlphaRef ref, uint index) { return CmdAlphaRef(ref.offset + index * CmdAlpha_size); } +struct CmdEndClip { + uint blend; +}; + +#define CmdEndClip_size 4 + +CmdEndClipRef CmdEndClip_index(CmdEndClipRef ref, uint index) { + return CmdEndClipRef(ref.offset + index * CmdEndClip_size); +} + struct CmdJump { uint new_ref; }; @@ -228,6 +242,19 @@ void CmdAlpha_write(Alloc a, CmdAlphaRef ref, CmdAlpha s) { write_mem(a, ix + 0, floatBitsToUint(s.alpha)); } +CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + CmdEndClip s; + s.blend = raw0; + return s; +} + +void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, s.blend); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -270,6 +297,10 @@ CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { return CmdImage_read(a, CmdImageRef(ref.offset + 4)); } +CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { + return CmdEndClip_read(a, CmdEndClipRef(ref.offset + 4)); +} + CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { return CmdJump_read(a, CmdJumpRef(ref.offset + 4)); } @@ -316,8 +347,9 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) { write_mem(a, ref.offset >> 2, Cmd_BeginClip); } -void Cmd_EndClip_write(Alloc a, CmdRef ref) { +void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) { write_mem(a, ref.offset >> 2, Cmd_EndClip); + CmdEndClip_write(a, CmdEndClipRef(ref.offset + 4), s); } void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) { diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index 254d4aa..3e74b69 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -138,9 +138,10 @@ TransformRef Transform_index(TransformRef ref, uint index) { struct Clip { vec4 bbox; + uint blend; }; -#define Clip_size 16 +#define Clip_size 20 ClipRef Clip_index(ClipRef ref, uint index) { return ClipRef(ref.offset + index * Clip_size); @@ -286,6 +287,7 @@ Clip Clip_read(ClipRef ref) { uint raw3 = scene[ix + 3]; Clip s; s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.blend = scene[ix + 4]; return s; } diff --git a/piet-gpu/src/blend.rs b/piet-gpu/src/blend.rs new file mode 100644 index 0000000..6f1e791 --- /dev/null +++ b/piet-gpu/src/blend.rs @@ -0,0 +1,99 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(C)] +pub enum BlendMode { + Normal = 0, + Multiply = 1, + Screen = 2, + Overlay = 3, + Darken = 4, + Lighten = 5, + ColorDodge = 6, + ColorBurn = 7, + HardLight = 8, + SoftLight = 9, + Difference = 10, + Exclusion = 11, + Hue = 12, + Saturation = 13, + Color = 14, + Luminosity = 15, +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[repr(C)] +pub enum CompositionMode { + Clear = 0, + Copy = 1, + Dest = 2, + SrcOver = 3, + DestOver = 4, + SrcIn = 5, + DestIn = 6, + SrcOut = 7, + DestOut = 8, + SrcAtop = 9, + DestAtop = 10, + Xor = 11, + Plus = 12, + PlusDarker = 13, + PlusLighter = 14, +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Blend { + pub mode: BlendMode, + pub composition_mode: CompositionMode, +} + +impl Blend { + pub fn new(mode: BlendMode, composition_mode: CompositionMode) -> Self { + Self { mode, composition_mode } + } + + pub(crate) fn pack(&self) -> u32 { + (self.mode as u32) << 8 | self.composition_mode as u32 + } +} + +impl Default for Blend { + fn default() -> Self { + Self { + mode: BlendMode::Normal, + composition_mode: CompositionMode::SrcOver, + } + } +} + +impl From for Blend { + fn from(mode: BlendMode) -> Self { + Self { + mode, + composition_mode: CompositionMode::SrcOver, + } + } +} + +impl From for Blend { + fn from(mode: CompositionMode) -> Self { + Self { + mode: BlendMode::Normal, + composition_mode: mode, + } + } +} diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index 767f4ba..c24615e 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -16,6 +16,7 @@ //! Low-level scene encoding. +use crate::Blend; use bytemuck::{Pod, Zeroable}; use piet_gpu_hal::BufWrite; @@ -87,7 +88,8 @@ pub struct FillLinGradient { pub struct Clip { tag: u32, bbox: [f32; 4], - padding: [u32; 4], + blend: u32, + padding: [u32; 3], } impl Encoder { @@ -151,10 +153,11 @@ impl Encoder { } /// Start a clip and return a save point to be filled in later. - pub fn begin_clip(&mut self) -> usize { + pub fn begin_clip(&mut self, blend: Option) -> usize { let saved = self.drawobj_stream.len(); let element = Clip { tag: ELEMENT_BEGINCLIP, + blend: blend.unwrap_or(Blend::default()).pack(), ..Default::default() }; self.drawobj_stream.extend(bytemuck::bytes_of(&element)); @@ -162,10 +165,11 @@ impl Encoder { saved } - pub fn end_clip(&mut self, bbox: [f32; 4], save_point: usize) { + pub fn end_clip(&mut self, bbox: [f32; 4], blend: Option, save_point: usize) { let element = Clip { tag: ELEMENT_ENDCLIP, bbox, + blend: blend.unwrap_or(Blend::default()).pack(), ..Default::default() }; self.drawobj_stream[save_point + 4..save_point + 20] diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index b8b7532..bd26d45 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -1,3 +1,4 @@ +mod blend; mod encoder; pub mod glyph_render; mod gradient; @@ -9,6 +10,7 @@ mod text; use std::convert::TryInto; +pub use blend::{Blend, BlendMode, CompositionMode}; pub use render_ctx::PietGpuRenderContext; use piet::kurbo::Vec2; diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index ef0a3a7..1fe1ce9 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -16,6 +16,7 @@ use piet_gpu_types::scene::Element; use crate::gradient::{LinearGradient, RampCache}; use crate::text::Font; pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder}; +use crate::Blend; pub struct PietGpuImage; @@ -66,6 +67,7 @@ struct ClipElement { /// Byte offset of BeginClip element in element vec, for bbox fixup. save_point: usize, bbox: Option, + blend: Option, } const TOLERANCE: f64 = 0.25; @@ -230,13 +232,14 @@ impl RenderContext for PietGpuRenderContext { self.encode_linewidth(-1.0); let path = shape.path_elements(TOLERANCE); self.encode_path(path, true); - let save_point = self.new_encoder.begin_clip(); + let save_point = self.new_encoder.begin_clip(None); if self.clip_stack.len() >= MAX_BLEND_STACK { panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); } self.clip_stack.push(ClipElement { bbox: None, save_point, + blend: None, }); if let Some(tos) = self.state_stack.last_mut() { tos.n_clip += 1; @@ -333,6 +336,25 @@ impl RenderContext for PietGpuRenderContext { } impl PietGpuRenderContext { + pub fn blend(&mut self, shape: impl Shape, blend: Blend) { + self.encode_linewidth(-1.0); + let path = shape.path_elements(TOLERANCE); + self.encode_path(path, true); + let save_point = self.new_encoder.begin_clip(Some(blend)); + if self.clip_stack.len() >= MAX_BLEND_STACK { + panic!("Maximum clip/blend stack size {} exceeded", MAX_BLEND_STACK); + } + self.clip_stack.push(ClipElement { + bbox: None, + save_point, + blend: Some(blend), + }); + self.accumulate_bbox(|| shape.bounding_box()); + if let Some(tos) = self.state_stack.last_mut() { + tos.n_clip += 1; + } + } + fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( @@ -386,7 +408,7 @@ impl PietGpuRenderContext { let tos = self.clip_stack.pop().unwrap(); let bbox = tos.bbox.unwrap_or_default(); let bbox_f32_4 = rect_to_f32_4(bbox); - self.new_encoder.end_clip(bbox_f32_4, tos.save_point); + self.new_encoder.end_clip(bbox_f32_4, tos.blend, tos.save_point); if let Some(bbox) = tos.bbox { self.union_bbox(bbox); } diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs index 47ace66..118b727 100644 --- a/piet-gpu/src/test_scenes.rs +++ b/piet-gpu/src/test_scenes.rs @@ -2,7 +2,8 @@ use rand::{Rng, RngCore}; -use piet::kurbo::{BezPath, Circle, Line, Point, Rect, Shape}; +use crate::{PietGpuRenderContext, Blend, BlendMode, CompositionMode}; +use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape}; use piet::{ Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, }; @@ -11,6 +12,18 @@ use crate::{PicoSvg, RenderContext, Vec2}; const N_CIRCLES: usize = 0; +pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) { + rc.fill( + Rect::new(400., 400., 800., 800.), + &Color::rgb8(0, 0, 200), + ); + rc.save().unwrap(); + rc.blend(Rect::new(0., 0., 1000., 1000.), blend); + rc.transform(Affine::translate(Vec2::new(600., 600.)) * Affine::rotate(0.01 * i as f64)); + rc.fill(Rect::new(0., 0., 400., 400.), &Color::rgba8(255, 0, 0, 255)); + rc.restore().unwrap(); +} + pub fn render_svg(rc: &mut impl RenderContext, filename: &str, scale: f64) { let xml_str = std::fs::read_to_string(filename).unwrap(); let start = std::time::Instant::now();