diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index c48f65f..60c9660 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -276,8 +276,8 @@ fn main() -> Result<(), Error> { /* let mut data: Vec = Default::default(); - renderer.tile_buf.read(&mut data).unwrap(); - piet_gpu::dump_k1_data(&data); + renderer.memory_buf_dev.read(&mut data).unwrap(); + piet_gpu::dump_k1_data(&data[2..]); trace_ptcl(&data); */ diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index 4dd01ed..a1ed332 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/backdrop_lg.spv b/piet-gpu/shader/backdrop_lg.spv index b00e3cd..457cb02 100644 Binary files a/piet-gpu/shader/backdrop_lg.spv and b/piet-gpu/shader/backdrop_lg.spv differ diff --git a/piet-gpu/shader/bbox_clear.comp b/piet-gpu/shader/bbox_clear.comp index 4ac5062..c609642 100644 --- a/piet-gpu/shader/bbox_clear.comp +++ b/piet-gpu/shader/bbox_clear.comp @@ -19,8 +19,8 @@ layout(binding = 1) readonly buffer ConfigBuf { void main() { uint ix = gl_GlobalInvocationID.x; - if (ix < conf.n_elements) { - uint out_ix = (conf.bbox_alloc.offset >> 2) + 4 * ix; + if (ix < conf.n_path) { + uint out_ix = (conf.bbox_alloc.offset >> 2) + 6 * ix; memory[out_ix] = 0xffff; memory[out_ix + 1] = 0xffff; memory[out_ix + 2] = 0; diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 38d10b3..5ec7aec 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 1df1876..497915c 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -25,7 +25,7 @@ rule msl build elements.spv: glsl elements.comp | scene.h state.h annotated.h -build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h +build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h mem.h build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h @@ -90,7 +90,7 @@ build gen/draw_root.hlsl: hlsl gen/draw_root.spv build gen/draw_root.dxil: dxil gen/draw_root.hlsl build gen/draw_root.msl: msl gen/draw_root.spv -build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h +build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h annotated.h setup.h mem.h build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl build gen/draw_leaf.msl: msl gen/draw_leaf.spv diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index a2071ad..8d4f7c0 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index ec6a928..85d9528 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -28,6 +28,7 @@ layout(binding = 2) readonly buffer SceneBuf { #include "scene.h" #include "tile.h" #include "drawtag.h" +#include "annotated.h" #define Monoid DrawMonoid @@ -70,10 +71,93 @@ void main() { if (gl_LocalInvocationID.x > 0) { row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); } - uint out_base = (conf.drawmonoid_alloc.offset >> 2) + gl_GlobalInvocationID.x * 2 * N_ROWS; + uint out_ix = gl_GlobalInvocationID.x * N_ROWS; + uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2; + AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size); for (uint i = 0; i < N_ROWS; i++) { Monoid m = combine_tag_monoid(row, local[i]); memory[out_base + i * 2] = m.path_ix; memory[out_base + i * 2 + 1] = m.clip_ix; + + // For compatibility, we'll generate an Annotated object, same as old + // pipeline. However, going forward we'll get rid of that, and have + // later stages read scene + bbox etc. + ElementRef this_ref = Element_index(ref, i); + tag_word = Element_tag(this_ref).tag; + if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) { + uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1); + float bbox_l = float(memory[bbox_offset]) - 32768.0; + float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; + float bbox_r = float(memory[bbox_offset + 2]) - 32768.0; + float bbox_b = float(memory[bbox_offset + 3]) - 32768.0; + vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b); + float linewidth = uintBitsToFloat(memory[bbox_offset + 4]); + uint fill_mode = uint(linewidth >= 0.0); + vec4 mat; + vec2 translate; + if (linewidth >= 0.0 || tag_word == Element_FillLinGradient) { + uint trans_ix = memory[bbox_offset + 5]; + uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; + mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); + if (tag_word == Element_FillLinGradient) { + translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); + } + } + if (linewidth >= 0.0) { + // TODO: need to deal with anisotropic case + linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); + } + linewidth = max(linewidth, 0.0); + switch (tag_word) { + case Element_FillColor: + FillColor fill = Element_FillColor_read(this_ref); + AnnoColor anno_fill; + anno_fill.bbox = bbox; + anno_fill.linewidth = linewidth; + anno_fill.rgba_color = fill.rgba_color; + Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill); + break; + case Element_FillLinGradient: + FillLinGradient lin = Element_FillLinGradient_read(this_ref); + AnnoLinGradient anno_lin; + anno_lin.bbox = bbox; + anno_lin.linewidth = linewidth; + anno_lin.index = lin.index; + vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate; + vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate; + vec2 dxy = p1 - p0; + float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y); + float line_x = dxy.x * scale; + float line_y = dxy.y * scale; + anno_lin.line_x = line_x; + anno_lin.line_y = line_y; + anno_lin.line_c = -(p0.x * line_x + p0.y * line_y); + Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin); + break; + case Element_FillImage: + FillImage fill_img = Element_FillImage_read(this_ref); + AnnoImage anno_img; + anno_img.bbox = bbox; + anno_img.linewidth = linewidth; + anno_img.index = fill_img.index; + anno_img.offset = fill_img.offset; + Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img); + break; + } + } else if (tag_word == Element_BeginClip) { + Clip begin_clip = Element_BeginClip_read(this_ref); + AnnoBeginClip anno_begin_clip; + // This is the absolute bbox, it's been transformed during encoding. + anno_begin_clip.bbox = begin_clip.bbox; + anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke + Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip); + } else if (tag_word == Element_EndClip) { + Clip end_clip = Element_EndClip_read(this_ref); + AnnoEndClip anno_end_clip; + // This bbox is expected to be the same as the begin one. + anno_end_clip.bbox = end_clip.bbox; + Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip); + } + out_ref.offset += Annotated_size; } } diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index 873fc41..6f33544 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -445,7 +445,7 @@ void main() { vec2 lw = get_linewidth(st); anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z)); } else { - anno_fill.linewidth = 0.0; + anno_begin_clip.linewidth = 0.0; } out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size); Annotated_BeginClip_write(conf.anno_alloc, out_ref, fill_mode, anno_begin_clip); diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 37cc051..f906dac 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil index 8a46725..9ce0add 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.dxil and b/piet-gpu/shader/gen/bbox_clear.dxil differ diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl index 7a4e86a..903a185 100644 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ b/piet-gpu/shader/gen/bbox_clear.hlsl @@ -18,16 +18,17 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); ByteAddressBuffer _21 : register(t1); -RWByteAddressBuffer _44 : register(u0); +RWByteAddressBuffer _45 : register(u0); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -38,13 +39,13 @@ struct SPIRV_Cross_Input void comp_main() { uint ix = gl_GlobalInvocationID.x; - if (ix < _21.Load(0)) + if (ix < _21.Load(52)) { - uint out_ix = (_21.Load(40) >> uint(2)) + (4u * ix); - _44.Store(out_ix * 4 + 8, 65535u); - _44.Store((out_ix + 1u) * 4 + 8, 65535u); - _44.Store((out_ix + 2u) * 4 + 8, 0u); - _44.Store((out_ix + 3u) * 4 + 8, 0u); + uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); + _45.Store(out_ix * 4 + 8, 65535u); + _45.Store((out_ix + 1u) * 4 + 8, 65535u); + _45.Store((out_ix + 2u) * 4 + 8, 0u); + _45.Store((out_ix + 3u) * 4 + 8, 0u); } } diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl index 6f73531..9af5b11 100644 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ b/piet-gpu/shader/gen/bbox_clear.msl @@ -23,9 +23,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -43,16 +44,16 @@ struct Memory constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); -kernel void main0(device Memory& _44 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint ix = gl_GlobalInvocationID.x; - if (ix < _21.conf.n_elements) + if (ix < _21.conf.n_path) { - uint out_ix = (_21.conf.bbox_alloc.offset >> uint(2)) + (4u * ix); - _44.memory[out_ix] = 65535u; - _44.memory[out_ix + 1u] = 65535u; - _44.memory[out_ix + 2u] = 0u; - _44.memory[out_ix + 3u] = 0u; + uint out_ix = (_21.conf.bbox_alloc.offset >> uint(2)) + (6u * ix); + _45.memory[out_ix] = 65535u; + _45.memory[out_ix + 1u] = 65535u; + _45.memory[out_ix + 2u] = 0u; + _45.memory[out_ix + 3u] = 0u; } } diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv index 2b659f4..c459502 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.spv and b/piet-gpu/shader/gen/bbox_clear.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index 17bace7..f95e5bc 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index e5f50fd..0ef9538 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -1,8 +1,56 @@ +struct Alloc +{ + uint offset; +}; + struct ElementRef { uint offset; }; +struct FillColorRef +{ + uint offset; +}; + +struct FillColor +{ + uint rgba_color; +}; + +struct FillLinGradientRef +{ + uint offset; +}; + +struct FillLinGradient +{ + uint index; + float2 p0; + float2 p1; +}; + +struct FillImageRef +{ + uint offset; +}; + +struct FillImage +{ + uint index; + int2 offset; +}; + +struct ClipRef +{ + uint offset; +}; + +struct Clip +{ + float4 bbox; +}; + struct ElementTag { uint tag; @@ -15,7 +63,68 @@ struct DrawMonoid uint clip_ix; }; -struct Alloc +struct AnnoImageRef +{ + uint offset; +}; + +struct AnnoImage +{ + float4 bbox; + float linewidth; + uint index; + int2 offset; +}; + +struct AnnoColorRef +{ + uint offset; +}; + +struct AnnoColor +{ + float4 bbox; + float linewidth; + uint rgba_color; +}; + +struct AnnoLinGradientRef +{ + uint offset; +}; + +struct AnnoLinGradient +{ + float4 bbox; + float linewidth; + uint index; + float line_x; + float line_y; + float line_c; +}; + +struct AnnoBeginClipRef +{ + uint offset; +}; + +struct AnnoBeginClip +{ + float4 bbox; + float linewidth; +}; + +struct AnnoEndClipRef +{ + uint offset; +}; + +struct AnnoEndClip +{ + float4 bbox; +}; + +struct AnnotatedRef { uint offset; }; @@ -35,23 +144,24 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const DrawMonoid _67 = { 0u, 0u }; -static const DrawMonoid _94 = { 1u, 0u }; -static const DrawMonoid _96 = { 1u, 1u }; -static const DrawMonoid _98 = { 0u, 1u }; +static const DrawMonoid _418 = { 0u, 0u }; +static const DrawMonoid _443 = { 1u, 0u }; +static const DrawMonoid _445 = { 1u, 1u }; +static const DrawMonoid _447 = { 0u, 1u }; -ByteAddressBuffer _49 : register(t2); -ByteAddressBuffer _218 : register(t3); -ByteAddressBuffer _248 : register(t1); -RWByteAddressBuffer _277 : register(u0); +RWByteAddressBuffer _201 : register(u0); +ByteAddressBuffer _225 : register(t2); +ByteAddressBuffer _1008 : register(t3); +ByteAddressBuffer _1042 : register(t1); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -67,9 +177,9 @@ groupshared DrawMonoid sh_scratch[512]; ElementTag Element_tag(ElementRef ref) { - uint tag_and_flags = _49.Load((ref.offset >> uint(2)) * 4 + 0); - ElementTag _63 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _63; + uint tag_and_flags = _225.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _375 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _375; } DrawMonoid map_tag(uint tag_word) @@ -80,27 +190,27 @@ DrawMonoid map_tag(uint tag_word) case 5u: case 6u: { - return _94; + return _443; } case 9u: { - return _96; + return _445; } case 10u: { - return _98; + return _447; } default: { - return _67; + return _418; } } } ElementRef Element_index(ElementRef ref, uint index) { - ElementRef _42 = { ref.offset + (index * 36u) }; - return _42; + ElementRef _214 = { ref.offset + (index * 36u) }; + return _214; } DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) @@ -113,14 +223,326 @@ DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) DrawMonoid tag_monoid_identity() { - return _67; + return _418; +} + +FillColor FillColor_read(FillColorRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _225.Load((ix + 0u) * 4 + 0); + FillColor s; + s.rgba_color = raw0; + return s; +} + +FillColor Element_FillColor_read(ElementRef ref) +{ + FillColorRef _381 = { ref.offset + 4u }; + FillColorRef param = _381; + return FillColor_read(param); +} + +bool touch_mem(Alloc alloc, uint offset) +{ + return true; +} + +void write_mem(Alloc alloc, uint offset, uint val) +{ + Alloc param = alloc; + uint param_1 = offset; + if (!touch_mem(param, param_1)) + { + return; + } + _201.Store(offset * 4 + 8, val); +} + +void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = asuint(s.bbox.x); + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.bbox.y); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.bbox.z); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.bbox.w); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.linewidth); + write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.rgba_color; + write_mem(param_15, param_16, param_17); +} + +void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 1u; + write_mem(param, param_1, param_2); + AnnoColorRef _808 = { ref.offset + 4u }; + Alloc param_3 = a; + AnnoColorRef param_4 = _808; + AnnoColor param_5 = s; + AnnoColor_write(param_3, param_4, param_5); +} + +FillLinGradient FillLinGradient_read(FillLinGradientRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _225.Load((ix + 0u) * 4 + 0); + uint raw1 = _225.Load((ix + 1u) * 4 + 0); + uint raw2 = _225.Load((ix + 2u) * 4 + 0); + uint raw3 = _225.Load((ix + 3u) * 4 + 0); + uint raw4 = _225.Load((ix + 4u) * 4 + 0); + FillLinGradient s; + s.index = raw0; + s.p0 = float2(asfloat(raw1), asfloat(raw2)); + s.p1 = float2(asfloat(raw3), asfloat(raw4)); + return s; +} + +FillLinGradient Element_FillLinGradient_read(ElementRef ref) +{ + FillLinGradientRef _389 = { ref.offset + 4u }; + FillLinGradientRef param = _389; + return FillLinGradient_read(param); +} + +void AnnoLinGradient_write(Alloc a, AnnoLinGradientRef ref, AnnoLinGradient s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = asuint(s.bbox.x); + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.bbox.y); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.bbox.z); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.bbox.w); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.linewidth); + write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.index; + write_mem(param_15, param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = asuint(s.line_x); + write_mem(param_18, param_19, param_20); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = asuint(s.line_y); + write_mem(param_21, param_22, param_23); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = asuint(s.line_c); + write_mem(param_24, param_25, param_26); +} + +void Annotated_LinGradient_write(Alloc a, AnnotatedRef ref, uint flags, AnnoLinGradient s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 2u; + write_mem(param, param_1, param_2); + AnnoLinGradientRef _829 = { ref.offset + 4u }; + Alloc param_3 = a; + AnnoLinGradientRef param_4 = _829; + AnnoLinGradient param_5 = s; + AnnoLinGradient_write(param_3, param_4, param_5); +} + +FillImage FillImage_read(FillImageRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _225.Load((ix + 0u) * 4 + 0); + uint raw1 = _225.Load((ix + 1u) * 4 + 0); + FillImage s; + s.index = raw0; + s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + return s; +} + +FillImage Element_FillImage_read(ElementRef ref) +{ + FillImageRef _397 = { ref.offset + 4u }; + FillImageRef param = _397; + return FillImage_read(param); +} + +void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = asuint(s.bbox.x); + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.bbox.y); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.bbox.z); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.bbox.w); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.linewidth); + write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.index; + write_mem(param_15, param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); + write_mem(param_18, param_19, param_20); +} + +void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 3u; + write_mem(param, param_1, param_2); + AnnoImageRef _850 = { ref.offset + 4u }; + Alloc param_3 = a; + AnnoImageRef param_4 = _850; + AnnoImage param_5 = s; + AnnoImage_write(param_3, param_4, param_5); +} + +Clip Clip_read(ClipRef ref) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = _225.Load((ix + 0u) * 4 + 0); + uint raw1 = _225.Load((ix + 1u) * 4 + 0); + uint raw2 = _225.Load((ix + 2u) * 4 + 0); + uint raw3 = _225.Load((ix + 3u) * 4 + 0); + Clip s; + s.bbox = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); + return s; +} + +Clip Element_BeginClip_read(ElementRef ref) +{ + ClipRef _405 = { ref.offset + 4u }; + ClipRef param = _405; + return Clip_read(param); +} + +void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = asuint(s.bbox.x); + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.bbox.y); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.bbox.z); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.bbox.w); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.linewidth); + write_mem(param_12, param_13, param_14); +} + +void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 4u; + write_mem(param, param_1, param_2); + AnnoBeginClipRef _871 = { ref.offset + 4u }; + Alloc param_3 = a; + AnnoBeginClipRef param_4 = _871; + AnnoBeginClip param_5 = s; + AnnoBeginClip_write(param_3, param_4, param_5); +} + +Clip Element_EndClip_read(ElementRef ref) +{ + ClipRef _413 = { ref.offset + 4u }; + ClipRef param = _413; + return Clip_read(param); +} + +void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = asuint(s.bbox.x); + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.bbox.y); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.bbox.z); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.bbox.w); + write_mem(param_9, param_10, param_11); +} + +void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 5u; + write_mem(param, param_1, param_2); + AnnoEndClipRef _889 = { ref.offset + 4u }; + Alloc param_3 = a; + AnnoEndClipRef param_4 = _889; + AnnoEndClip param_5 = s; + AnnoEndClip_write(param_3, param_4, param_5); } void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - ElementRef _115 = { ix * 36u }; - ElementRef ref = _115; + ElementRef _907 = { ix * 36u }; + ElementRef ref = _907; ElementRef param = ref; uint tag_word = Element_tag(param).tag; uint param_1 = tag_word; @@ -157,11 +579,11 @@ void comp_main() DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _224; - _224.path_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 0); - _224.clip_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 4); - row.path_ix = _224.path_ix; - row.clip_ix = _224.clip_ix; + DrawMonoid _1014; + _1014.path_ix = _1008.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _1014.clip_ix = _1008.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _1014.path_ix; + row.clip_ix = _1014.clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -169,14 +591,154 @@ void comp_main() DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_tag_monoid(param_10, param_11); } - uint out_base = (_248.Load(44) >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + uint out_ix = gl_GlobalInvocationID.x * 8u; + uint out_base = (_1042.Load(44) >> uint(2)) + (out_ix * 2u); + AnnotatedRef _1058 = { _1042.Load(32) + (out_ix * 40u) }; + AnnotatedRef out_ref = _1058; + float4 mat; + float2 translate; + AnnoColor anno_fill; + Alloc param_18; + AnnoLinGradient anno_lin; + Alloc param_23; + AnnoImage anno_img; + Alloc param_28; + AnnoBeginClip anno_begin_clip; + Alloc param_33; + AnnoEndClip anno_end_clip; + Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid param_12 = row; DrawMonoid param_13 = local[i_2]; DrawMonoid m = combine_tag_monoid(param_12, param_13); - _277.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); - _277.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + _201.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _201.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + ElementRef param_14 = ref; + uint param_15 = i_2; + ElementRef this_ref = Element_index(param_14, param_15); + ElementRef param_16 = this_ref; + tag_word = Element_tag(param_16).tag; + if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) + { + uint bbox_offset = (_1042.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u)); + float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_201.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + float linewidth = asfloat(_201.Load((bbox_offset + 4u) * 4 + 8)); + uint fill_mode = uint(linewidth >= 0.0f); + if ((linewidth >= 0.0f) || (tag_word == 5u)) + { + uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_1042.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8))); + if (tag_word == 5u) + { + translate = asfloat(uint2(_201.Load((t + 4u) * 4 + 8), _201.Load((t + 5u) * 4 + 8))); + } + } + if (linewidth >= 0.0f) + { + linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); + } + linewidth = max(linewidth, 0.0f); + switch (tag_word) + { + case 4u: + { + ElementRef param_17 = this_ref; + FillColor fill = Element_FillColor_read(param_17); + anno_fill.bbox = bbox; + anno_fill.linewidth = linewidth; + anno_fill.rgba_color = fill.rgba_color; + Alloc _1261; + _1261.offset = _1042.Load(32); + param_18.offset = _1261.offset; + AnnotatedRef param_19 = out_ref; + uint param_20 = fill_mode; + AnnoColor param_21 = anno_fill; + Annotated_Color_write(param_18, param_19, param_20, param_21); + break; + } + case 5u: + { + ElementRef param_22 = this_ref; + FillLinGradient lin = Element_FillLinGradient_read(param_22); + anno_lin.bbox = bbox; + anno_lin.linewidth = linewidth; + anno_lin.index = lin.index; + float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; + float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate; + float2 dxy = p1 - p0; + float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); + float line_x = dxy.x * scale; + float line_y = dxy.y * scale; + anno_lin.line_x = line_x; + anno_lin.line_y = line_y; + anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); + Alloc _1357; + _1357.offset = _1042.Load(32); + param_23.offset = _1357.offset; + AnnotatedRef param_24 = out_ref; + uint param_25 = fill_mode; + AnnoLinGradient param_26 = anno_lin; + Annotated_LinGradient_write(param_23, param_24, param_25, param_26); + break; + } + case 6u: + { + ElementRef param_27 = this_ref; + FillImage fill_img = Element_FillImage_read(param_27); + anno_img.bbox = bbox; + anno_img.linewidth = linewidth; + anno_img.index = fill_img.index; + anno_img.offset = fill_img.offset; + Alloc _1385; + _1385.offset = _1042.Load(32); + param_28.offset = _1385.offset; + AnnotatedRef param_29 = out_ref; + uint param_30 = fill_mode; + AnnoImage param_31 = anno_img; + Annotated_Image_write(param_28, param_29, param_30, param_31); + break; + } + } + } + else + { + if (tag_word == 9u) + { + ElementRef param_32 = this_ref; + Clip begin_clip = Element_BeginClip_read(param_32); + anno_begin_clip.bbox = begin_clip.bbox; + anno_begin_clip.linewidth = 0.0f; + Alloc _1413; + _1413.offset = _1042.Load(32); + param_33.offset = _1413.offset; + AnnotatedRef param_34 = out_ref; + uint param_35 = 0u; + AnnoBeginClip param_36 = anno_begin_clip; + Annotated_BeginClip_write(param_33, param_34, param_35, param_36); + } + else + { + if (tag_word == 10u) + { + ElementRef param_37 = this_ref; + Clip end_clip = Element_EndClip_read(param_37); + anno_end_clip.bbox = end_clip.bbox; + Alloc _1438; + _1438.offset = _1042.Load(32); + param_38.offset = _1438.offset; + AnnotatedRef param_39 = out_ref; + AnnoEndClip param_40 = anno_end_clip; + Annotated_EndClip_write(param_38, param_39, param_40); + } + } + } + out_ref.offset += 40u; } } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index d52a560..f713186 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -44,11 +44,59 @@ struct spvUnsafeArray } }; +struct Alloc +{ + uint offset; +}; + struct ElementRef { uint offset; }; +struct FillColorRef +{ + uint offset; +}; + +struct FillColor +{ + uint rgba_color; +}; + +struct FillLinGradientRef +{ + uint offset; +}; + +struct FillLinGradient +{ + uint index; + float2 p0; + float2 p1; +}; + +struct FillImageRef +{ + uint offset; +}; + +struct FillImage +{ + uint index; + int2 offset; +}; + +struct ClipRef +{ + uint offset; +}; + +struct Clip +{ + float4 bbox; +}; + struct ElementTag { uint tag; @@ -61,6 +109,79 @@ struct DrawMonoid uint clip_ix; }; +struct AnnoImageRef +{ + uint offset; +}; + +struct AnnoImage +{ + float4 bbox; + float linewidth; + uint index; + int2 offset; +}; + +struct AnnoColorRef +{ + uint offset; +}; + +struct AnnoColor +{ + float4 bbox; + float linewidth; + uint rgba_color; +}; + +struct AnnoLinGradientRef +{ + uint offset; +}; + +struct AnnoLinGradient +{ + float4 bbox; + float linewidth; + uint index; + float line_x; + float line_y; + float line_c; +}; + +struct AnnoBeginClipRef +{ + uint offset; +}; + +struct AnnoBeginClip +{ + float4 bbox; + float linewidth; +}; + +struct AnnoEndClipRef +{ + uint offset; +}; + +struct AnnoEndClip +{ + float4 bbox; +}; + +struct AnnotatedRef +{ + uint offset; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + struct SceneBuf { uint scene[1]; @@ -77,7 +198,7 @@ struct ParentBuf DrawMonoid_1 parent[1]; }; -struct Alloc +struct Alloc_1 { uint offset; }; @@ -88,18 +209,19 @@ struct Config uint n_pathseg; uint width_in_tiles; uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc bbox_alloc; - Alloc drawmonoid_alloc; + Alloc_1 tile_alloc; + Alloc_1 bin_alloc; + Alloc_1 ptcl_alloc; + Alloc_1 pathseg_alloc; + Alloc_1 anno_alloc; + Alloc_1 trans_alloc; + Alloc_1 bbox_alloc; + Alloc_1 drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -108,19 +230,12 @@ struct ConfigBuf Config conf; }; -struct Memory -{ - uint mem_offset; - uint mem_error; - uint memory[1]; -}; - constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); static inline __attribute__((always_inline)) -ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225) { - uint tag_and_flags = v_49.scene[ref.offset >> uint(2)]; + uint tag_and_flags = v_225.scene[ref.offset >> uint(2)]; return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } @@ -171,13 +286,336 @@ DrawMonoid tag_monoid_identity() return DrawMonoid{ 0u, 0u }; } -kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +static inline __attribute__((always_inline)) +FillColor FillColor_read(thread const FillColorRef& ref, const device SceneBuf& v_225) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = v_225.scene[ix + 0u]; + FillColor s; + s.rgba_color = raw0; + return s; +} + +static inline __attribute__((always_inline)) +FillColor Element_FillColor_read(thread const ElementRef& ref, const device SceneBuf& v_225) +{ + FillColorRef param = FillColorRef{ ref.offset + 4u }; + return FillColor_read(param, v_225); +} + +static inline __attribute__((always_inline)) +bool touch_mem(thread const Alloc& alloc, thread const uint& offset) +{ + return true; +} + +static inline __attribute__((always_inline)) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_201) +{ + Alloc param = alloc; + uint param_1 = offset; + if (!touch_mem(param, param_1)) + { + return; + } + v_201.memory[offset] = val; +} + +static inline __attribute__((always_inline)) +void AnnoColor_write(thread const Alloc& a, thread const AnnoColorRef& ref, thread const AnnoColor& s, device Memory& v_201) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = as_type(s.bbox.x); + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = as_type(s.bbox.y); + write_mem(param_3, param_4, param_5, v_201); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.bbox.z); + write_mem(param_6, param_7, param_8, v_201); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.bbox.w); + write_mem(param_9, param_10, param_11, v_201); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.linewidth); + write_mem(param_12, param_13, param_14, v_201); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.rgba_color; + write_mem(param_15, param_16, param_17, v_201); +} + +static inline __attribute__((always_inline)) +void Annotated_Color_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoColor& s, device Memory& v_201) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 1u; + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + AnnoColorRef param_4 = AnnoColorRef{ ref.offset + 4u }; + AnnoColor param_5 = s; + AnnoColor_write(param_3, param_4, param_5, v_201); +} + +static inline __attribute__((always_inline)) +FillLinGradient FillLinGradient_read(thread const FillLinGradientRef& ref, const device SceneBuf& v_225) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = v_225.scene[ix + 0u]; + uint raw1 = v_225.scene[ix + 1u]; + uint raw2 = v_225.scene[ix + 2u]; + uint raw3 = v_225.scene[ix + 3u]; + uint raw4 = v_225.scene[ix + 4u]; + FillLinGradient s; + s.index = raw0; + s.p0 = float2(as_type(raw1), as_type(raw2)); + s.p1 = float2(as_type(raw3), as_type(raw4)); + return s; +} + +static inline __attribute__((always_inline)) +FillLinGradient Element_FillLinGradient_read(thread const ElementRef& ref, const device SceneBuf& v_225) +{ + FillLinGradientRef param = FillLinGradientRef{ ref.offset + 4u }; + return FillLinGradient_read(param, v_225); +} + +static inline __attribute__((always_inline)) +void AnnoLinGradient_write(thread const Alloc& a, thread const AnnoLinGradientRef& ref, thread const AnnoLinGradient& s, device Memory& v_201) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = as_type(s.bbox.x); + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = as_type(s.bbox.y); + write_mem(param_3, param_4, param_5, v_201); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.bbox.z); + write_mem(param_6, param_7, param_8, v_201); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.bbox.w); + write_mem(param_9, param_10, param_11, v_201); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.linewidth); + write_mem(param_12, param_13, param_14, v_201); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.index; + write_mem(param_15, param_16, param_17, v_201); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = as_type(s.line_x); + write_mem(param_18, param_19, param_20, v_201); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = as_type(s.line_y); + write_mem(param_21, param_22, param_23, v_201); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = as_type(s.line_c); + write_mem(param_24, param_25, param_26, v_201); +} + +static inline __attribute__((always_inline)) +void Annotated_LinGradient_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoLinGradient& s, device Memory& v_201) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 2u; + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + AnnoLinGradientRef param_4 = AnnoLinGradientRef{ ref.offset + 4u }; + AnnoLinGradient param_5 = s; + AnnoLinGradient_write(param_3, param_4, param_5, v_201); +} + +static inline __attribute__((always_inline)) +FillImage FillImage_read(thread const FillImageRef& ref, const device SceneBuf& v_225) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = v_225.scene[ix + 0u]; + uint raw1 = v_225.scene[ix + 1u]; + FillImage s; + s.index = raw0; + s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + return s; +} + +static inline __attribute__((always_inline)) +FillImage Element_FillImage_read(thread const ElementRef& ref, const device SceneBuf& v_225) +{ + FillImageRef param = FillImageRef{ ref.offset + 4u }; + return FillImage_read(param, v_225); +} + +static inline __attribute__((always_inline)) +void AnnoImage_write(thread const Alloc& a, thread const AnnoImageRef& ref, thread const AnnoImage& s, device Memory& v_201) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = as_type(s.bbox.x); + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = as_type(s.bbox.y); + write_mem(param_3, param_4, param_5, v_201); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.bbox.z); + write_mem(param_6, param_7, param_8, v_201); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.bbox.w); + write_mem(param_9, param_10, param_11, v_201); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.linewidth); + write_mem(param_12, param_13, param_14, v_201); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = s.index; + write_mem(param_15, param_16, param_17, v_201); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); + write_mem(param_18, param_19, param_20, v_201); +} + +static inline __attribute__((always_inline)) +void Annotated_Image_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoImage& s, device Memory& v_201) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 3u; + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + AnnoImageRef param_4 = AnnoImageRef{ ref.offset + 4u }; + AnnoImage param_5 = s; + AnnoImage_write(param_3, param_4, param_5, v_201); +} + +static inline __attribute__((always_inline)) +Clip Clip_read(thread const ClipRef& ref, const device SceneBuf& v_225) +{ + uint ix = ref.offset >> uint(2); + uint raw0 = v_225.scene[ix + 0u]; + uint raw1 = v_225.scene[ix + 1u]; + uint raw2 = v_225.scene[ix + 2u]; + uint raw3 = v_225.scene[ix + 3u]; + Clip s; + s.bbox = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); + return s; +} + +static inline __attribute__((always_inline)) +Clip Element_BeginClip_read(thread const ElementRef& ref, const device SceneBuf& v_225) +{ + ClipRef param = ClipRef{ ref.offset + 4u }; + return Clip_read(param, v_225); +} + +static inline __attribute__((always_inline)) +void AnnoBeginClip_write(thread const Alloc& a, thread const AnnoBeginClipRef& ref, thread const AnnoBeginClip& s, device Memory& v_201) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = as_type(s.bbox.x); + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = as_type(s.bbox.y); + write_mem(param_3, param_4, param_5, v_201); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.bbox.z); + write_mem(param_6, param_7, param_8, v_201); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.bbox.w); + write_mem(param_9, param_10, param_11, v_201); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.linewidth); + write_mem(param_12, param_13, param_14, v_201); +} + +static inline __attribute__((always_inline)) +void Annotated_BeginClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const uint& flags, thread const AnnoBeginClip& s, device Memory& v_201) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = (flags << uint(16)) | 4u; + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + AnnoBeginClipRef param_4 = AnnoBeginClipRef{ ref.offset + 4u }; + AnnoBeginClip param_5 = s; + AnnoBeginClip_write(param_3, param_4, param_5, v_201); +} + +static inline __attribute__((always_inline)) +Clip Element_EndClip_read(thread const ElementRef& ref, const device SceneBuf& v_225) +{ + ClipRef param = ClipRef{ ref.offset + 4u }; + return Clip_read(param, v_225); +} + +static inline __attribute__((always_inline)) +void AnnoEndClip_write(thread const Alloc& a, thread const AnnoEndClipRef& ref, thread const AnnoEndClip& s, device Memory& v_201) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = as_type(s.bbox.x); + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = as_type(s.bbox.y); + write_mem(param_3, param_4, param_5, v_201); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.bbox.z); + write_mem(param_6, param_7, param_8, v_201); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.bbox.w); + write_mem(param_9, param_10, param_11, v_201); +} + +static inline __attribute__((always_inline)) +void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& ref, thread const AnnoEndClip& s, device Memory& v_201) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 5u; + write_mem(param, param_1, param_2, v_201); + Alloc param_3 = a; + AnnoEndClipRef param_4 = AnnoEndClipRef{ ref.offset + 4u }; + AnnoEndClip param_5 = s; + AnnoEndClip_write(param_3, param_4, param_5, v_201); +} + +kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1042 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1008 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[512]; uint ix = gl_GlobalInvocationID.x * 8u; ElementRef ref = ElementRef{ ix * 36u }; ElementRef param = ref; - uint tag_word = Element_tag(param, v_49).tag; + uint tag_word = Element_tag(param, v_225).tag; uint param_1 = tag_word; DrawMonoid agg = map_tag(param_1); spvUnsafeArray local; @@ -187,7 +625,7 @@ kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _24 ElementRef param_2 = ref; uint param_3 = i; ElementRef param_4 = Element_index(param_2, param_3); - tag_word = Element_tag(param_4, v_49).tag; + tag_word = Element_tag(param_4, v_225).tag; uint param_5 = tag_word; DrawMonoid param_6 = agg; DrawMonoid param_7 = map_tag(param_5); @@ -212,9 +650,9 @@ kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _24 DrawMonoid row = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _221 = gl_WorkGroupID.x - 1u; - row.path_ix = _218.parent[_221].path_ix; - row.clip_ix = _218.parent[_221].clip_ix; + uint _1011 = gl_WorkGroupID.x - 1u; + row.path_ix = _1008.parent[_1011].path_ix; + row.clip_ix = _1008.parent[_1011].clip_ix; } if (gl_LocalInvocationID.x > 0u) { @@ -222,14 +660,143 @@ kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _24 DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_tag_monoid(param_10, param_11); } - uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + uint out_ix = gl_GlobalInvocationID.x * 8u; + uint out_base = (_1042.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u); + AnnotatedRef out_ref = AnnotatedRef{ _1042.conf.anno_alloc.offset + (out_ix * 40u) }; + float4 mat; + float2 translate; + AnnoColor anno_fill; + Alloc param_18; + AnnoLinGradient anno_lin; + Alloc param_23; + AnnoImage anno_img; + Alloc param_28; + AnnoBeginClip anno_begin_clip; + Alloc param_33; + AnnoEndClip anno_end_clip; + Alloc param_38; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid param_12 = row; DrawMonoid param_13 = local[i_2]; DrawMonoid m = combine_tag_monoid(param_12, param_13); - _277.memory[out_base + (i_2 * 2u)] = m.path_ix; - _277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + v_201.memory[out_base + (i_2 * 2u)] = m.path_ix; + v_201.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + ElementRef param_14 = ref; + uint param_15 = i_2; + ElementRef this_ref = Element_index(param_14, param_15); + ElementRef param_16 = this_ref; + tag_word = Element_tag(param_16, v_225).tag; + if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u)) + { + uint bbox_offset = (_1042.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u)); + float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0; + float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(v_201.memory[bbox_offset + 3u]) - 32768.0; + float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); + float linewidth = as_type(v_201.memory[bbox_offset + 4u]); + uint fill_mode = uint(linewidth >= 0.0); + if ((linewidth >= 0.0) || (tag_word == 5u)) + { + uint trans_ix = v_201.memory[bbox_offset + 5u]; + uint t = (_1042.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u])); + if (tag_word == 5u) + { + translate = as_type(uint2(v_201.memory[t + 4u], v_201.memory[t + 5u])); + } + } + if (linewidth >= 0.0) + { + linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); + } + linewidth = fast::max(linewidth, 0.0); + switch (tag_word) + { + case 4u: + { + ElementRef param_17 = this_ref; + FillColor fill = Element_FillColor_read(param_17, v_225); + anno_fill.bbox = bbox; + anno_fill.linewidth = linewidth; + anno_fill.rgba_color = fill.rgba_color; + param_18.offset = _1042.conf.anno_alloc.offset; + AnnotatedRef param_19 = out_ref; + uint param_20 = fill_mode; + AnnoColor param_21 = anno_fill; + Annotated_Color_write(param_18, param_19, param_20, param_21, v_201); + break; + } + case 5u: + { + ElementRef param_22 = this_ref; + FillLinGradient lin = Element_FillLinGradient_read(param_22, v_225); + anno_lin.bbox = bbox; + anno_lin.linewidth = linewidth; + anno_lin.index = lin.index; + float2 p0 = ((mat.xy * lin.p0.x) + (mat.zw * lin.p0.y)) + translate; + float2 p1 = ((mat.xy * lin.p1.x) + (mat.zw * lin.p1.y)) + translate; + float2 dxy = p1 - p0; + float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); + float line_x = dxy.x * scale; + float line_y = dxy.y * scale; + anno_lin.line_x = line_x; + anno_lin.line_y = line_y; + anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y)); + param_23.offset = _1042.conf.anno_alloc.offset; + AnnotatedRef param_24 = out_ref; + uint param_25 = fill_mode; + AnnoLinGradient param_26 = anno_lin; + Annotated_LinGradient_write(param_23, param_24, param_25, param_26, v_201); + break; + } + case 6u: + { + ElementRef param_27 = this_ref; + FillImage fill_img = Element_FillImage_read(param_27, v_225); + anno_img.bbox = bbox; + anno_img.linewidth = linewidth; + anno_img.index = fill_img.index; + anno_img.offset = fill_img.offset; + param_28.offset = _1042.conf.anno_alloc.offset; + AnnotatedRef param_29 = out_ref; + uint param_30 = fill_mode; + AnnoImage param_31 = anno_img; + Annotated_Image_write(param_28, param_29, param_30, param_31, v_201); + break; + } + } + } + else + { + if (tag_word == 9u) + { + ElementRef param_32 = this_ref; + Clip begin_clip = Element_BeginClip_read(param_32, v_225); + anno_begin_clip.bbox = begin_clip.bbox; + anno_begin_clip.linewidth = 0.0; + param_33.offset = _1042.conf.anno_alloc.offset; + AnnotatedRef param_34 = out_ref; + uint param_35 = 0u; + AnnoBeginClip param_36 = anno_begin_clip; + Annotated_BeginClip_write(param_33, param_34, param_35, param_36, v_201); + } + else + { + if (tag_word == 10u) + { + ElementRef param_37 = this_ref; + Clip end_clip = Element_EndClip_read(param_37, v_225); + anno_end_clip.bbox = end_clip.bbox; + param_38.offset = _1042.conf.anno_alloc.offset; + AnnotatedRef param_39 = out_ref; + AnnoEndClip param_40 = anno_end_clip; + Annotated_EndClip_write(param_38, param_39, param_40, v_201); + } + } + } + out_ref.offset += 40u; } } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index 30740a2..8fade68 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.spv and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl index 27c206a..b28c956 100644 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -37,9 +37,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl index dd2f517..550cf8c 100644 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -67,9 +67,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv index 286bd33..02ebc5d 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.spv and b/piet-gpu/shader/gen/draw_reduce.spv differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 4464d9d..0ca0d18 100644 Binary files a/piet-gpu/shader/gen/pathseg.dxil and b/piet-gpu/shader/gen/pathseg.dxil differ diff --git a/piet-gpu/shader/gen/pathseg.hlsl b/piet-gpu/shader/gen/pathseg.hlsl index e29ddd3..c7f7df0 100644 --- a/piet-gpu/shader/gen/pathseg.hlsl +++ b/piet-gpu/shader/gen/pathseg.hlsl @@ -65,9 +65,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -79,7 +80,7 @@ static const Monoid _567 = { 0.0f.xxxx, 0u }; RWByteAddressBuffer _111 : register(u0); ByteAddressBuffer _574 : register(t2); ByteAddressBuffer _639 : register(t1); -ByteAddressBuffer _710 : register(t3); +ByteAddressBuffer _709 : register(t3); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -355,7 +356,7 @@ uint round_up(float x) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(56) >> uint(2)) + (ix >> uint(2))) * 4 + 0); + uint tag_word = _574.Load(((_639.Load(64) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint param = tag_word; TagMonoid local_tm = reduce_tag(param); sh_tag[gl_LocalInvocationID.x] = local_tm; @@ -376,17 +377,17 @@ void comp_main() TagMonoid tm = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - TagMonoid _716; - _716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0); - _716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4); - _716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8); - _716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12); - _716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16); - tm.trans_ix = _716.trans_ix; - tm.linewidth_ix = _716.linewidth_ix; - tm.pathseg_ix = _716.pathseg_ix; - tm.path_ix = _716.path_ix; - tm.pathseg_offset = _716.pathseg_offset; + TagMonoid _715; + _715.trans_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 0); + _715.linewidth_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 4); + _715.pathseg_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 8); + _715.path_ix = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 12); + _715.pathseg_offset = _709.Load((gl_WorkGroupID.x - 1u) * 20 + 16); + tm.trans_ix = _715.trans_ix; + tm.linewidth_ix = _715.linewidth_ix; + tm.pathseg_ix = _715.pathseg_ix; + tm.path_ix = _715.path_ix; + tm.pathseg_offset = _715.pathseg_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -394,13 +395,16 @@ void comp_main() TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; tm = combine_tag_monoid(param_3, param_4); } - uint ps_ix = (_639.Load(64) >> uint(2)) + tm.pathseg_offset; + uint ps_ix = (_639.Load(68) >> uint(2)) + tm.pathseg_offset; uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; - TransformSegRef _768 = { _639.Load(36) + (tm.trans_ix * 24u) }; - TransformSegRef trans_ref = _768; - PathSegRef _778 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _778; + uint trans_ix = tm.trans_ix; + TransformSegRef _770 = { _639.Load(36) + (trans_ix * 24u) }; + TransformSegRef trans_ref = _770; + PathSegRef _780 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; + PathSegRef ps_ref = _780; + float linewidth[4]; + uint save_trans_ix[4]; float2 p0; float2 p1; float2 p2; @@ -411,6 +415,8 @@ void comp_main() Alloc param_15; for (uint i_1 = 0u; i_1 < 4u; i_1++) { + linewidth[i_1] = asfloat(_574.Load(lw_ix * 4 + 0)); + save_trans_ix[i_1] = trans_ix; uint tag_byte = tag_word >> (i_1 * 8u); uint seg_type = tag_byte & 3u; if (seg_type != 0u) @@ -449,10 +455,9 @@ void comp_main() } } } - float linewidth = asfloat(_574.Load(lw_ix * 4 + 0)); - Alloc _864; - _864.offset = _639.Load(36); - param_13.offset = _864.offset; + Alloc _876; + _876.offset = _639.Load(36); + param_13.offset = _876.offset; TransformSegRef param_14 = trans_ref; TransformSeg transform = TransformSeg_read(param_13, param_14); p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; @@ -461,25 +466,25 @@ void comp_main() if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _934 = bbox; - float2 _937 = min(_934.xy, p2); - bbox.x = _937.x; - bbox.y = _937.y; - float4 _942 = bbox; - float2 _945 = max(_942.zw, p2); - bbox.z = _945.x; - bbox.w = _945.y; + float4 _946 = bbox; + float2 _949 = min(_946.xy, p2); + bbox.x = _949.x; + bbox.y = _949.y; + float4 _954 = bbox; + float2 _957 = max(_954.zw, p2); + bbox.z = _957.x; + bbox.w = _957.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _970 = bbox; - float2 _973 = min(_970.xy, p3); - bbox.x = _973.x; - bbox.y = _973.y; - float4 _978 = bbox; - float2 _981 = max(_978.zw, p3); - bbox.z = _981.x; - bbox.w = _981.y; + float4 _982 = bbox; + float2 _985 = min(_982.xy, p3); + bbox.x = _985.x; + bbox.y = _985.y; + float4 _990 = bbox; + float2 _993 = max(_990.zw, p3); + bbox.z = _993.x; + bbox.w = _993.y; } else { @@ -495,9 +500,9 @@ void comp_main() p1 = lerp(p0, p3, 0.3333333432674407958984375f.xx); } float2 stroke = 0.0f.xx; - if (linewidth >= 0.0f) + if (linewidth[i_1] >= 0.0f) { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth); + stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth[i_1]); bbox += float4(-stroke, stroke); } local[i_1].bbox = bbox; @@ -509,10 +514,10 @@ void comp_main() cubic.path_ix = tm.path_ix; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.stroke = stroke; - uint fill_mode = uint(linewidth >= 0.0f); - Alloc _1070; - _1070.offset = _639.Load(28); - param_15.offset = _1070.offset; + uint fill_mode = uint(linewidth[i_1] >= 0.0f); + Alloc _1088; + _1088.offset = _639.Load(28); + param_15.offset = _1088.offset; PathSegRef param_16 = ps_ref; uint param_17 = fill_mode; PathCubic param_18 = cubic; @@ -528,6 +533,7 @@ void comp_main() uint is_path = (tag_byte >> uint(4)) & 1u; local[i_1].flags = is_path; tm.path_ix += is_path; + trans_ix += ((tag_byte >> uint(5)) & 1u); trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); lw_ix += ((tag_byte >> uint(6)) & 1u); } @@ -556,7 +562,7 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 4u); + uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 6u); Monoid row = monoid_identity(); if (gl_LocalInvocationID.x > 0u) { @@ -568,22 +574,24 @@ void comp_main() Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1240 = i_4 == 3u; - bool _1247; - if (_1240) + bool _1263 = i_4 == 3u; + bool _1270; + if (_1263) { - _1247 = gl_LocalInvocationID.x == 511u; + _1270 = gl_LocalInvocationID.x == 511u; } else { - _1247 = _1240; + _1270 = _1263; } - if (_1247) + if (_1270) { do_atomic = true; } if ((m.flags & 1u) != 0u) { + _111.Store((bbox_out_ix + 4u) * 4 + 8, asuint(linewidth[i_4])); + _111.Store((bbox_out_ix + 5u) * 4 + 8, save_trans_ix[i_4]); if ((m.flags & 2u) == 0u) { do_atomic = true; @@ -598,38 +606,38 @@ void comp_main() _111.Store((bbox_out_ix + 2u) * 4 + 8, round_up(param_27)); float param_28 = m.bbox.w; _111.Store((bbox_out_ix + 3u) * 4 + 8, round_up(param_28)); - bbox_out_ix += 4u; + bbox_out_ix += 6u; do_atomic = false; } } if (do_atomic) { - bool _1299 = m.bbox.z > m.bbox.x; - bool _1308; - if (!_1299) + bool _1335 = m.bbox.z > m.bbox.x; + bool _1344; + if (!_1335) { - _1308 = m.bbox.w > m.bbox.y; + _1344 = m.bbox.w > m.bbox.y; } else { - _1308 = _1299; + _1344 = _1335; } - if (_1308) + if (_1344) { float param_29 = m.bbox.x; - uint _1317; - _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1317); + uint _1353; + _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353); float param_30 = m.bbox.y; - uint _1325; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1325); + uint _1361; + _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361); float param_31 = m.bbox.z; - uint _1333; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1333); + uint _1369; + _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369); float param_32 = m.bbox.w; - uint _1341; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1341); + uint _1377; + _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377); } - bbox_out_ix += 4u; + bbox_out_ix += 6u; } } } diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl index 71299bd..0f60d4d 100644 --- a/piet-gpu/shader/gen/pathseg.msl +++ b/piet-gpu/shader/gen/pathseg.msl @@ -130,9 +130,10 @@ struct Config Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -429,7 +430,7 @@ uint round_up(thread const float& x) return uint(fast::min(65535.0, ceil(x) + 32768.0)); } -kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup TagMonoid sh_tag[512]; threadgroup Monoid sh_scratch[512]; @@ -455,12 +456,12 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 TagMonoid tm = tag_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _713 = gl_WorkGroupID.x - 1u; - tm.trans_ix = _710.parent[_713].trans_ix; - tm.linewidth_ix = _710.parent[_713].linewidth_ix; - tm.pathseg_ix = _710.parent[_713].pathseg_ix; - tm.path_ix = _710.parent[_713].path_ix; - tm.pathseg_offset = _710.parent[_713].pathseg_offset; + uint _712 = gl_WorkGroupID.x - 1u; + tm.trans_ix = _709.parent[_712].trans_ix; + tm.linewidth_ix = _709.parent[_712].linewidth_ix; + tm.pathseg_ix = _709.parent[_712].pathseg_ix; + tm.path_ix = _709.parent[_712].path_ix; + tm.pathseg_offset = _709.parent[_712].pathseg_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -471,8 +472,11 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 uint ps_ix = (_639.conf.pathseg_offset >> uint(2)) + tm.pathseg_offset; uint lw_ix = (_639.conf.linewidth_offset >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; - TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (tm.trans_ix * 24u) }; + uint trans_ix = tm.trans_ix; + TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (trans_ix * 24u) }; PathSegRef ps_ref = PathSegRef{ _639.conf.pathseg_alloc.offset + (tm.pathseg_ix * 52u) }; + spvUnsafeArray linewidth; + spvUnsafeArray save_trans_ix; float2 p0; float2 p1; float2 p2; @@ -483,6 +487,8 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 Alloc param_15; for (uint i_1 = 0u; i_1 < 4u; i_1++) { + linewidth[i_1] = as_type(v_574.scene[lw_ix]); + save_trans_ix[i_1] = trans_ix; uint tag_byte = tag_word >> (i_1 * 8u); uint seg_type = tag_byte & 3u; if (seg_type != 0u) @@ -521,7 +527,6 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } } } - float linewidth = as_type(v_574.scene[lw_ix]); param_13.offset = _639.conf.trans_alloc.offset; TransformSegRef param_14 = trans_ref; TransformSeg transform = TransformSeg_read(param_13, param_14, v_111); @@ -531,25 +536,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _934 = bbox; - float2 _937 = fast::min(_934.xy, p2); - bbox.x = _937.x; - bbox.y = _937.y; - float4 _942 = bbox; - float2 _945 = fast::max(_942.zw, p2); - bbox.z = _945.x; - bbox.w = _945.y; + float4 _946 = bbox; + float2 _949 = fast::min(_946.xy, p2); + bbox.x = _949.x; + bbox.y = _949.y; + float4 _954 = bbox; + float2 _957 = fast::max(_954.zw, p2); + bbox.z = _957.x; + bbox.w = _957.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _970 = bbox; - float2 _973 = fast::min(_970.xy, p3); - bbox.x = _973.x; - bbox.y = _973.y; - float4 _978 = bbox; - float2 _981 = fast::max(_978.zw, p3); - bbox.z = _981.x; - bbox.w = _981.y; + float4 _982 = bbox; + float2 _985 = fast::min(_982.xy, p3); + bbox.x = _985.x; + bbox.y = _985.y; + float4 _990 = bbox; + float2 _993 = fast::max(_990.zw, p3); + bbox.z = _993.x; + bbox.w = _993.y; } else { @@ -565,9 +570,9 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 p1 = mix(p0, p3, float2(0.3333333432674407958984375)); } float2 stroke = float2(0.0); - if (linewidth >= 0.0) + if (linewidth[i_1] >= 0.0) { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth); + stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth[i_1]); bbox += float4(-stroke, stroke); } local[i_1].bbox = bbox; @@ -579,7 +584,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 cubic.path_ix = tm.path_ix; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.stroke = stroke; - uint fill_mode = uint(linewidth >= 0.0); + uint fill_mode = uint(linewidth[i_1] >= 0.0); param_15.offset = _639.conf.pathseg_alloc.offset; PathSegRef param_16 = ps_ref; uint param_17 = fill_mode; @@ -596,6 +601,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 uint is_path = (tag_byte >> uint(4)) & 1u; local[i_1].flags = is_path; tm.path_ix += is_path; + trans_ix += ((tag_byte >> uint(5)) & 1u); trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); lw_ix += ((tag_byte >> uint(6)) & 1u); } @@ -624,7 +630,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } threadgroup_barrier(mem_flags::mem_threadgroup); uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.conf.bbox_alloc.offset >> uint(2)) + (path_ix * 4u); + uint bbox_out_ix = (_639.conf.bbox_alloc.offset >> uint(2)) + (path_ix * 6u); Monoid row = monoid_identity(); if (gl_LocalInvocationID.x > 0u) { @@ -636,22 +642,24 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1240 = i_4 == 3u; - bool _1247; - if (_1240) + bool _1263 = i_4 == 3u; + bool _1270; + if (_1263) { - _1247 = gl_LocalInvocationID.x == 511u; + _1270 = gl_LocalInvocationID.x == 511u; } else { - _1247 = _1240; + _1270 = _1263; } - if (_1247) + if (_1270) { do_atomic = true; } if ((m.flags & 1u) != 0u) { + v_111.memory[bbox_out_ix + 4u] = as_type(linewidth[i_4]); + v_111.memory[bbox_out_ix + 5u] = save_trans_ix[i_4]; if ((m.flags & 2u) == 0u) { do_atomic = true; @@ -666,34 +674,34 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 v_111.memory[bbox_out_ix + 2u] = round_up(param_27); float param_28 = m.bbox.w; v_111.memory[bbox_out_ix + 3u] = round_up(param_28); - bbox_out_ix += 4u; + bbox_out_ix += 6u; do_atomic = false; } } if (do_atomic) { - bool _1299 = m.bbox.z > m.bbox.x; - bool _1308; - if (!_1299) + bool _1335 = m.bbox.z > m.bbox.x; + bool _1344; + if (!_1335) { - _1308 = m.bbox.w > m.bbox.y; + _1344 = m.bbox.w > m.bbox.y; } else { - _1308 = _1299; + _1344 = _1335; } - if (_1308) + if (_1344) { float param_29 = m.bbox.x; - uint _1317 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); + uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); float param_30 = m.bbox.y; - uint _1325 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); + uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); float param_31 = m.bbox.z; - uint _1333 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); + uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); float param_32 = m.bbox.w; - uint _1341 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); + uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); } - bbox_out_ix += 4u; + bbox_out_ix += 6u; } } } diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv index bc165ac..fc63eb5 100644 Binary files a/piet-gpu/shader/gen/pathseg.spv and b/piet-gpu/shader/gen/pathseg.spv differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 02a4750..d585c96 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and b/piet-gpu/shader/gen/pathtag_reduce.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl index 5e98362..dd7c611 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ b/piet-gpu/shader/gen/pathtag_reduce.hlsl @@ -27,18 +27,19 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); ByteAddressBuffer _139 : register(t1); -ByteAddressBuffer _151 : register(t2); -RWByteAddressBuffer _239 : register(u3); -RWByteAddressBuffer _259 : register(u0); +ByteAddressBuffer _150 : register(t2); +RWByteAddressBuffer _238 : register(u3); +RWByteAddressBuffer _258 : register(u0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -82,13 +83,13 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint scene_ix = (_139.Load(56) >> uint(2)) + ix; - uint tag_word = _151.Load(scene_ix * 4 + 0); + uint scene_ix = (_139.Load(64) >> uint(2)) + ix; + uint tag_word = _150.Load(scene_ix * 4 + 0); uint param = tag_word; TagMonoid agg = reduce_tag(param); for (uint i = 1u; i < 4u; i++) { - tag_word = _151.Load((scene_ix + i) * 4 + 0); + tag_word = _150.Load((scene_ix + i) * 4 + 0); uint param_1 = tag_word; TagMonoid param_2 = agg; TagMonoid param_3 = reduce_tag(param_1); @@ -110,11 +111,11 @@ void comp_main() } if (gl_LocalInvocationID.x == 0u) { - _239.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); - _239.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); - _239.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); - _239.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); - _239.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); + _238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); + _238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); + _238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); + _238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); + _238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); } } diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl index 38451d4..e82577c 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ b/piet-gpu/shader/gen/pathtag_reduce.msl @@ -34,9 +34,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -102,17 +103,17 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& return c; } -kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _239 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup TagMonoid sh_scratch[128]; uint ix = gl_GlobalInvocationID.x * 4u; uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; - uint tag_word = _151.scene[scene_ix]; + uint tag_word = _150.scene[scene_ix]; uint param = tag_word; TagMonoid agg = reduce_tag(param); for (uint i = 1u; i < 4u; i++) { - tag_word = _151.scene[scene_ix + i]; + tag_word = _150.scene[scene_ix + i]; uint param_1 = tag_word; TagMonoid param_2 = agg; TagMonoid param_3 = reduce_tag(param_1); @@ -134,11 +135,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene } if (gl_LocalInvocationID.x == 0u) { - _239.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; - _239.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; - _239.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; - _239.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _239.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; + _238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; + _238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; + _238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; + _238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; } } diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv index eef46a2..6dc35b8 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.spv and b/piet-gpu/shader/gen/pathtag_reduce.spv differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil index dabc049..102d2f0 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and b/piet-gpu/shader/gen/transform_leaf.dxil differ diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl index 2f0de05..6fa9267 100644 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ b/piet-gpu/shader/gen/transform_leaf.hlsl @@ -40,9 +40,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -149,7 +150,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _285 = { _278.Load(52) + (ix * 24u) }; + TransformRef _285 = { _278.Load(56) + (ix * 24u) }; TransformRef ref = _285; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl index 3120b3d..9c7e6b7 100644 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ b/piet-gpu/shader/gen/transform_leaf.msl @@ -103,9 +103,10 @@ struct Config Alloc_1 bbox_alloc; Alloc_1 drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv index 01f047b..e561e9d 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.spv and b/piet-gpu/shader/gen/transform_leaf.spv differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil index 68997d0..1ed5e0e 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and b/piet-gpu/shader/gen/transform_reduce.dxil differ diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl index 9d8a5d6..60addf3 100644 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ b/piet-gpu/shader/gen/transform_reduce.hlsl @@ -29,9 +29,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; @@ -86,7 +87,7 @@ Transform combine_monoid(Transform a, Transform b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(52) + (ix * 24u) }; + TransformRef _168 = { _161.Load(56) + (ix * 24u) }; TransformRef ref = _168; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl index e61b602..ac586d9 100644 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ b/piet-gpu/shader/gen/transform_reduce.msl @@ -41,9 +41,10 @@ struct Config Alloc bbox_alloc; Alloc drawmonoid_alloc; uint n_trans; + uint n_path; uint trans_offset; - uint pathtag_offset; uint linewidth_offset; + uint pathtag_offset; uint pathseg_offset; }; diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv index 77eadb2..5638afb 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.spv and b/piet-gpu/shader/gen/transform_reduce.spv differ diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 04b6364..0eb1e5a 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp index ea525f5..1bd06f9 100644 --- a/piet-gpu/shader/path_coarse.comp +++ b/piet-gpu/shader/path_coarse.comp @@ -100,6 +100,8 @@ void main() { case PathSeg_Cubic: PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref); + // Affine transform is now applied in pathseg + /* uint trans_ix = cubic.trans_ix; if (trans_ix > 0) { TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size); @@ -109,6 +111,7 @@ void main() { cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate; cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate; } + */ vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; float err = err_v.x * err_v.x + err_v.y * err_v.y; diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index 240f8f7..0da044f 100644 Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ diff --git a/piet-gpu/shader/pathseg.comp b/piet-gpu/shader/pathseg.comp index 7b8f3f0..ec0a440 100644 --- a/piet-gpu/shader/pathseg.comp +++ b/piet-gpu/shader/pathseg.comp @@ -92,6 +92,8 @@ uint round_up(float x) { void main() { Monoid local[N_SEQ]; + float linewidth[N_SEQ]; + uint save_trans_ix[N_SEQ]; uint ix = gl_GlobalInvocationID.x * N_SEQ; @@ -124,9 +126,12 @@ void main() { uint ps_ix = (conf.pathseg_offset >> 2) + tm.pathseg_offset; uint lw_ix = (conf.linewidth_offset >> 2) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; - TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + tm.trans_ix * TransformSeg_size); + uint trans_ix = tm.trans_ix; + TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + trans_ix * TransformSeg_size); PathSegRef ps_ref = PathSegRef(conf.pathseg_alloc.offset + tm.pathseg_ix * PathSeg_size); for (uint i = 0; i < N_SEQ; i++) { + linewidth[i] = uintBitsToFloat(scene[lw_ix]); + save_trans_ix[i] = trans_ix; // if N_SEQ > 4, need to load tag_word from local if N_SEQ % 4 == 0 uint tag_byte = tag_word >> (i * 8); uint seg_type = tag_byte & 3; @@ -158,7 +163,6 @@ void main() { } } } - float linewidth = uintBitsToFloat(scene[lw_ix]); TransformSeg transform = TransformSeg_read(conf.trans_alloc, trans_ref); p0 = transform.mat.xy * p0.x + transform.mat.zw * p0.y + transform.translate; p1 = transform.mat.xy * p1.x + transform.mat.zw * p1.y + transform.translate; @@ -183,9 +187,9 @@ void main() { p1 = mix(p0, p3, 1.0 / 3.0); } vec2 stroke = vec2(0.0, 0.0); - if (linewidth >= 0.0) { + if (linewidth[i] >= 0.0) { // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm - stroke = 0.5 * linewidth * vec2(length(transform.mat.xz), length(transform.mat.yw)); + stroke = 0.5 * linewidth[i] * vec2(length(transform.mat.xz), length(transform.mat.yw)); bbox += vec4(-stroke, stroke); } local[i].bbox = bbox; @@ -201,7 +205,7 @@ void main() { // Not needed, TODO remove from struct cubic.trans_ix = gl_GlobalInvocationID.x * 4 + i; cubic.stroke = stroke; - uint fill_mode = uint(linewidth >= 0.0); + uint fill_mode = uint(linewidth[i] >= 0.0); PathSeg_Cubic_write(conf.pathseg_alloc, ps_ref, fill_mode, cubic); ps_ref.offset += PathSeg_size; @@ -215,6 +219,7 @@ void main() { // Relies on the fact that RESET_BBOX == 1 local[i].flags = is_path; tm.path_ix += is_path; + trans_ix += (tag_byte >> 5) & 1; trans_ref.offset += ((tag_byte >> 5) & 1) * TransformSeg_size; lw_ix += (tag_byte >> 6) & 1; } @@ -244,7 +249,7 @@ void main() { barrier(); uint path_ix = save_path_ix; - uint bbox_out_ix = (conf.bbox_alloc.offset >> 2) + path_ix * 4; + uint bbox_out_ix = (conf.bbox_alloc.offset >> 2) + path_ix * 6; // Write bboxes to paths; do atomic min/max if partial Monoid row = monoid_identity(); if (gl_LocalInvocationID.x > 0) { @@ -259,6 +264,8 @@ void main() { do_atomic = true; } if ((m.flags & FLAG_RESET_BBOX) != 0) { + memory[bbox_out_ix + 4] = floatBitsToUint(linewidth[i]); + memory[bbox_out_ix + 5] = save_trans_ix[i]; if ((m.flags & FLAG_SET_BBOX) == 0) { do_atomic = true; } else { @@ -266,7 +273,7 @@ void main() { memory[bbox_out_ix + 1] = round_down(m.bbox.y); memory[bbox_out_ix + 2] = round_up(m.bbox.z); memory[bbox_out_ix + 3] = round_up(m.bbox.w); - bbox_out_ix += 4; + bbox_out_ix += 6; do_atomic = false; } } @@ -278,7 +285,7 @@ void main() { atomicMax(memory[bbox_out_ix + 2], round_up(m.bbox.z)); atomicMax(memory[bbox_out_ix + 3], round_up(m.bbox.w)); } - bbox_out_ix += 4; + bbox_out_ix += 6; } } } diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 3bb1fdd..5d4cc73 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -48,12 +48,14 @@ struct Config { // Number of transforms in scene // This is probably not needed. uint n_trans; + // This only counts actual paths, not EndClip. + uint n_path; // Offset (in bytes) of transform stream in scene buffer uint trans_offset; - // Offset (in bytes) of path tag stream in scene - uint pathtag_offset; // Offset (in bytes) of linewidth stream in scene uint linewidth_offset; + // Offset (in bytes) of path tag stream in scene + uint pathtag_offset; // Offset (in bytes) of path segment stream in scene uint pathseg_offset; }; diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index 0de00e3..b443b03 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs new file mode 100644 index 0000000..12e9db4 --- /dev/null +++ b/piet-gpu/src/encoder.rs @@ -0,0 +1,199 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Low-level scene encoding. + +use bytemuck::{Pod, Zeroable}; +use piet_gpu_hal::BufWrite; + +use crate::stages::{self, Config, PathEncoder, Transform}; + +pub struct Encoder { + transform_stream: Vec, + tag_stream: Vec, + pathseg_stream: Vec, + linewidth_stream: Vec, + drawobj_stream: Vec, + n_path: u32, + n_pathseg: u32, +} + +// Currently same as Element, but may change - should become packed. +const DRAWOBJ_SIZE: usize = 36; +const TRANSFORM_SIZE: usize = 24; +const LINEWIDTH_SIZE: usize = 4; +const PATHSEG_SIZE: usize = 52; +const BBOX_SIZE: usize = 24; +const DRAWMONOID_SIZE: usize = 8; +const ANNOTATED_SIZE: usize = 40; + +// Maybe pull these from the relevant stages? In any case, they may depend +// on runtime query of GPU (supported workgroup size). +const TRANSFORM_PART_SIZE: usize = 4096; +const PATHSEG_PART_SIZE: usize = 2048; +const DRAWOBJ_PART_SIZE: usize = 4096; + +// These are bytemuck versions of elements currently defined in the +// Element struct in piet-gpu-types; that's pretty much going away. + +const ELEMENT_FILLCOLOR: u32 = 4; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct FillColor { + tag: u32, + rgba_color: u32, + padding: [u32; 7], +} + +impl Encoder { + pub fn new() -> Encoder { + Encoder { + transform_stream: vec![Transform::IDENTITY], + tag_stream: Vec::new(), + pathseg_stream: Vec::new(), + linewidth_stream: vec![-1.0], + drawobj_stream: Vec::new(), + n_path: 0, + n_pathseg: 0, + } + } + + pub fn path_encoder(&mut self) -> PathEncoder { + PathEncoder::new(&mut self.tag_stream, &mut self.pathseg_stream) + } + + pub fn finish_path(&mut self, n_pathseg: u32) { + self.n_path += 1; + self.n_pathseg += n_pathseg; + } + + pub fn transform(&mut self, transform: Transform) { + self.tag_stream.push(0x20); + self.transform_stream.push(transform); + } + + // -1.0 means "fill" + pub fn linewidth(&mut self, linewidth: f32) { + self.tag_stream.push(0x40); + self.linewidth_stream.push(linewidth); + } + + /// Encode a fill color draw object. + /// + /// This should be encoded after a path. + pub fn fill_color(&mut self, rgba_color: u32) { + let element = FillColor { + tag: ELEMENT_FILLCOLOR, + rgba_color, + ..Default::default() + }; + self.drawobj_stream.extend(bytemuck::bytes_of(&element)); + } + + /// Return a config for the element processing pipeline. + /// + /// This does not include further pipeline processing. Also returns the + /// beginning of free memory. + pub fn stage_config(&self) -> (Config, usize) { + // Layout of scene buffer + let n_drawobj = self.n_drawobj(); + let n_drawobj_padded = align_up(n_drawobj, DRAWOBJ_PART_SIZE); + let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE; + let n_trans = self.transform_stream.len(); + let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE); + let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE; + let n_linewidth = self.linewidth_stream.len(); + let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE; + let n_pathtag = self.tag_stream.len(); + let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE); + let pathseg_offset = pathtag_offset + n_pathtag_padded; + + // Layout of memory + let mut alloc = 0; + let trans_alloc = alloc; + alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE; + let pathseg_alloc = alloc; + alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE; + let bbox_alloc = alloc; + let n_path = self.n_path as usize; + alloc += bbox_alloc + n_path * BBOX_SIZE; + let drawmonoid_alloc = alloc; + alloc += n_drawobj_padded * DRAWMONOID_SIZE; + let anno_alloc = alloc; + alloc += n_drawobj * ANNOTATED_SIZE; + + let config = Config { + n_elements: n_drawobj as u32, + n_pathseg: self.n_pathseg, + pathseg_alloc: pathseg_alloc as u32, + anno_alloc: anno_alloc as u32, + trans_alloc: trans_alloc as u32, + bbox_alloc: bbox_alloc as u32, + drawmonoid_alloc: drawmonoid_alloc as u32, + n_trans: n_trans as u32, + n_path: self.n_path, + trans_offset: trans_offset as u32, + linewidth_offset: linewidth_offset as u32, + pathtag_offset: pathtag_offset as u32, + pathseg_offset: pathseg_offset as u32, + ..Default::default() + }; + (config, alloc) + } + + pub fn write_scene(&self, buf: &mut BufWrite) { + buf.extend_slice(&self.drawobj_stream); + let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE; + buf.fill_zero(padding(n_drawobj, DRAWOBJ_PART_SIZE) * DRAWOBJ_SIZE); + buf.extend_slice(&self.transform_stream); + let n_trans = self.transform_stream.len(); + buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE) * TRANSFORM_SIZE); + buf.extend_slice(&self.linewidth_stream); + buf.extend_slice(&self.tag_stream); + let n_pathtag = self.tag_stream.len(); + buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE)); + buf.extend_slice(&self.pathseg_stream); + } + + /// The number of elements in the draw object stream. + pub(crate) fn n_drawobj(&self) -> usize { + self.drawobj_stream.len() / DRAWOBJ_SIZE + } + + /// The number of paths. + pub(crate) fn n_path(&self) -> u32 { + self.n_path + } + + /// The number of path segments. + pub(crate) fn n_pathseg(&self) -> u32 { + self.n_pathseg + } + + pub(crate) fn n_transform(&self) -> usize { + self.transform_stream.len() + } +} + +fn align_up(x: usize, align: usize) -> usize { + debug_assert!(align.is_power_of_two()); + (x + align - 1) & !(align - 1) +} + +fn padding(x: usize, align: usize) -> usize { + x.wrapping_neg() & (align - 1) +} diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 2b45e7c..25627f6 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -1,3 +1,4 @@ +mod encoder; mod gradient; mod pico_svg; mod render_ctx; @@ -12,16 +13,15 @@ pub use render_ctx::PietGpuRenderContext; use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; -use piet_gpu_types::encoder::Encode; - use piet_gpu_hal::{ BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, ImageLayout, Pipeline, QueryPool, Session, ShaderCode, }; use pico_svg::PicoSvg; +use stages::{ElementBinding, ElementCode}; -use crate::stages::Config; +use crate::stages::{Config, ElementStage}; const TILE_W: usize = 16; const TILE_H: usize = 16; @@ -70,8 +70,10 @@ pub struct Renderer { // Device config buf config_buf: Buffer, - el_pipeline: Pipeline, - el_ds: Vec, + // New element pipeline + element_code: ElementCode, + element_stage: ElementStage, + element_bindings: Vec, tile_pipeline: Pipeline, tile_ds: DescriptorSet, @@ -91,7 +93,8 @@ pub struct Renderer { k4_pipeline: Pipeline, k4_ds: DescriptorSet, - n_elements: usize, + n_transform: usize, + n_drawobj: usize, n_paths: usize, n_pathseg: usize, @@ -120,7 +123,7 @@ impl Renderer { // TODO: separate staging buffer (if needed) let scene_bufs = (0..n_bufs) .map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap()) - .collect(); + .collect::>(); let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let image_dev = session.create_image2d(width as u32, height as u32)?; @@ -142,23 +145,21 @@ impl Renderer { .collect(); let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; - let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv")); - let el_pipeline = session.create_compute_pipeline( - el_code, - &[ - BindType::Buffer, - BindType::Buffer, - BindType::Buffer, - BindType::Buffer, - ], - )?; - let mut el_ds = Vec::with_capacity(n_bufs); - for scene_buf in &scene_bufs { - el_ds.push(session.create_simple_descriptor_set( - &el_pipeline, - &[&memory_buf_dev, &config_buf, scene_buf, &state_buf], - )?); - } + let element_code = ElementCode::new(session); + let element_stage = ElementStage::new(session, &element_code); + let element_bindings = scene_bufs + .iter() + .zip(&config_bufs) + .map(|(scene_buf, config_buf)| { + element_stage.bind( + session, + &element_code, + config_buf, + scene_buf, + &memory_buf_dev, + ) + }) + .collect(); let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv")); let tile_pipeline = session @@ -237,8 +238,9 @@ impl Renderer { config_buf, config_bufs, image_dev, - el_pipeline, - el_ds, + element_code, + element_stage, + element_bindings, tile_pipeline, tile_ds, path_pipeline, @@ -251,7 +253,8 @@ impl Renderer { coarse_ds, k4_pipeline, k4_ds, - n_elements: 0, + n_transform: 0, + n_drawobj: 0, n_paths: 0, n_pathseg: 0, _bg_image: bg_image, @@ -270,55 +273,38 @@ impl Renderer { render_ctx: &mut PietGpuRenderContext, buf_ix: usize, ) -> Result<(), Error> { - let n_paths = render_ctx.path_count(); - let n_pathseg = render_ctx.pathseg_count(); - let n_trans = render_ctx.trans_count(); - self.n_paths = n_paths; - self.n_pathseg = n_pathseg; + let (mut config, mut alloc) = render_ctx.stage_config(); + let n_drawobj = render_ctx.n_drawobj(); + // TODO: be more consistent in size types + let n_path = render_ctx.n_path() as usize; + self.n_paths = n_path; + self.n_transform = render_ctx.n_transform(); + self.n_drawobj = render_ctx.n_drawobj(); + self.n_pathseg = render_ctx.n_pathseg() as usize; // These constants depend on encoding and may need to be updated. // Perhaps we can plumb these from piet-gpu-derive? const PATH_SIZE: usize = 12; const BIN_SIZE: usize = 8; - const PATHSEG_SIZE: usize = 52; - const ANNO_SIZE: usize = 40; - const TRANS_SIZE: usize = 24; let width_in_tiles = self.width / TILE_W; let height_in_tiles = self.height / TILE_H; - let mut alloc = 0; let tile_base = alloc; - alloc += ((n_paths + 3) & !3) * PATH_SIZE; + alloc += ((n_path + 3) & !3) * PATH_SIZE; let bin_base = alloc; - alloc += ((n_paths + 255) & !255) * BIN_SIZE; + alloc += ((n_drawobj + 255) & !255) * BIN_SIZE; let ptcl_base = alloc; alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC; - let pathseg_base = alloc; - alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3; - let anno_base = alloc; - alloc += (n_paths * ANNO_SIZE + 3) & !3; - let trans_base = alloc; - alloc += (n_trans * TRANS_SIZE + 3) & !3; - let config = Config { - n_elements: n_paths as u32, - n_pathseg: n_pathseg as u32, - width_in_tiles: width_in_tiles as u32, - height_in_tiles: height_in_tiles as u32, - tile_alloc: tile_base as u32, - bin_alloc: bin_base as u32, - ptcl_alloc: ptcl_base as u32, - pathseg_alloc: pathseg_base as u32, - anno_alloc: anno_base as u32, - trans_alloc: trans_base as u32, - n_trans: n_trans as u32, - // We'll fill the rest of the fields in when we hook up the new element pipeline. - ..Default::default() - }; + config.width_in_tiles = width_in_tiles as u32; + config.height_in_tiles = height_in_tiles as u32; + config.tile_alloc = tile_base as u32; + config.bin_alloc = bin_base as u32; + config.ptcl_alloc = ptcl_base as u32; unsafe { - let scene = render_ctx.get_scene_buf(); - self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); // TODO: reallocate scene buffer if size is inadequate - assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len()); - self.scene_bufs[buf_ix].write(scene)?; + { + let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?; + render_ctx.write_scene(&mut mapped_scene); + } self.config_bufs[buf_ix].write(&[config])?; self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?; @@ -355,11 +341,14 @@ impl Renderer { cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); cmd_buf.write_timestamp(&query_pool, 0); - cmd_buf.dispatch( - &self.el_pipeline, - &self.el_ds[buf_ix], - (((self.n_elements + 127) / 128) as u32, 1, 1), - (128, 1, 1), + self.element_stage.record( + cmd_buf, + &self.element_code, + &self.element_bindings[buf_ix], + self.n_transform as u64, + self.n_paths as u32, + self.n_pathseg as u32, + self.n_drawobj as u64, ); cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index f050c76..5b10fec 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; +use crate::stages::Config; use crate::MAX_BLEND_STACK; use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape}; use piet::{ @@ -7,11 +8,9 @@ use piet::{ StrokeStyle, }; +use piet_gpu_hal::BufWrite; use piet_gpu_types::encoder::{Encode, Encoder}; -use piet_gpu_types::scene::{ - Clip, CubicSeg, Element, FillColor, FillLinGradient, LineSeg, QuadSeg, SetFillMode, - SetLineWidth, Transform, -}; +use piet_gpu_types::scene::{Clip, Element, FillColor, FillLinGradient, SetFillMode, Transform}; use crate::gradient::{LinearGradient, RampCache}; use crate::text::Font; @@ -40,6 +39,10 @@ pub struct PietGpuRenderContext { clip_stack: Vec, ramp_cache: RampCache, + + // Fields for new element processing pipeline below + // TODO: delete old encoder, rename + new_encoder: crate::encoder::Encoder, } #[derive(Clone)] @@ -81,7 +84,7 @@ impl PietGpuRenderContext { let elements = Vec::new(); let font = Font::new(); let inner_text = PietGpuText::new(font); - let stroke_width = 0.0; + let stroke_width = -1.0; PietGpuRenderContext { encoder, elements, @@ -95,9 +98,40 @@ impl PietGpuRenderContext { state_stack: Vec::new(), clip_stack: Vec::new(), ramp_cache: RampCache::default(), + new_encoder: crate::encoder::Encoder::new(), } } + pub fn stage_config(&self) -> (Config, usize) { + self.new_encoder.stage_config() + } + + /// Number of draw objects. + /// + /// This is for the new element processing pipeline. It's not necessarily the + /// same as the number of paths (as in the old pipeline), but it might take a + /// while to sort that out. + pub fn n_drawobj(&self) -> usize { + self.new_encoder.n_drawobj() + } + + /// Number of paths. + pub fn n_path(&self) -> u32 { + self.new_encoder.n_path() + } + + pub fn n_pathseg(&self) -> u32 { + self.new_encoder.n_pathseg() + } + + pub fn n_transform(&self) -> usize { + self.new_encoder.n_transform() + } + + pub fn write_scene(&self, buf: &mut BufWrite) { + self.new_encoder.write_scene(buf); + } + pub fn get_scene_buf(&mut self) -> &[u8] { const ALIGN: usize = 128; let padded_size = (self.elements.len() + (ALIGN - 1)) & ALIGN.wrapping_neg(); @@ -171,13 +205,7 @@ impl RenderContext for PietGpuRenderContext { fn clear(&mut self, _color: Color) {} fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush, width: f64) { - let width_f32 = width as f32; - if self.stroke_width != width_f32 { - self.elements - .push(Element::SetLineWidth(SetLineWidth { width: width_f32 })); - self.stroke_width = width_f32; - } - self.set_fill_mode(FillMode::Stroke); + self.encode_linewidth(width.abs() as f32); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); // Note: the bbox contribution of stroke becomes more complicated with miter joins. self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5)); @@ -201,7 +229,7 @@ impl RenderContext for PietGpuRenderContext { // Perhaps that should be added to kurbo. self.accumulate_bbox(|| shape.bounding_box()); let path = shape.path_elements(TOLERANCE); - self.set_fill_mode(FillMode::Nonzero); + self.encode_linewidth(-1.0); self.encode_path(path, true); self.encode_brush(&brush); } @@ -318,21 +346,6 @@ impl RenderContext for PietGpuRenderContext { } impl PietGpuRenderContext { - fn encode_line_seg(&mut self, seg: LineSeg) { - self.elements.push(Element::Line(seg)); - self.pathseg_count += 1; - } - - fn encode_quad_seg(&mut self, seg: QuadSeg) { - self.elements.push(Element::Quad(seg)); - self.pathseg_count += 1; - } - - fn encode_cubic_seg(&mut self, seg: CubicSeg) { - self.elements.push(Element::Cubic(seg)); - self.pathseg_count += 1; - } - fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( @@ -352,99 +365,34 @@ impl PietGpuRenderContext { } fn encode_path_inner(&mut self, path: impl Iterator) { - let flatten = false; - if flatten { - let mut start_pt = None; - let mut last_pt = None; - piet::kurbo::flatten(path, TOLERANCE, |el| { - match el { - PathEl::MoveTo(p) => { - let scene_pt = to_f32_2(p); - start_pt = Some(scene_pt); - last_pt = Some(scene_pt); - } - PathEl::LineTo(p) => { - let scene_pt = to_f32_2(p); - let seg = LineSeg { - p0: last_pt.unwrap(), - p1: scene_pt, - }; - self.encode_line_seg(seg); - last_pt = Some(scene_pt); - } - PathEl::ClosePath => { - if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) { - if last != start { - let seg = LineSeg { - p0: last, - p1: start, - }; - self.encode_line_seg(seg); - } - } - } - _ => (), + let mut pe = self.new_encoder.path_encoder(); + for el in path { + match el { + PathEl::MoveTo(p) => { + let p = to_f32_2(p); + pe.move_to(p[0], p[1]); } - //println!("{:?}", el); - }); - } else { - let mut start_pt = None; - let mut last_pt = None; - for el in path { - match el { - PathEl::MoveTo(p) => { - let scene_pt = to_f32_2(p); - start_pt = Some(scene_pt); - last_pt = Some(scene_pt); - } - PathEl::LineTo(p) => { - let scene_pt = to_f32_2(p); - let seg = LineSeg { - p0: last_pt.unwrap(), - p1: scene_pt, - }; - self.encode_line_seg(seg); - last_pt = Some(scene_pt); - } - PathEl::QuadTo(p1, p2) => { - let scene_p1 = to_f32_2(p1); - let scene_p2 = to_f32_2(p2); - let seg = QuadSeg { - p0: last_pt.unwrap(), - p1: scene_p1, - p2: scene_p2, - }; - self.encode_quad_seg(seg); - last_pt = Some(scene_p2); - } - PathEl::CurveTo(p1, p2, p3) => { - let scene_p1 = to_f32_2(p1); - let scene_p2 = to_f32_2(p2); - let scene_p3 = to_f32_2(p3); - let seg = CubicSeg { - p0: last_pt.unwrap(), - p1: scene_p1, - p2: scene_p2, - p3: scene_p3, - }; - self.encode_cubic_seg(seg); - last_pt = Some(scene_p3); - } - PathEl::ClosePath => { - if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) { - if last != start { - let seg = LineSeg { - p0: last, - p1: start, - }; - self.encode_line_seg(seg); - } - } - } + PathEl::LineTo(p) => { + let p = to_f32_2(p); + pe.line_to(p[0], p[1]); } - //println!("{:?}", el); + PathEl::QuadTo(p1, p2) => { + let p1 = to_f32_2(p1); + let p2 = to_f32_2(p2); + pe.quad_to(p1[0], p1[1], p2[0], p2[1]); + } + PathEl::CurveTo(p1, p2, p3) => { + let p1 = to_f32_2(p1); + let p2 = to_f32_2(p2); + let p3 = to_f32_2(p3); + pe.cubic_to(p1[0], p1[1], p2[0], p2[1], p3[0], p3[1]); + } + PathEl::ClosePath => pe.close_path(), } } + pe.path(); + let n_pathseg = pe.n_pathseg(); + self.new_encoder.finish_path(n_pathseg); } fn pop_clip(&mut self) { @@ -511,14 +459,17 @@ impl PietGpuRenderContext { self.trans_count += 1; } + fn encode_linewidth(&mut self, linewidth: f32) { + if self.stroke_width != linewidth { + self.new_encoder.linewidth(linewidth); + self.stroke_width = linewidth; + } + } + fn encode_brush(&mut self, brush: &PietGpuBrush) { match brush { PietGpuBrush::Solid(rgba_color) => { - let fill = FillColor { - rgba_color: *rgba_color, - }; - self.elements.push(Element::FillColor(fill)); - self.path_count += 1; + self.new_encoder.fill_color(*rgba_color); } PietGpuBrush::LinGradient(lin) => { let fill_lin = FillLinGradient { diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index f4a086c..1683cac 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -24,13 +24,14 @@ use bytemuck::{Pod, Zeroable}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage}; +use piet_gpu_hal::{Buffer, CmdBuf, Session}; pub use transform::{Transform, TransformBinding, TransformCode, TransformStage}; /// The configuration block passed to piet-gpu shaders. /// /// Note: this should be kept in sync with the version in setup.h. #[repr(C)] -#[derive(Clone, Copy, Default, Zeroable, Pod)] +#[derive(Clone, Copy, Default, Debug, Zeroable, Pod)] pub struct Config { pub n_elements: u32, // paths pub n_pathseg: u32, @@ -45,8 +46,111 @@ pub struct Config { pub bbox_alloc: u32, pub drawmonoid_alloc: u32, pub n_trans: u32, + pub n_path: u32, pub trans_offset: u32, - pub pathtag_offset: u32, pub linewidth_offset: u32, + pub pathtag_offset: u32, pub pathseg_offset: u32, } + +// The "element" stage combines a number of stages for parts of the pipeline. + +pub struct ElementCode { + transform_code: TransformCode, + path_code: PathCode, + draw_code: DrawCode, +} + +pub struct ElementStage { + transform_stage: TransformStage, + path_stage: PathStage, + draw_stage: DrawStage, +} + +pub struct ElementBinding { + transform_binding: TransformBinding, + path_binding: PathBinding, + draw_binding: DrawBinding, +} + +impl ElementCode { + pub unsafe fn new(session: &Session) -> ElementCode { + ElementCode { + transform_code: TransformCode::new(session), + path_code: PathCode::new(session), + draw_code: DrawCode::new(session), + } + } +} + +impl ElementStage { + pub unsafe fn new(session: &Session, code: &ElementCode) -> ElementStage { + ElementStage { + transform_stage: TransformStage::new(session, &code.transform_code), + path_stage: PathStage::new(session, &code.path_code), + draw_stage: DrawStage::new(session, &code.draw_code), + } + } + + pub unsafe fn bind( + &self, + session: &Session, + code: &ElementCode, + config_buf: &Buffer, + scene_buf: &Buffer, + memory_buf: &Buffer, + ) -> ElementBinding { + ElementBinding { + transform_binding: self.transform_stage.bind( + session, + &code.transform_code, + config_buf, + scene_buf, + memory_buf, + ), + path_binding: self.path_stage.bind( + session, + &code.path_code, + config_buf, + scene_buf, + memory_buf, + ), + draw_binding: self.draw_stage.bind( + session, + &code.draw_code, + config_buf, + scene_buf, + memory_buf, + ), + } + } + + pub unsafe fn record( + &self, + cmd_buf: &mut CmdBuf, + code: &ElementCode, + binding: &ElementBinding, + n_transform: u64, + n_paths: u32, + n_tags: u32, + n_drawobj: u64, + ) { + self.transform_stage.record( + cmd_buf, + &code.transform_code, + &binding.transform_binding, + n_transform, + ); + // No memory barrier needed here; path has at least one before pathseg + self.path_stage.record( + cmd_buf, + &code.path_code, + &binding.path_binding, + n_paths, + n_tags, + ); + // No memory barrier needed here; draw has at least one before draw_leaf + self.draw_stage + .record(cmd_buf, &code.draw_code, &binding.draw_binding, n_drawobj); + } +} diff --git a/piet-gpu/src/stages/draw.rs b/piet-gpu/src/stages/draw.rs index d50c6cb..da773cf 100644 --- a/piet-gpu/src/stages/draw.rs +++ b/piet-gpu/src/stages/draw.rs @@ -151,8 +151,8 @@ impl DrawStage { (1, 1, 1), (DRAW_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); } + cmd_buf.memory_barrier(); cmd_buf.dispatch( &code.leaf_pipeline, &binding.leaf_ds, diff --git a/piet-gpu/src/stages/path.rs b/piet-gpu/src/stages/path.rs index e233c65..c9d2c60 100644 --- a/piet-gpu/src/stages/path.rs +++ b/piet-gpu/src/stages/path.rs @@ -258,11 +258,11 @@ impl<'a> PathEncoder<'a> { self.n_pathseg += 1; } - pub fn quad_to(&mut self, x0: f32, y0: f32, x1: f32, y1: f32) { + pub fn quad_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) { if self.state == State::Start { return; } - let buf = [x0, y0, x1, y1]; + let buf = [x1, y1, x2, y2]; let bytes = bytemuck::bytes_of(&buf); self.pathseg_stream.extend_from_slice(bytes); self.tag_stream.push(10); @@ -270,11 +270,11 @@ impl<'a> PathEncoder<'a> { self.n_pathseg += 1; } - pub fn cubic_to(&mut self, x0: f32, y0: f32, x1: f32, y1: f32, x2: f32, y2: f32) { + pub fn cubic_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x3: f32, y3: f32) { if self.state == State::Start { return; } - let buf = [x0, y0, x1, y1, x2, y2]; + let buf = [x1, y1, x2, y2, x3, y3]; let bytes = bytemuck::bytes_of(&buf); self.pathseg_stream.extend_from_slice(bytes); self.tag_stream.push(11); @@ -288,6 +288,7 @@ impl<'a> PathEncoder<'a> { State::MoveTo => { let new_len = self.pathseg_stream.len() - 8; self.pathseg_stream.truncate(new_len); + self.state = State::Start; return; } State::NonemptySubpath => (), @@ -333,7 +334,9 @@ impl<'a> PathEncoder<'a> { /// /// This is the number of path segments that will be written by the /// path stage; use this for allocating the output buffer. - pub fn n_pathseg(&self) -> u32 { + /// + /// Also note: it takes `self` for lifetime reasons. + pub fn n_pathseg(self) -> u32 { self.n_pathseg } } diff --git a/piet-gpu/src/stages/transform.rs b/piet-gpu/src/stages/transform.rs index 4fb5e9f..4383c14 100644 --- a/piet-gpu/src/stages/transform.rs +++ b/piet-gpu/src/stages/transform.rs @@ -167,6 +167,11 @@ impl TransformStage { } impl Transform { + pub const IDENTITY: Transform = Transform { + mat: [1.0, 0.0, 0.0, 1.0], + translate: [0.0, 0.0], + }; + pub fn from_kurbo(a: Affine) -> Transform { let c = a.as_coeffs(); Transform { diff --git a/tests/src/draw.rs b/tests/src/draw.rs index ca19312..2447444 100644 --- a/tests/src/draw.rs +++ b/tests/src/draw.rs @@ -24,6 +24,7 @@ use crate::{Config, Runner, TestResult}; use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage}; const ELEMENT_SIZE: usize = 36; +const ANNOTATED_SIZE: usize = 40; const ELEMENT_FILLCOLOR: u32 = 4; const ELEMENT_FILLLINGRADIENT: u32 = 5; @@ -99,16 +100,18 @@ impl DrawTestData { // Layout of memory let drawmonoid_alloc = 0; + let anno_alloc = drawmonoid_alloc + 8 * n_tags; let stage_config = stages::Config { n_elements: n_tags as u32, - drawmonoid_alloc, + anno_alloc: anno_alloc as u32, + drawmonoid_alloc: drawmonoid_alloc as u32, ..Default::default() }; stage_config } fn memory_size(&self) -> u64 { - 8 + self.tags.len() as u64 * 8 + (8 + self.tags.len() * (8 + ANNOTATED_SIZE)) as u64 } fn fill_scene(&self, buf: &mut BufWrite) { diff --git a/tests/src/path.rs b/tests/src/path.rs index 948bd6f..7c5388f 100644 --- a/tests/src/path.rs +++ b/tests/src/path.rs @@ -19,7 +19,7 @@ use crate::{Config, Runner, TestResult}; use bytemuck::{Pod, Zeroable}; -use piet_gpu::stages::{self, PathCode, PathEncoder, PathStage}; +use piet_gpu::stages::{self, PathCode, PathEncoder, PathStage, Transform}; use piet_gpu_hal::{BufWrite, BufferUsage}; use rand::{prelude::ThreadRng, Rng}; @@ -55,6 +55,8 @@ struct Bbox { top: u32, right: u32, bottom: u32, + linewidth: f32, + trans_ix: u32, } pub unsafe fn path_test(runner: &mut Runner, config: &Config) -> TestResult { @@ -206,11 +208,11 @@ impl PathData { let pathseg_alloc = trans_alloc + n_trans * 24; let bbox_alloc = pathseg_alloc + self.n_pathseg * PATHSEG_SIZE; let stage_config = stages::Config { - n_elements: self.n_path, pathseg_alloc, trans_alloc, bbox_alloc, n_trans, + n_path: self.n_path, pathtag_offset, linewidth_offset, pathseg_offset, @@ -236,7 +238,7 @@ impl PathData { fn memory_full_size(&self) -> u64 { let mut size = self.memory_init_size(); size += (self.n_pathseg * PATHSEG_SIZE) as u64; - size += (self.n_path * 16) as u64; + size += (self.n_path * 24) as u64; size } @@ -246,7 +248,7 @@ impl PathData { let mem_error = 0u32; let mem_init = [mem_offset, mem_error]; buf.push(mem_init); - let trans = [1.0f32, 0.0, 0.0, 1.0, 0.0, 0.0]; + let trans = Transform::IDENTITY; buf.push(trans); } @@ -274,17 +276,15 @@ impl PathData { } let begin_bbox = 32 + PATHSEG_SIZE * self.n_pathseg; for i in 0..self.n_path { - let offset = (begin_bbox + 16 * i) as usize; - let actual = bytemuck::from_bytes::(&memory[offset..offset + 16]); + let offset = (begin_bbox + 24 * i) as usize; + let actual = bytemuck::from_bytes::(&memory[offset..offset + 24]); let expected_f32 = self.bbox[i as usize]; - let expected = Bbox { - left: round_down(expected_f32.0), - top: round_down(expected_f32.1), - right: round_up(expected_f32.2), - bottom: round_up(expected_f32.3), - }; - if expected != *actual { - println!("{}: {:?} {:?}", i, actual, expected); + if round_down(expected_f32.0) != actual.left + || round_down(expected_f32.1) != actual.top + || round_up(expected_f32.2) != actual.right + || round_up(expected_f32.3) != actual.bottom + { + println!("{}: {:?} {:?}", i, actual, expected_f32); return Some(format!("bbox mismatch at {}", i)); } }