diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index 3bc1365..4dd01ed 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/backdrop_lg.spv b/piet-gpu/shader/backdrop_lg.spv index c02f92c..b00e3cd 100644 Binary files a/piet-gpu/shader/backdrop_lg.spv and b/piet-gpu/shader/backdrop_lg.spv differ diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 7c5c316..38d10b3 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index c8b4858..1df1876 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -78,3 +78,19 @@ build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h me build gen/pathseg.hlsl: hlsl gen/pathseg.spv build gen/pathseg.dxil: dxil gen/pathseg.hlsl build gen/pathseg.msl: msl gen/pathseg.spv + +build gen/draw_reduce.spv: glsl draw_reduce.comp | scene.h drawtag.h setup.h mem.h +build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv +build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl +build gen/draw_reduce.msl: msl gen/draw_reduce.spv + +build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h + flags = -DROOT +build gen/draw_root.hlsl: hlsl gen/draw_root.spv +build gen/draw_root.dxil: dxil gen/draw_root.hlsl +build gen/draw_root.msl: msl gen/draw_root.spv + +build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h +build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv +build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl +build gen/draw_leaf.msl: msl gen/draw_leaf.spv diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index a0ad82a..a2071ad 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp new file mode 100644 index 0000000..ec6a928 --- /dev/null +++ b/piet-gpu/shader/draw_leaf.comp @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The leaf scan pass for draw tag scan implemented as a tree reduction. +// This stage can be fused with its consumer but is separate now. + + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "scene.h" +#include "tile.h" +#include "drawtag.h" + +#define Monoid DrawMonoid + +layout(set = 0, binding = 3) readonly buffer ParentBuf { + Monoid[] parent; +}; + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + Monoid local[N_ROWS]; + + uint ix = gl_GlobalInvocationID.x * N_ROWS; + ElementRef ref = ElementRef(ix * Element_size); + uint tag_word = Element_tag(ref).tag; + + Monoid agg = map_tag(tag_word); + local[0] = agg; + for (uint i = 1; i < N_ROWS; i++) { + tag_word = Element_tag(Element_index(ref, i)).tag; + agg = combine_tag_monoid(agg, map_tag(tag_word)); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + agg = combine_tag_monoid(other, agg); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + barrier(); + Monoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0) { + row = parent[gl_WorkGroupID.x - 1]; + } + if (gl_LocalInvocationID.x > 0) { + row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); + } + uint out_base = (conf.drawmonoid_alloc.offset >> 2) + gl_GlobalInvocationID.x * 2 * N_ROWS; + for (uint i = 0; i < N_ROWS; i++) { + Monoid m = combine_tag_monoid(row, local[i]); + memory[out_base + i * 2] = m.path_ix; + memory[out_base + i * 2 + 1] = m.clip_ix; + } +} diff --git a/piet-gpu/shader/draw_reduce.comp b/piet-gpu/shader/draw_reduce.comp new file mode 100644 index 0000000..fe9ab2c --- /dev/null +++ b/piet-gpu/shader/draw_reduce.comp @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The reduction phase for draw scan implemented as a tree reduction. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "scene.h" +#include "drawtag.h" + +#define Monoid DrawMonoid + +layout(set = 0, binding = 3) buffer OutBuf { + Monoid[] outbuf; +}; + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + uint ix = gl_GlobalInvocationID.x * N_ROWS; + ElementRef ref = ElementRef(ix * Element_size); + uint tag_word = Element_tag(ref).tag; + + Monoid agg = map_tag(tag_word); + for (uint i = 1; i < N_ROWS; i++) { + tag_word = Element_tag(Element_index(ref, i)).tag; + agg = combine_tag_monoid(agg, map_tag(tag_word)); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + // We could make this predicate tighter, but would it help? + if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { + Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; + agg = combine_tag_monoid(agg, other); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0) { + outbuf[gl_WorkGroupID.x] = agg; + } +} diff --git a/piet-gpu/shader/draw_scan.comp b/piet-gpu/shader/draw_scan.comp new file mode 100644 index 0000000..d883671 --- /dev/null +++ b/piet-gpu/shader/draw_scan.comp @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// A scan pass for draw tag scan implemented as a tree reduction. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "drawtag.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +#define Monoid DrawMonoid +#define combine_monoid combine_tag_monoid +#define monoid_identity tag_monoid_identity + +layout(binding = 0) buffer DataBuf { + Monoid[] data; +}; + +#ifndef ROOT +layout(binding = 1) readonly buffer ParentBuf { + Monoid[] parent; +}; +#endif + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + Monoid local[N_ROWS]; + + uint ix = gl_GlobalInvocationID.x * N_ROWS; + + local[0] = data[ix]; + for (uint i = 1; i < N_ROWS; i++) { + local[i] = combine_monoid(local[i - 1], data[ix + i]); + } + Monoid agg = local[N_ROWS - 1]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + agg = combine_monoid(other, agg); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + barrier(); + // This could be a semigroup instead of a monoid if we reworked the + // conditional logic, but that might impact performance. + Monoid row = monoid_identity(); +#ifdef ROOT + if (gl_LocalInvocationID.x > 0) { + row = sh_scratch[gl_LocalInvocationID.x - 1]; + } +#else + if (gl_WorkGroupID.x > 0) { + row = parent[gl_WorkGroupID.x - 1]; + } + if (gl_LocalInvocationID.x > 0) { + row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); + } +#endif + for (uint i = 0; i < N_ROWS; i++) { + Monoid m = combine_monoid(row, local[i]); + data[ix + i] = m; + } +} diff --git a/piet-gpu/shader/drawtag.h b/piet-gpu/shader/drawtag.h new file mode 100644 index 0000000..a9e8a1d --- /dev/null +++ b/piet-gpu/shader/drawtag.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Common data structures and functions for the draw tag stream. + +struct DrawMonoid { + uint path_ix; + uint clip_ix; +}; + +DrawMonoid tag_monoid_identity() { + return DrawMonoid(0, 0); +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) { + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +#ifdef Element_size +DrawMonoid map_tag(uint tag_word) { + switch (tag_word) { + case Element_FillColor: + case Element_FillLinGradient: + case Element_FillImage: + return DrawMonoid(1, 0); + case Element_BeginClip: + return DrawMonoid(1, 1); + case Element_EndClip: + return DrawMonoid(0, 1); + default: + return DrawMonoid(0, 0); + } +} +#endif diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl index ae40b13..7a4e86a 100644 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ b/piet-gpu/shader/gen/bbox_clear.hlsl @@ -16,6 +16,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl index f424448..6f73531 100644 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ b/piet-gpu/shader/gen/bbox_clear.msl @@ -21,6 +21,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv index 181f99b..2b659f4 100644 Binary files a/piet-gpu/shader/gen/bbox_clear.spv and b/piet-gpu/shader/gen/bbox_clear.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil new file mode 100644 index 0000000..17bace7 Binary files /dev/null and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl new file mode 100644 index 0000000..e5f50fd --- /dev/null +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -0,0 +1,190 @@ +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +static const DrawMonoid _67 = { 0u, 0u }; +static const DrawMonoid _94 = { 1u, 0u }; +static const DrawMonoid _96 = { 1u, 1u }; +static const DrawMonoid _98 = { 0u, 1u }; + +ByteAddressBuffer _49 : register(t2); +ByteAddressBuffer _218 : register(t3); +ByteAddressBuffer _248 : register(t1); +RWByteAddressBuffer _277 : register(u0); + +static uint3 gl_WorkGroupID; +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +ElementTag Element_tag(ElementRef ref) +{ + uint tag_and_flags = _49.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _63 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _63; +} + +DrawMonoid map_tag(uint tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return _94; + } + case 9u: + { + return _96; + } + case 10u: + { + return _98; + } + default: + { + return _67; + } + } +} + +ElementRef Element_index(ElementRef ref, uint index) +{ + ElementRef _42 = { ref.offset + (index * 36u) }; + return _42; +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +DrawMonoid tag_monoid_identity() +{ + return _67; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef _115 = { ix * 36u }; + ElementRef ref = _115; + ElementRef param = ref; + uint tag_word = Element_tag(param).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + DrawMonoid local[8]; + local[0] = agg; + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_8 = other; + DrawMonoid param_9 = agg; + agg = combine_tag_monoid(param_8, param_9); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + GroupMemoryBarrierWithGroupSync(); + DrawMonoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0u) + { + DrawMonoid _224; + _224.path_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _224.clip_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _224.path_ix; + row.clip_ix = _224.clip_ix; + } + if (gl_LocalInvocationID.x > 0u) + { + DrawMonoid param_10 = row; + DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_tag_monoid(param_10, param_11); + } + uint out_base = (_248.Load(44) >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_12 = row; + DrawMonoid param_13 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_12, param_13); + _277.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _277.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl new file mode 100644 index 0000000..d52a560 --- /dev/null +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -0,0 +1,235 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct SceneBuf +{ + uint scene[1]; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct ParentBuf +{ + DrawMonoid_1 parent[1]; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +static inline __attribute__((always_inline)) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49) +{ + uint tag_and_flags = v_49.scene[ref.offset >> uint(2)]; + return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid map_tag(thread const uint& tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return DrawMonoid{ 1u, 0u }; + } + case 9u: + { + return DrawMonoid{ 1u, 1u }; + } + case 10u: + { + return DrawMonoid{ 0u, 1u }; + } + default: + { + return DrawMonoid{ 0u, 0u }; + } + } +} + +static inline __attribute__((always_inline)) +ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) +{ + return ElementRef{ ref.offset + (index * 36u) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +static inline __attribute__((always_inline)) +DrawMonoid tag_monoid_identity() +{ + return DrawMonoid{ 0u, 0u }; +} + +kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef ref = ElementRef{ ix * 36u }; + ElementRef param = ref; + uint tag_word = Element_tag(param, v_49).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + spvUnsafeArray local; + local[0] = agg; + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4, v_49).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_8 = other; + DrawMonoid param_9 = agg; + agg = combine_tag_monoid(param_8, param_9); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + DrawMonoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0u) + { + uint _221 = gl_WorkGroupID.x - 1u; + row.path_ix = _218.parent[_221].path_ix; + row.clip_ix = _218.parent[_221].clip_ix; + } + if (gl_LocalInvocationID.x > 0u) + { + DrawMonoid param_10 = row; + DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_tag_monoid(param_10, param_11); + } + uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_12 = row; + DrawMonoid param_13 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_12, param_13); + _277.memory[out_base + (i_2 * 2u)] = m.path_ix; + _277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv new file mode 100644 index 0000000..30740a2 Binary files /dev/null and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil new file mode 100644 index 0000000..f1e48e1 Binary files /dev/null and b/piet-gpu/shader/gen/draw_reduce.dxil differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl new file mode 100644 index 0000000..27c206a --- /dev/null +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -0,0 +1,162 @@ +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +static const DrawMonoid _88 = { 1u, 0u }; +static const DrawMonoid _90 = { 1u, 1u }; +static const DrawMonoid _92 = { 0u, 1u }; +static const DrawMonoid _94 = { 0u, 0u }; + +ByteAddressBuffer _46 : register(t2); +RWByteAddressBuffer _203 : register(u3); +RWByteAddressBuffer _217 : register(u0); +ByteAddressBuffer _223 : register(t1); + +static uint3 gl_WorkGroupID; +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +ElementTag Element_tag(ElementRef ref) +{ + uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _60; +} + +DrawMonoid map_tag(uint tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return _88; + } + case 9u: + { + return _90; + } + case 10u: + { + return _92; + } + default: + { + return _94; + } + } +} + +ElementRef Element_index(ElementRef ref, uint index) +{ + ElementRef _39 = { ref.offset + (index * 36u) }; + return _39; +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef _110 = { ix * 36u }; + ElementRef ref = _110; + ElementRef param = ref; + uint tag_word = Element_tag(param).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; + DrawMonoid param_8 = agg; + DrawMonoid param_9 = other; + agg = combine_tag_monoid(param_8, param_9); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0u) + { + _203.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix); + _203.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl new file mode 100644 index 0000000..dd2f517 --- /dev/null +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -0,0 +1,169 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct SceneBuf +{ + uint scene[1]; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct OutBuf +{ + DrawMonoid_1 outbuf[1]; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +static inline __attribute__((always_inline)) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46) +{ + uint tag_and_flags = v_46.scene[ref.offset >> uint(2)]; + return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid map_tag(thread const uint& tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return DrawMonoid{ 1u, 0u }; + } + case 9u: + { + return DrawMonoid{ 1u, 1u }; + } + case 10u: + { + return DrawMonoid{ 0u, 1u }; + } + default: + { + return DrawMonoid{ 0u, 0u }; + } + } +} + +static inline __attribute__((always_inline)) +ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) +{ + return ElementRef{ ref.offset + (index * 36u) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef ref = ElementRef{ ix * 36u }; + ElementRef param = ref; + uint tag_word = Element_tag(param, v_46).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4, v_46).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; + DrawMonoid param_8 = agg; + DrawMonoid param_9 = other; + agg = combine_tag_monoid(param_8, param_9); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0u) + { + _203.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _203.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv new file mode 100644 index 0000000..286bd33 Binary files /dev/null and b/piet-gpu/shader/gen/draw_reduce.spv differ diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil new file mode 100644 index 0000000..da5cfe2 Binary files /dev/null and b/piet-gpu/shader/gen/draw_root.dxil differ diff --git a/piet-gpu/shader/gen/draw_root.hlsl b/piet-gpu/shader/gen/draw_root.hlsl new file mode 100644 index 0000000..7dc68b1 --- /dev/null +++ b/piet-gpu/shader/gen/draw_root.hlsl @@ -0,0 +1,94 @@ +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +static const DrawMonoid _18 = { 0u, 0u }; + +RWByteAddressBuffer _57 : register(u0); + +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +DrawMonoid tag_monoid_identity() +{ + return _18; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + DrawMonoid _61; + _61.path_ix = _57.Load(ix * 8 + 0); + _61.clip_ix = _57.Load(ix * 8 + 4); + DrawMonoid local[8]; + local[0].path_ix = _61.path_ix; + local[0].clip_ix = _61.clip_ix; + DrawMonoid param_1; + for (uint i = 1u; i < 8u; i++) + { + DrawMonoid param = local[i - 1u]; + DrawMonoid _88; + _88.path_ix = _57.Load((ix + i) * 8 + 0); + _88.clip_ix = _57.Load((ix + i) * 8 + 4); + param_1.path_ix = _88.path_ix; + param_1.clip_ix = _88.clip_ix; + local[i] = combine_tag_monoid(param, param_1); + } + DrawMonoid agg = local[7]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_2 = other; + DrawMonoid param_3 = agg; + agg = combine_tag_monoid(param_2, param_3); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + GroupMemoryBarrierWithGroupSync(); + DrawMonoid row = tag_monoid_identity(); + if (gl_LocalInvocationID.x > 0u) + { + row = sh_scratch[gl_LocalInvocationID.x - 1u]; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_4 = row; + DrawMonoid param_5 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_4, param_5); + uint _178 = ix + i_2; + _57.Store(_178 * 8 + 0, m.path_ix); + _57.Store(_178 * 8 + 4, m.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_root.msl b/piet-gpu/shader/gen/draw_root.msl new file mode 100644 index 0000000..2ed7ba2 --- /dev/null +++ b/piet-gpu/shader/gen/draw_root.msl @@ -0,0 +1,128 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct DataBuf +{ + DrawMonoid_1 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +static inline __attribute__((always_inline)) +DrawMonoid tag_monoid_identity() +{ + return DrawMonoid{ 0u, 0u }; +} + +kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + spvUnsafeArray local; + local[0].path_ix = _57.data[ix].path_ix; + local[0].clip_ix = _57.data[ix].clip_ix; + DrawMonoid param_1; + for (uint i = 1u; i < 8u; i++) + { + uint _82 = ix + i; + DrawMonoid param = local[i - 1u]; + param_1.path_ix = _57.data[_82].path_ix; + param_1.clip_ix = _57.data[_82].clip_ix; + local[i] = combine_tag_monoid(param, param_1); + } + DrawMonoid agg = local[7]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_2 = other; + DrawMonoid param_3 = agg; + agg = combine_tag_monoid(param_2, param_3); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + DrawMonoid row = tag_monoid_identity(); + if (gl_LocalInvocationID.x > 0u) + { + row = sh_scratch[gl_LocalInvocationID.x - 1u]; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_4 = row; + DrawMonoid param_5 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_4, param_5); + uint _178 = ix + i_2; + _57.data[_178].path_ix = m.path_ix; + _57.data[_178].clip_ix = m.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_root.spv b/piet-gpu/shader/gen/draw_root.spv new file mode 100644 index 0000000..acecee3 Binary files /dev/null and b/piet-gpu/shader/gen/draw_root.spv differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 5ad35e7..4464d9d 100644 Binary files a/piet-gpu/shader/gen/pathseg.dxil and b/piet-gpu/shader/gen/pathseg.dxil differ diff --git a/piet-gpu/shader/gen/pathseg.hlsl b/piet-gpu/shader/gen/pathseg.hlsl index 40e60cd..e29ddd3 100644 --- a/piet-gpu/shader/gen/pathseg.hlsl +++ b/piet-gpu/shader/gen/pathseg.hlsl @@ -63,6 +63,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -354,7 +355,7 @@ uint round_up(float x) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(52) >> uint(2)) + (ix >> uint(2))) * 4 + 0); + uint tag_word = _574.Load(((_639.Load(56) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint param = tag_word; TagMonoid local_tm = reduce_tag(param); sh_tag[gl_LocalInvocationID.x] = local_tm; @@ -393,13 +394,13 @@ void comp_main() TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; tm = combine_tag_monoid(param_3, param_4); } - uint ps_ix = (_639.Load(60) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(56) >> uint(2)) + tm.linewidth_ix; + uint ps_ix = (_639.Load(64) >> uint(2)) + tm.pathseg_offset; + uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; - TransformSegRef _769 = { _639.Load(36) + (tm.trans_ix * 24u) }; - TransformSegRef trans_ref = _769; - PathSegRef _779 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _779; + TransformSegRef _768 = { _639.Load(36) + (tm.trans_ix * 24u) }; + TransformSegRef trans_ref = _768; + PathSegRef _778 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; + PathSegRef ps_ref = _778; float2 p0; float2 p1; float2 p2; @@ -449,9 +450,9 @@ void comp_main() } } float linewidth = asfloat(_574.Load(lw_ix * 4 + 0)); - Alloc _865; - _865.offset = _639.Load(36); - param_13.offset = _865.offset; + Alloc _864; + _864.offset = _639.Load(36); + param_13.offset = _864.offset; TransformSegRef param_14 = trans_ref; TransformSeg transform = TransformSeg_read(param_13, param_14); p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; @@ -460,25 +461,25 @@ void comp_main() if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _935 = bbox; - float2 _938 = min(_935.xy, p2); - bbox.x = _938.x; - bbox.y = _938.y; - float4 _943 = bbox; - float2 _946 = max(_943.zw, p2); - bbox.z = _946.x; - bbox.w = _946.y; + float4 _934 = bbox; + float2 _937 = min(_934.xy, p2); + bbox.x = _937.x; + bbox.y = _937.y; + float4 _942 = bbox; + float2 _945 = max(_942.zw, p2); + bbox.z = _945.x; + bbox.w = _945.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _971 = bbox; - float2 _974 = min(_971.xy, p3); - bbox.x = _974.x; - bbox.y = _974.y; - float4 _979 = bbox; - float2 _982 = max(_979.zw, p3); - bbox.z = _982.x; - bbox.w = _982.y; + float4 _970 = bbox; + float2 _973 = min(_970.xy, p3); + bbox.x = _973.x; + bbox.y = _973.y; + float4 _978 = bbox; + float2 _981 = max(_978.zw, p3); + bbox.z = _981.x; + bbox.w = _981.y; } else { @@ -509,9 +510,9 @@ void comp_main() cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.stroke = stroke; uint fill_mode = uint(linewidth >= 0.0f); - Alloc _1071; - _1071.offset = _639.Load(28); - param_15.offset = _1071.offset; + Alloc _1070; + _1070.offset = _639.Load(28); + param_15.offset = _1070.offset; PathSegRef param_16 = ps_ref; uint param_17 = fill_mode; PathCubic param_18 = cubic; @@ -567,17 +568,17 @@ void comp_main() Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1241 = i_4 == 3u; - bool _1248; - if (_1241) + bool _1240 = i_4 == 3u; + bool _1247; + if (_1240) { - _1248 = gl_LocalInvocationID.x == 511u; + _1247 = gl_LocalInvocationID.x == 511u; } else { - _1248 = _1241; + _1247 = _1240; } - if (_1248) + if (_1247) { do_atomic = true; } @@ -603,30 +604,30 @@ void comp_main() } if (do_atomic) { - bool _1300 = m.bbox.z > m.bbox.x; - bool _1309; - if (!_1300) + bool _1299 = m.bbox.z > m.bbox.x; + bool _1308; + if (!_1299) { - _1309 = m.bbox.w > m.bbox.y; + _1308 = m.bbox.w > m.bbox.y; } else { - _1309 = _1300; + _1308 = _1299; } - if (_1309) + if (_1308) { float param_29 = m.bbox.x; - uint _1318; - _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1318); + uint _1317; + _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1317); float param_30 = m.bbox.y; - uint _1326; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1326); + uint _1325; + _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1325); float param_31 = m.bbox.z; - uint _1334; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1334); + uint _1333; + _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1333); float param_32 = m.bbox.w; - uint _1342; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1342); + uint _1341; + _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1341); } bbox_out_ix += 4u; } diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl index 25d001f..71299bd 100644 --- a/piet-gpu/shader/gen/pathseg.msl +++ b/piet-gpu/shader/gen/pathseg.msl @@ -128,6 +128,7 @@ struct Config Alloc_1 anno_alloc; Alloc_1 trans_alloc; Alloc_1 bbox_alloc; + Alloc_1 drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -530,25 +531,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _935 = bbox; - float2 _938 = fast::min(_935.xy, p2); - bbox.x = _938.x; - bbox.y = _938.y; - float4 _943 = bbox; - float2 _946 = fast::max(_943.zw, p2); - bbox.z = _946.x; - bbox.w = _946.y; + float4 _934 = bbox; + float2 _937 = fast::min(_934.xy, p2); + bbox.x = _937.x; + bbox.y = _937.y; + float4 _942 = bbox; + float2 _945 = fast::max(_942.zw, p2); + bbox.z = _945.x; + bbox.w = _945.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _971 = bbox; - float2 _974 = fast::min(_971.xy, p3); - bbox.x = _974.x; - bbox.y = _974.y; - float4 _979 = bbox; - float2 _982 = fast::max(_979.zw, p3); - bbox.z = _982.x; - bbox.w = _982.y; + float4 _970 = bbox; + float2 _973 = fast::min(_970.xy, p3); + bbox.x = _973.x; + bbox.y = _973.y; + float4 _978 = bbox; + float2 _981 = fast::max(_978.zw, p3); + bbox.z = _981.x; + bbox.w = _981.y; } else { @@ -635,17 +636,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1241 = i_4 == 3u; - bool _1248; - if (_1241) + bool _1240 = i_4 == 3u; + bool _1247; + if (_1240) { - _1248 = gl_LocalInvocationID.x == 511u; + _1247 = gl_LocalInvocationID.x == 511u; } else { - _1248 = _1241; + _1247 = _1240; } - if (_1248) + if (_1247) { do_atomic = true; } @@ -671,26 +672,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } if (do_atomic) { - bool _1300 = m.bbox.z > m.bbox.x; - bool _1309; - if (!_1300) + bool _1299 = m.bbox.z > m.bbox.x; + bool _1308; + if (!_1299) { - _1309 = m.bbox.w > m.bbox.y; + _1308 = m.bbox.w > m.bbox.y; } else { - _1309 = _1300; + _1308 = _1299; } - if (_1309) + if (_1308) { float param_29 = m.bbox.x; - uint _1318 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); + uint _1317 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); float param_30 = m.bbox.y; - uint _1326 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); + uint _1325 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); float param_31 = m.bbox.z; - uint _1334 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); + uint _1333 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); float param_32 = m.bbox.w; - uint _1342 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); + uint _1341 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); } bbox_out_ix += 4u; } diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv index 2ac684d..bc165ac 100644 Binary files a/piet-gpu/shader/gen/pathseg.spv and b/piet-gpu/shader/gen/pathseg.spv differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 81448e7..02a4750 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and b/piet-gpu/shader/gen/pathtag_reduce.dxil differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl index 5ed84b8..5e98362 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ b/piet-gpu/shader/gen/pathtag_reduce.hlsl @@ -25,6 +25,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -81,7 +82,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint scene_ix = (_139.Load(52) >> uint(2)) + ix; + uint scene_ix = (_139.Load(56) >> uint(2)) + ix; uint tag_word = _151.Load(scene_ix * 4 + 0); uint param = tag_word; TagMonoid agg = reduce_tag(param); diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl index edb6d03..38451d4 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ b/piet-gpu/shader/gen/pathtag_reduce.msl @@ -32,6 +32,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv index 44cd938..eef46a2 100644 Binary files a/piet-gpu/shader/gen/pathtag_reduce.spv and b/piet-gpu/shader/gen/pathtag_reduce.spv differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil index 3864dbe..dabc049 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and b/piet-gpu/shader/gen/transform_leaf.dxil differ diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl index c0343f0..2f0de05 100644 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ b/piet-gpu/shader/gen/transform_leaf.hlsl @@ -38,6 +38,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -148,7 +149,7 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _285 = { _278.Load(48) + (ix * 24u) }; + TransformRef _285 = { _278.Load(52) + (ix * 24u) }; TransformRef ref = _285; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl index 16c1e13..3120b3d 100644 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ b/piet-gpu/shader/gen/transform_leaf.msl @@ -101,6 +101,7 @@ struct Config Alloc_1 anno_alloc; Alloc_1 trans_alloc; Alloc_1 bbox_alloc; + Alloc_1 drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv index 49c9789..01f047b 100644 Binary files a/piet-gpu/shader/gen/transform_leaf.spv and b/piet-gpu/shader/gen/transform_leaf.spv differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil index f9e1cbf..68997d0 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and b/piet-gpu/shader/gen/transform_reduce.dxil differ diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl index 75e7e3f..9d8a5d6 100644 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ b/piet-gpu/shader/gen/transform_reduce.hlsl @@ -27,6 +27,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -85,7 +86,7 @@ Transform combine_monoid(Transform a, Transform b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(48) + (ix * 24u) }; + TransformRef _168 = { _161.Load(52) + (ix * 24u) }; TransformRef ref = _168; TransformRef param = ref; Transform agg = Transform_read(param); diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl index aabfaed..e61b602 100644 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ b/piet-gpu/shader/gen/transform_reduce.msl @@ -39,6 +39,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv index 451775d..77eadb2 100644 Binary files a/piet-gpu/shader/gen/transform_reduce.spv and b/piet-gpu/shader/gen/transform_reduce.spv differ diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 4db2c3a..04b6364 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index 2fc59fe..240f8f7 100644 Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index c74903e..3bb1fdd 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -42,6 +42,8 @@ struct Config { // Bounding boxes of paths, stored as int (so atomics work) Alloc bbox_alloc; + // Monoid for draw objects + Alloc drawmonoid_alloc; // Number of transforms in scene // This is probably not needed. diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index 69dddf5..0de00e3 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index 59e8b50..f4a086c 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -16,11 +16,13 @@ //! Stages for new element pipeline, exposed for testing. +mod draw; mod path; mod transform; use bytemuck::{Pod, Zeroable}; +pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage}; pub use transform::{Transform, TransformBinding, TransformCode, TransformStage}; @@ -41,6 +43,7 @@ pub struct Config { pub anno_alloc: u32, pub trans_alloc: u32, pub bbox_alloc: u32, + pub drawmonoid_alloc: u32, pub n_trans: u32, pub trans_offset: u32, pub pathtag_offset: u32, diff --git a/piet-gpu/src/stages/draw.rs b/piet-gpu/src/stages/draw.rs new file mode 100644 index 0000000..d50c6cb --- /dev/null +++ b/piet-gpu/src/stages/draw.rs @@ -0,0 +1,163 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! The draw object stage of the element processing pipeline. + +use bytemuck::{Pod, Zeroable}; + +use piet_gpu_hal::{ + include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, +}; + +/// The output element of the draw object stage. +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Zeroable, Pod)] +pub struct DrawMonoid { + pub path_ix: u32, + pub clip_ix: u32, +} + +const DRAW_WG: u64 = 512; +const DRAW_N_ROWS: u64 = 8; +const DRAW_PART_SIZE: u64 = DRAW_WG * DRAW_N_ROWS; + +pub struct DrawCode { + reduce_pipeline: Pipeline, + root_pipeline: Pipeline, + leaf_pipeline: Pipeline, +} +pub struct DrawStage { + // Right now we're limited to partition^2 (~16M) elements. This can be + // expanded but is tedious. + root_buf: Buffer, + root_ds: DescriptorSet, +} + +pub struct DrawBinding { + reduce_ds: DescriptorSet, + leaf_ds: DescriptorSet, +} + +impl DrawCode { + pub unsafe fn new(session: &Session) -> DrawCode { + let reduce_code = include_shader!(session, "../../shader/gen/draw_reduce"); + let reduce_pipeline = session + .create_compute_pipeline( + reduce_code, + &[ + BindType::Buffer, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + ) + .unwrap(); + let root_code = include_shader!(session, "../../shader/gen/draw_root"); + let root_pipeline = session + .create_compute_pipeline(root_code, &[BindType::Buffer]) + .unwrap(); + let leaf_code = include_shader!(session, "../../shader/gen/draw_leaf"); + let leaf_pipeline = session + .create_compute_pipeline( + leaf_code, + &[ + BindType::Buffer, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + ], + ) + .unwrap(); + DrawCode { + reduce_pipeline, + root_pipeline, + leaf_pipeline, + } + } +} + +impl DrawStage { + pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage { + // We're limited to DRAW_PART_SIZE^2 + // Also note: size here allows padding + let root_buf_size = DRAW_PART_SIZE * 8; + let root_buf = session + .create_buffer(root_buf_size, BufferUsage::STORAGE) + .unwrap(); + let root_ds = session + .create_simple_descriptor_set(&code.root_pipeline, &[&root_buf]) + .unwrap(); + DrawStage { root_buf, root_ds } + } + + pub unsafe fn bind( + &self, + session: &Session, + code: &DrawCode, + config_buf: &Buffer, + scene_buf: &Buffer, + memory_buf: &Buffer, + ) -> DrawBinding { + let reduce_ds = session + .create_simple_descriptor_set( + &code.reduce_pipeline, + &[memory_buf, config_buf, scene_buf, &self.root_buf], + ) + .unwrap(); + let leaf_ds = session + .create_simple_descriptor_set( + &code.leaf_pipeline, + &[memory_buf, config_buf, scene_buf, &self.root_buf], + ) + .unwrap(); + DrawBinding { reduce_ds, leaf_ds } + } + + pub unsafe fn record( + &self, + cmd_buf: &mut CmdBuf, + code: &DrawCode, + binding: &DrawBinding, + size: u64, + ) { + if size > DRAW_PART_SIZE.pow(2) { + panic!("very large scan not yet implemented"); + } + let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE; + if n_workgroups > 1 { + cmd_buf.dispatch( + &code.reduce_pipeline, + &binding.reduce_ds, + (n_workgroups as u32, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &code.root_pipeline, + &self.root_ds, + (1, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + cmd_buf.memory_barrier(); + } + cmd_buf.dispatch( + &code.leaf_pipeline, + &binding.leaf_ds, + (n_workgroups as u32, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + } +} diff --git a/tests/src/draw.rs b/tests/src/draw.rs new file mode 100644 index 0000000..ca19312 --- /dev/null +++ b/tests/src/draw.rs @@ -0,0 +1,147 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Tests for the piet-gpu draw object stage. + +use piet_gpu_hal::{BufWrite, BufferUsage}; +use rand::Rng; + +use crate::{Config, Runner, TestResult}; + +use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage}; + +const ELEMENT_SIZE: usize = 36; + +const ELEMENT_FILLCOLOR: u32 = 4; +const ELEMENT_FILLLINGRADIENT: u32 = 5; +const ELEMENT_FILLIMAGE: u32 = 6; +const ELEMENT_BEGINCLIP: u32 = 9; +const ELEMENT_ENDCLIP: u32 = 10; + +struct DrawTestData { + tags: Vec, +} + +pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { + let mut result = TestResult::new("draw"); + let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24); + let data = DrawTestData::new(n_tag); + let stage_config = data.get_config(); + + let config_buf = runner + .session + .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) + .unwrap(); + let scene_size = n_tag * ELEMENT_SIZE as u64; + let scene_buf = runner + .session + .create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE) + .unwrap(); + let memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE); + + let code = DrawCode::new(&runner.session); + let stage = DrawStage::new(&runner.session, &code); + let binding = stage.bind( + &runner.session, + &code, + &config_buf, + &scene_buf, + &memory.dev_buf, + ); + + let mut total_elapsed = 0.0; + let n_iter = config.n_iter; + for i in 0..n_iter { + let mut commands = runner.commands(); + commands.write_timestamp(0); + stage.record(&mut commands.cmd_buf, &code, &binding, n_tag); + commands.write_timestamp(1); + if i == 0 || config.verify_all { + commands.cmd_buf.memory_barrier(); + commands.download(&memory); + } + total_elapsed += runner.submit(commands); + if i == 0 || config.verify_all { + let dst = memory.map_read(..); + if let Some(failure) = data.verify(&dst) { + result.fail(failure); + } + } + } + let n_elements = n_tag; + result.timing(total_elapsed, n_elements * n_iter); + + result +} + +impl DrawTestData { + fn new(n: u64) -> DrawTestData { + let mut rng = rand::thread_rng(); + let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect(); + DrawTestData { tags } + } + + fn get_config(&self) -> stages::Config { + let n_tags = self.tags.len(); + + // Layout of memory + let drawmonoid_alloc = 0; + let stage_config = stages::Config { + n_elements: n_tags as u32, + drawmonoid_alloc, + ..Default::default() + }; + stage_config + } + + fn memory_size(&self) -> u64 { + 8 + self.tags.len() as u64 * 8 + } + + fn fill_scene(&self, buf: &mut BufWrite) { + let mut element = [0u32; ELEMENT_SIZE / 4]; + for tag in &self.tags { + element[0] = *tag; + buf.push(element); + } + } + + fn verify(&self, buf: &[u8]) -> Option { + let size = self.tags.len() * 8; + let actual = bytemuck::cast_slice::(&buf[8..8 + size]); + let mut expected = DrawMonoid::default(); + for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { + // We compute an inclusive prefix sum, but for this application + // exclusive would be slightly better. We can adapt though. + let (path_ix, clip_ix) = Self::reduce_tag(*tag); + expected.path_ix += path_ix; + expected.clip_ix += clip_ix; + if *actual != expected { + return Some(format!("draw mismatch at {}", i)); + } + } + None + } + + fn reduce_tag(tag: u32) -> (u32, u32) { + match tag { + ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0), + ELEMENT_BEGINCLIP => (1, 1), + ELEMENT_ENDCLIP => (0, 1), + _ => (0, 0), + } + } +} diff --git a/tests/src/main.rs b/tests/src/main.rs index 9aab351..e52ce85 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -18,6 +18,7 @@ mod clear; mod config; +mod draw; mod linkedlist; mod message_passing; mod prefix; @@ -137,6 +138,7 @@ fn main() { if config.groups.matches("piet") { report(&transform::transform_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config)); + report(&draw::draw_test(&mut runner, &config)); } } }