From 875c8badf41ff0aebd352f0684e6e6d2feda432e Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 2 Dec 2021 08:41:41 -0800 Subject: [PATCH] Add draw object stage This is one of the stages in the new element pipeline. It's a simple one, just a prefix sum of a couple counts, and some of it will probably get merged with a downstream stage, but we'll do it separately for now for convenience. This patch also contains an update to Vulkan tools 1.2.198, which accounts for the large diff of translated shaders. --- piet-gpu/shader/backdrop.spv | Bin 12536 -> 12592 bytes piet-gpu/shader/backdrop_lg.spv | Bin 12568 -> 12624 bytes piet-gpu/shader/binning.spv | Bin 16316 -> 16372 bytes piet-gpu/shader/build.ninja | 16 ++ piet-gpu/shader/coarse.spv | Bin 64100 -> 64156 bytes piet-gpu/shader/draw_leaf.comp | 79 ++++++++ piet-gpu/shader/draw_reduce.comp | 61 ++++++ piet-gpu/shader/draw_scan.comp | 74 +++++++ piet-gpu/shader/drawtag.h | 36 ++++ piet-gpu/shader/gen/bbox_clear.hlsl | 1 + piet-gpu/shader/gen/bbox_clear.msl | 1 + piet-gpu/shader/gen/bbox_clear.spv | Bin 2632 -> 2688 bytes piet-gpu/shader/gen/draw_leaf.dxil | Bin 0 -> 4428 bytes piet-gpu/shader/gen/draw_leaf.hlsl | 190 +++++++++++++++++ piet-gpu/shader/gen/draw_leaf.msl | 235 ++++++++++++++++++++++ piet-gpu/shader/gen/draw_leaf.spv | Bin 0 -> 8536 bytes piet-gpu/shader/gen/draw_reduce.dxil | Bin 0 -> 3956 bytes piet-gpu/shader/gen/draw_reduce.hlsl | 162 +++++++++++++++ piet-gpu/shader/gen/draw_reduce.msl | 169 ++++++++++++++++ piet-gpu/shader/gen/draw_reduce.spv | Bin 0 -> 6864 bytes piet-gpu/shader/gen/draw_root.dxil | Bin 0 -> 3944 bytes piet-gpu/shader/gen/draw_root.hlsl | 94 +++++++++ piet-gpu/shader/gen/draw_root.msl | 128 ++++++++++++ piet-gpu/shader/gen/draw_root.spv | Bin 0 -> 4684 bytes piet-gpu/shader/gen/pathseg.dxil | Bin 9424 -> 9428 bytes piet-gpu/shader/gen/pathseg.hlsl | 99 ++++----- piet-gpu/shader/gen/pathseg.msl | 65 +++--- piet-gpu/shader/gen/pathseg.spv | Bin 33920 -> 33960 bytes piet-gpu/shader/gen/pathtag_reduce.dxil | Bin 4700 -> 4700 bytes piet-gpu/shader/gen/pathtag_reduce.hlsl | 3 +- piet-gpu/shader/gen/pathtag_reduce.msl | 1 + piet-gpu/shader/gen/pathtag_reduce.spv | Bin 7752 -> 7808 bytes piet-gpu/shader/gen/transform_leaf.dxil | Bin 5664 -> 5664 bytes piet-gpu/shader/gen/transform_leaf.hlsl | 3 +- piet-gpu/shader/gen/transform_leaf.msl | 1 + piet-gpu/shader/gen/transform_leaf.spv | Bin 12424 -> 12480 bytes piet-gpu/shader/gen/transform_reduce.dxil | Bin 4696 -> 4696 bytes piet-gpu/shader/gen/transform_reduce.hlsl | 3 +- piet-gpu/shader/gen/transform_reduce.msl | 1 + piet-gpu/shader/gen/transform_reduce.spv | Bin 7776 -> 7832 bytes piet-gpu/shader/kernel4.spv | Bin 38988 -> 39080 bytes piet-gpu/shader/path_coarse.spv | Bin 43344 -> 43400 bytes piet-gpu/shader/setup.h | 2 + piet-gpu/shader/tile_alloc.spv | Bin 15092 -> 15148 bytes piet-gpu/src/stages.rs | 3 + piet-gpu/src/stages/draw.rs | 163 +++++++++++++++ tests/src/draw.rs | 147 ++++++++++++++ tests/src/main.rs | 2 + 48 files changed, 1655 insertions(+), 84 deletions(-) create mode 100644 piet-gpu/shader/draw_leaf.comp create mode 100644 piet-gpu/shader/draw_reduce.comp create mode 100644 piet-gpu/shader/draw_scan.comp create mode 100644 piet-gpu/shader/drawtag.h create mode 100644 piet-gpu/shader/gen/draw_leaf.dxil create mode 100644 piet-gpu/shader/gen/draw_leaf.hlsl create mode 100644 piet-gpu/shader/gen/draw_leaf.msl create mode 100644 piet-gpu/shader/gen/draw_leaf.spv create mode 100644 piet-gpu/shader/gen/draw_reduce.dxil create mode 100644 piet-gpu/shader/gen/draw_reduce.hlsl create mode 100644 piet-gpu/shader/gen/draw_reduce.msl create mode 100644 piet-gpu/shader/gen/draw_reduce.spv create mode 100644 piet-gpu/shader/gen/draw_root.dxil create mode 100644 piet-gpu/shader/gen/draw_root.hlsl create mode 100644 piet-gpu/shader/gen/draw_root.msl create mode 100644 piet-gpu/shader/gen/draw_root.spv create mode 100644 piet-gpu/src/stages/draw.rs create mode 100644 tests/src/draw.rs diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index 3bc136540453378bcdf34013c80261bf36db7341..4dd01ed09041003f552686fe65cb30abee67e2bd 100644 GIT binary patch delta 134 zcmey7xFKnS7rQzK!*&J+25uluDM~EQ&CkovOo>m-$;nR!3Nf%Tu!7}zCI_+$%klz6 zN{SNmisSRs(uz|{AS&3wD)=ThvP(1aPoB#z&nU2YFZ)^+eu!29pekh`cG#TC6Uodd TJb6FA;p7MWESo>@b0`7;l_MW) delta 82 zcmdmx^doVD7dtyE!*&J+2JXp)?8343=YA-gmq h-{igQ@{IhO@3OCD**uRYgqcxj@@Ia-%^Ct6iU4*e6*T|= diff --git a/piet-gpu/shader/backdrop_lg.spv b/piet-gpu/shader/backdrop_lg.spv index c02f92cda6b66ebda10e3ac0b5941266de458e2d..b00e3cd68b5eeca1a1554487c6e1a4fb0e15daeb 100644 GIT binary patch delta 134 zcmbP{bRlVj7rQzK!*&J+25uluDM~EQ&CkovOo>m-$;nR!3Nf%Tu!7}zCI_+$%klz6 zN{SNmisSRs(uz|{AS&3wD)=ThvP(1aPoB#z&nU2YFZ)^+eu!29pekh`cG#TC6Uodd TJb6FA;p7MWESo>@YbXK$bSNK9 delta 82 zcmcbRG$Uz)7dtyE!*&J+2JXp)?8343=YA-gmq h-{igQ@{IhO@3OCD**uRYgqcxj@@Ia-%^Ct4iU4Bv6&3&h diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 7c5c316e8a8d29542cec4eaa6ab830e45f3abe92..38d10b372c31dfd40157815a30b97eee8b316662 100644 GIT binary patch delta 134 zcmdl}|D}F|6sI}|!!HH~25uluDM~EQ&CkovOo>m-$;nR!3Nf%Tu!7}zCM$9Z%klz6 zN{SNmisSRs(uz|{AS&3wD)=S`a!NDuPtN6(XB61n%Nfba5783>DkW-qG hZ*niEJR|?+wVaWxo2B@-Ff$5GJ}+drnML@64gh#)6;%KL diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index c8b4858..1df1876 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -78,3 +78,19 @@ build gen/pathseg.spv: glsl pathseg.comp | tile.h pathseg.h pathtag.h setup.h me build gen/pathseg.hlsl: hlsl gen/pathseg.spv build gen/pathseg.dxil: dxil gen/pathseg.hlsl build gen/pathseg.msl: msl gen/pathseg.spv + +build gen/draw_reduce.spv: glsl draw_reduce.comp | scene.h drawtag.h setup.h mem.h +build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv +build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl +build gen/draw_reduce.msl: msl gen/draw_reduce.spv + +build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h + flags = -DROOT +build gen/draw_root.hlsl: hlsl gen/draw_root.spv +build gen/draw_root.dxil: dxil gen/draw_root.hlsl +build gen/draw_root.msl: msl gen/draw_root.spv + +build gen/draw_leaf.spv: glsl draw_leaf.comp | scene.h drawtag.h setup.h mem.h +build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv +build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl +build gen/draw_leaf.msl: msl gen/draw_leaf.spv diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index a0ad82ad99e94a7b95aa555339267e1ae40aa9f1..a2071ad5468f95741035f45785e3e17d0dffb073 100644 GIT binary patch delta 132 zcmaFzg?Y|b<_*8p)j1gMu`n=jGcYiu6eX7D=I7;Sro<=a$}O$RL>j delta 84 zcmbR9mHEjR<_*8p*;yIxu`n=jPgc|rmgQk!U??d{%qx!1PfIIKEdg>F*cjNsDtIRc lYDhEkP0rPjXXM}9tFe=BbCkIS7o*VR$&QAbPdHxL0RRM}7?A(~ diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp new file mode 100644 index 0000000..ec6a928 --- /dev/null +++ b/piet-gpu/shader/draw_leaf.comp @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The leaf scan pass for draw tag scan implemented as a tree reduction. +// This stage can be fused with its consumer but is separate now. + + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "scene.h" +#include "tile.h" +#include "drawtag.h" + +#define Monoid DrawMonoid + +layout(set = 0, binding = 3) readonly buffer ParentBuf { + Monoid[] parent; +}; + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + Monoid local[N_ROWS]; + + uint ix = gl_GlobalInvocationID.x * N_ROWS; + ElementRef ref = ElementRef(ix * Element_size); + uint tag_word = Element_tag(ref).tag; + + Monoid agg = map_tag(tag_word); + local[0] = agg; + for (uint i = 1; i < N_ROWS; i++) { + tag_word = Element_tag(Element_index(ref, i)).tag; + agg = combine_tag_monoid(agg, map_tag(tag_word)); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + agg = combine_tag_monoid(other, agg); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + barrier(); + Monoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0) { + row = parent[gl_WorkGroupID.x - 1]; + } + if (gl_LocalInvocationID.x > 0) { + row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); + } + uint out_base = (conf.drawmonoid_alloc.offset >> 2) + gl_GlobalInvocationID.x * 2 * N_ROWS; + for (uint i = 0; i < N_ROWS; i++) { + Monoid m = combine_tag_monoid(row, local[i]); + memory[out_base + i * 2] = m.path_ix; + memory[out_base + i * 2 + 1] = m.clip_ix; + } +} diff --git a/piet-gpu/shader/draw_reduce.comp b/piet-gpu/shader/draw_reduce.comp new file mode 100644 index 0000000..fe9ab2c --- /dev/null +++ b/piet-gpu/shader/draw_reduce.comp @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// The reduction phase for draw scan implemented as a tree reduction. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "mem.h" +#include "setup.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +layout(binding = 1) readonly buffer ConfigBuf { + Config conf; +}; + +layout(binding = 2) readonly buffer SceneBuf { + uint[] scene; +}; + +#include "scene.h" +#include "drawtag.h" + +#define Monoid DrawMonoid + +layout(set = 0, binding = 3) buffer OutBuf { + Monoid[] outbuf; +}; + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + uint ix = gl_GlobalInvocationID.x * N_ROWS; + ElementRef ref = ElementRef(ix * Element_size); + uint tag_word = Element_tag(ref).tag; + + Monoid agg = map_tag(tag_word); + for (uint i = 1; i < N_ROWS; i++) { + tag_word = Element_tag(Element_index(ref, i)).tag; + agg = combine_tag_monoid(agg, map_tag(tag_word)); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + // We could make this predicate tighter, but would it help? + if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { + Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; + agg = combine_tag_monoid(agg, other); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0) { + outbuf[gl_WorkGroupID.x] = agg; + } +} diff --git a/piet-gpu/shader/draw_scan.comp b/piet-gpu/shader/draw_scan.comp new file mode 100644 index 0000000..d883671 --- /dev/null +++ b/piet-gpu/shader/draw_scan.comp @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// A scan pass for draw tag scan implemented as a tree reduction. + +#version 450 +#extension GL_GOOGLE_include_directive : enable + +#include "drawtag.h" + +#define N_ROWS 8 +#define LG_WG_SIZE 9 +#define WG_SIZE (1 << LG_WG_SIZE) +#define PARTITION_SIZE (WG_SIZE * N_ROWS) + +layout(local_size_x = WG_SIZE, local_size_y = 1) in; + +#define Monoid DrawMonoid +#define combine_monoid combine_tag_monoid +#define monoid_identity tag_monoid_identity + +layout(binding = 0) buffer DataBuf { + Monoid[] data; +}; + +#ifndef ROOT +layout(binding = 1) readonly buffer ParentBuf { + Monoid[] parent; +}; +#endif + +shared Monoid sh_scratch[WG_SIZE]; + +void main() { + Monoid local[N_ROWS]; + + uint ix = gl_GlobalInvocationID.x * N_ROWS; + + local[0] = data[ix]; + for (uint i = 1; i < N_ROWS; i++) { + local[i] = combine_monoid(local[i - 1], data[ix + i]); + } + Monoid agg = local[N_ROWS - 1]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i = 0; i < LG_WG_SIZE; i++) { + barrier(); + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; + agg = combine_monoid(other, agg); + } + barrier(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + + barrier(); + // This could be a semigroup instead of a monoid if we reworked the + // conditional logic, but that might impact performance. + Monoid row = monoid_identity(); +#ifdef ROOT + if (gl_LocalInvocationID.x > 0) { + row = sh_scratch[gl_LocalInvocationID.x - 1]; + } +#else + if (gl_WorkGroupID.x > 0) { + row = parent[gl_WorkGroupID.x - 1]; + } + if (gl_LocalInvocationID.x > 0) { + row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]); + } +#endif + for (uint i = 0; i < N_ROWS; i++) { + Monoid m = combine_monoid(row, local[i]); + data[ix + i] = m; + } +} diff --git a/piet-gpu/shader/drawtag.h b/piet-gpu/shader/drawtag.h new file mode 100644 index 0000000..a9e8a1d --- /dev/null +++ b/piet-gpu/shader/drawtag.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Common data structures and functions for the draw tag stream. + +struct DrawMonoid { + uint path_ix; + uint clip_ix; +}; + +DrawMonoid tag_monoid_identity() { + return DrawMonoid(0, 0); +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) { + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +#ifdef Element_size +DrawMonoid map_tag(uint tag_word) { + switch (tag_word) { + case Element_FillColor: + case Element_FillLinGradient: + case Element_FillImage: + return DrawMonoid(1, 0); + case Element_BeginClip: + return DrawMonoid(1, 1); + case Element_EndClip: + return DrawMonoid(0, 1); + default: + return DrawMonoid(0, 0); + } +} +#endif diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl index ae40b13..7a4e86a 100644 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ b/piet-gpu/shader/gen/bbox_clear.hlsl @@ -16,6 +16,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl index f424448..6f73531 100644 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ b/piet-gpu/shader/gen/bbox_clear.msl @@ -21,6 +21,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv index 181f99b828e164e5da3381ac79033a89650a1f6f..2b659f4150af48bb12a07b6376675f7eca199a18 100644 GIT binary patch delta 136 zcmX>h(jdCQi%Fe>L5P8Yfg6ZZiW19n^YijEQ{oeIa`KabLJVvStYCSb$$?D5vb;c% zlA^@C;`scuwBpnfhzfSF3cksWOwx?}ljkzYGYV|p%OuOh578!PxR9X11iazZNU6P`0t$9-NI*cW!B@3w*Te_K77&d^5Z&H9 zNFrj5ii%jf!3V>x*5acoyRDN1)*=rJ)@iX)8!A%UT5B0wZO68IZUonLXJ>Y1cW3sG zely=W-|Kwm+;hI1--&i%hUP2v&+)sDJrg7EI$iDBcsL9J0OW-LAb@>3CqrHgxd^fX zIxzwO3i8~n92#~HDQE7C#8I8=XFsOv<}RR{pq+j=;va=yAn_kH@c`h~!Z8L}Hqyfm zK$o9`I0X{t=Lj9)$L*teM(y0UJxZhY(L9`Q?mYE89}a*X@WC0FK)Wy}k6R20eeKYm zp>bbT3ITWv+JelKe?1cD6PTj6soSv}oJ^X!9kB{y`9ydmws|Ql&E#cl56U=5CP`BX z+04MCIgV_nl+t854uSBbjKOTNb6ZrKPMvS68L(Wh#wKLp`S7E;waIII-1Z0Ol(N2*q3o@pm$DZ7=cV?v#-Ltv{Q8xso?XBDvzcfCuKK{r07E?vyOk>cp znU_gSf-!q2(1{zUWCBwgib~9YmfKKe9I^Y{z~;CY&0w+wb6<9n9T~Jn17glDUs@VR zxKAjF*U08nwQ@6$uV6qzQ7q!ZSt+<&$6Df=6uQ<*wz-zpN zgWln@eOJ$U`_8rv-{=^MBUr5}@7ak7-)#Q6IHBh2&0C6IdrepO`r?IG$9{5o@s-^R zBVV}Ok0EvUtmn^HUEkQdulwzd$F_C%<mzeIX2devcv~v4uXJc=GtvF)K@qYmE}j z6}4?BDv%CKC5`IcrpXS+LP#@)E*&h4Mry``kY@(Nz)#2w>~Ao7zY zIkSp&s}=1YMUOh74>1agbyZusk9NmyInuSIB$u40Y&DJ?#NJSHR{QP6H@$r~I|duE z%!s7)-BVA}$CFwbQ;sK{nt;r)3E$5=T6F8pXG@M0mE=0-N3|LoIxSBItua;}2s!}I za_`>I@>gJvj{X~i!>+3r`)>?2Vk>&8gzvAo`R0W!NApTba!dVJ9#<3juqGxEtfFl| zQ6NR8J2a`*)|w1fP*E;0)~yx}f#9!6@f}t?NthAo^+a4~P!!O7f0#wyg-&nROs_$w zca_(yEfrJ}f{JW=X{lg+DOy%;FQx3^T_NI|PVoR8TtWw5yDh%06Hg-BBCaYGeaZ;0 zA+mtpl=hA#tw%HUB%1z?CEbdqb={Vh=>?UAf|Nr08rZV-Qm4ICFDO$9%I@)D+I6gb z31MF?v#+VMSJasjZrv8&@`=B5i*M5C()I?$V1uH|-}W069ZE%;DWb!Gf9TXqZP!e# z^S5VOQ)@M;4m7Q{^=UZieRJ9+G_AQ+*rpV0C`H$+>}9MyyjLT>=fejn@#jALdr~~? z77sXsztIKXp)IC%WPCrvP2@F1Z~}L;t)>8Y8uok=H+`!3Y2ViCh96}j?1IY^pC5TP z<V+8oLUSNtaj$yq=L_!*(;I7W-itl@ovPqZ_pZ8|HVfxjj@E@wZoQtibv($o zlG~IzzpR5dEsTe=K5aDOO=+nj6<Z3oNM)k!64=)&M_s zh#=?O3ZM9@H!@>b;|d?c<9905Iw7tDF6HNq1H(_;RK~m-dvIsnhQU+$T!GZ`crxH> zQbDMO#AbZ(FOaZ(+N}Dh+`9W=SDQEW*pyK*D-Tqg9XdcJ&j5j&Kuw)WNdIG8|Xx1W!p^(tG`j}o!c{q;fK50-k zR#7(FewALME2d(CKpauU?0+huyV&f?kzk;n0=+hq+$O>ria)XOI@!*{-g5;X4Q*&>rEll??nCdk%nESCEia!~W+*CKv1TssfH?e(PQ~-GMSB*1AYT-Ir4~%k z*glVXF8S_)CsGz9r^GgY3CGs9^q#-2e)mvIZ}+SK?s@Oun_n^ES4*8A!_F%FZga723@iM1l(MI)L^J*5_rm5yi!O_{YG+1h9vhvW2TkmdM8aqZtl zD?TYi?yB+Twc)A*I!jD6PQY;TUDG;DPuG(mtaMr)ilN$aOr!Ta(G^Obz}ga!L>+q1 z>kHzgVm`p4KX|_akb5?!zP_wwu{;mn_UpF z-89Y}Np48J?P<1qBLgc{_)A~|b9Uu(lPqvO;$=5~4uqna#p^z`1=2T5o96<{BK*aGETIr!9?;95 z!&D8}QkwX5-3s;%mya29l=lofvO#6)up5-Vs@Dos-e7$jKp0XOwWP2rjrg5mvJw;o zxJ-ed;*K}G2{`OezdnGc-{bN)_q&GmQav9{vuc{nrieB*1yLCsz_46Kzl#|Mqa9{E z9z^YTjZD;M7@KDZThAqp&lV)iNwq(zWEYuNx_Ca(guNdo*Ll;hm*sg#Bo@l5d33Jc zh2dW|+YeKwz%K{l8G$UZ5RjN<<@ol2G3GM*z_#^gB<2#VCymfTHC!$|H Uw&t>cJDRV!qq!udV7+tnH%7;TL;wH) literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl new file mode 100644 index 0000000..e5f50fd --- /dev/null +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -0,0 +1,190 @@ +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +static const DrawMonoid _67 = { 0u, 0u }; +static const DrawMonoid _94 = { 1u, 0u }; +static const DrawMonoid _96 = { 1u, 1u }; +static const DrawMonoid _98 = { 0u, 1u }; + +ByteAddressBuffer _49 : register(t2); +ByteAddressBuffer _218 : register(t3); +ByteAddressBuffer _248 : register(t1); +RWByteAddressBuffer _277 : register(u0); + +static uint3 gl_WorkGroupID; +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +ElementTag Element_tag(ElementRef ref) +{ + uint tag_and_flags = _49.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _63 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _63; +} + +DrawMonoid map_tag(uint tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return _94; + } + case 9u: + { + return _96; + } + case 10u: + { + return _98; + } + default: + { + return _67; + } + } +} + +ElementRef Element_index(ElementRef ref, uint index) +{ + ElementRef _42 = { ref.offset + (index * 36u) }; + return _42; +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +DrawMonoid tag_monoid_identity() +{ + return _67; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef _115 = { ix * 36u }; + ElementRef ref = _115; + ElementRef param = ref; + uint tag_word = Element_tag(param).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + DrawMonoid local[8]; + local[0] = agg; + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_8 = other; + DrawMonoid param_9 = agg; + agg = combine_tag_monoid(param_8, param_9); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + GroupMemoryBarrierWithGroupSync(); + DrawMonoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0u) + { + DrawMonoid _224; + _224.path_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 0); + _224.clip_ix = _218.Load((gl_WorkGroupID.x - 1u) * 8 + 4); + row.path_ix = _224.path_ix; + row.clip_ix = _224.clip_ix; + } + if (gl_LocalInvocationID.x > 0u) + { + DrawMonoid param_10 = row; + DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_tag_monoid(param_10, param_11); + } + uint out_base = (_248.Load(44) >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_12 = row; + DrawMonoid param_13 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_12, param_13); + _277.Store((out_base + (i_2 * 2u)) * 4 + 8, m.path_ix); + _277.Store(((out_base + (i_2 * 2u)) + 1u) * 4 + 8, m.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl new file mode 100644 index 0000000..d52a560 --- /dev/null +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -0,0 +1,235 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct SceneBuf +{ + uint scene[1]; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct ParentBuf +{ + DrawMonoid_1 parent[1]; +}; + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +static inline __attribute__((always_inline)) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_49) +{ + uint tag_and_flags = v_49.scene[ref.offset >> uint(2)]; + return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid map_tag(thread const uint& tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return DrawMonoid{ 1u, 0u }; + } + case 9u: + { + return DrawMonoid{ 1u, 1u }; + } + case 10u: + { + return DrawMonoid{ 0u, 1u }; + } + default: + { + return DrawMonoid{ 0u, 0u }; + } + } +} + +static inline __attribute__((always_inline)) +ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) +{ + return ElementRef{ ref.offset + (index * 36u) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +static inline __attribute__((always_inline)) +DrawMonoid tag_monoid_identity() +{ + return DrawMonoid{ 0u, 0u }; +} + +kernel void main0(device Memory& _277 [[buffer(0)]], const device ConfigBuf& _248 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], const device ParentBuf& _218 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef ref = ElementRef{ ix * 36u }; + ElementRef param = ref; + uint tag_word = Element_tag(param, v_49).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + spvUnsafeArray local; + local[0] = agg; + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4, v_49).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + local[i] = agg; + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_8 = other; + DrawMonoid param_9 = agg; + agg = combine_tag_monoid(param_8, param_9); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + DrawMonoid row = tag_monoid_identity(); + if (gl_WorkGroupID.x > 0u) + { + uint _221 = gl_WorkGroupID.x - 1u; + row.path_ix = _218.parent[_221].path_ix; + row.clip_ix = _218.parent[_221].clip_ix; + } + if (gl_LocalInvocationID.x > 0u) + { + DrawMonoid param_10 = row; + DrawMonoid param_11 = sh_scratch[gl_LocalInvocationID.x - 1u]; + row = combine_tag_monoid(param_10, param_11); + } + uint out_base = (_248.conf.drawmonoid_alloc.offset >> uint(2)) + ((gl_GlobalInvocationID.x * 2u) * 8u); + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_12 = row; + DrawMonoid param_13 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_12, param_13); + _277.memory[out_base + (i_2 * 2u)] = m.path_ix; + _277.memory[(out_base + (i_2 * 2u)) + 1u] = m.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv new file mode 100644 index 0000000000000000000000000000000000000000..30740a287f301b01fbf5674faf684dddcab4dff7 GIT binary patch literal 8536 zcmaKv2b5f86~|w)Gn)dbgdW0%0EtSFXec3oCX@wDB$RWoGR*EyHUqOW>+CE^uz(Q@ zB8sAj9TOE%Lay^?_wOJW# zRnM*HH#z55tq<15hw=_ZJo0@M>5RoC$JZ(&m3r&c%G!i(+8A)sSi5$t(n>&fPNsGI zO4a&MWy7MeR&#u?wYU?!cznr{<>O11lg_hUznIJ+?zm=o{pv=&Q5|ZpQOs|&+*((v zZb)m2S#z*f9Th7w&)v|eRT@cSOVuF;Rc&ot1dM%7GFSh`$behX@r%|vRT)k?pKNOX zYe@6;BzwWjnY=fAAeD~*U;W2xTPgjqHWE^a?+$@h|{u|-t;bC$YlJj=36_WGOYZRzuzMJ6HR4ybJ!#jBj z$t7L-rCs`EUHau+`V~9q#bh)3*t*i#V6)sBT-RQ^_m;J<*iuvWwve=Gi zxtQDywx-zQC=~libsWkPFHEZCkcih2()9^kVWLdSkp*8Yqu>gwmLY z;45nE8r&M{ex3KEko+6{*hYPAHNF46sjg4GRB69PT>4)A>E}A?C093A@f`N@YKi+k zuCESxf$ODKwN^=E+O{5S>nhdZb*+vu@)fY@UMjbn47>==MGkm-{a*0V)?h6RQGb$n zyQ?s5kC}|ET(3965H+TNx0>bpSY`-(D)_)aV?)P(a=J#}(IMWh?ET3)ZqL;j__nfm zGj5;DG_ZD!n0RMq)3MpJ??zaAdFRaKo9q86?U{zP6FW1DWf!)`dL|zRZ|`O2T#5M& zvd41I)m8;Q<%w)+`=0b9Q<2+m0smJZ=kLh$E`1mD)s>M(lSjkZsr!voMz;49xF+#R zv)S+tdKUHf+<>P!>}A(1M?dHN;MA}E|MiLJ@kZ_eej@RqFF^O}Ctl3+X1wp`evn`2 z^O3Eo?%U~}`u~RdmQ44pSNAu8cK;u+Ufk`d7nm)Jd6td)Dcp1FxwG%M*U@+Ak;lHp zdLZMy^ZCBj?OVGw;|@o>m}kbX8_V|=ey(4?c7N?ybBlR~!_W1IdDa5+UW(1hVXjpy z)@Lu?M=@s`vDOpo%VFL@vHf$H_fKq54s##G4$5KnDt1T?^S+5K&0*d%u_JPr_e<=k z9OgR3o||HY^e!Q8Usv2Cef-=vv7S^eq<1R6Z(G6LH}}AOosHnEoU1d5`x5yA(_bv_ zm)|q%8XI-JGx@se^NIJf?m~3uwSMo6o(s z|0>)%;;vWScTr6HIhmYaUmeVSoQwTcaMvKF|EuBdT^pa6T!b9!{vfi0H10}7AC1fA!n<262YYXxONdK!cn>ix?b;zEF(62|P+;;s2a3+)P0}DwBMV_5&ICjIp=Znd>5leaP9w(!Y|M5>0#o`XU#|8 z))%uU^{p8e=jl;&-vHMZ@6bHXpYKTI7B}A>#{7E50f2o##qAj9l8? zFF()MRuKQcFo*jx40$%xJwKy4U3)91Yah?)-usO?UH|iPy65ivoUVOSPB-7hIbHiD zIbHjuIbHi@IoH7OON32)-2Y)@} zr~M6Z?eggJjmYM>j#?MN-vo&{=f}vQej#KoW0hlOZ$^%pdB)Wv?^}?~Yrmd%^=0g& zvwkZy4KhZXKHdTGnEBg~ow<0-{O!o%e#SUov8eeDWHD{tA@!K~JCXY!asB7O-vv2m z{k>!A%2&Ykw_p9<4f%UgA8q;=CmyxmgY0@E_j{4`i9LKDvU{kX@#^}xFYb}El}En! zBb)Dn?7V#dS!^|Ayf(4?zPm?pr#_6l1zHJ3&W|9AM~@#xjvlW<{us0w($5;|`Y8Lx zn9F(}hug2*b9N2FD1DAx8F z!wuKyJ< z@u>e*kQ`XZ>$i`_iw|nq)$UV3Tiha5nS=@cN5w0G#Tam@I-2zt++c%Ki zUv0O+)#FUxjyxAKMw>qRio1ugZuwK~!#A;Y&*58O;?cvmki-Z~ z{F>~{{0Lc0AJ5EB;C`MNZ9j&@T!(Vx{yB1Y?q7h3$9ecAvY6*&1*^Xo{wwGnXaS_% zzV3qG2WgA9@z)s!OKVPn{|0i6#=2g0{UYYK$oD}LAp11tcaSkLUv>SQZ>&om>-s(N z7AQZnKcI`p%>IZh7Bl-ZvY#{4_9sZpnJGu^zaV$#{wtVx%Y4bQw zkArEynSMOq|AEAoLb2{Akk5wlGkp?WJbvG7Ll(O}hq;mBbtvx31Tbr8v-bxTGkGH1 z8m^-U+4;+T(~Iz4$WQyE%r1|$_&1F?;!U4|EM|T8R=oh3%iPKlyA!gpu?N$TW6%7( zsO~)WAlD4!^WnyX|4d|Y*W~X>_1Lpn$nKeWwCUq-Qt_z28?v(qeRp*GiTUnagzIqXqPKlk6W;2z2&#{tL_p{RWzvRJ$w z+Qjnb%d_wVhw%u=zuhL$(=^C$Ds(t?C&PEjWM{lHWbNtjSkwN<`sC++0J?becObH{ zv8RiW#TG)*^V5)x(-waxJRMp5Nop!buQQNmLw+-%=yevd{p|vezd08pySC`}S;+e3 z`+YXLc)SINB8#=(g7(=t9DNC7oc37T5y;xyCu^yPZ5guI?b#c%99is0D144W)+cHo zgDiG5w1zm>`W(nuZO%eHW-$lZ|403HgiUKK1<2N34aNLU zMivjBHOSV^`<#L<9<@(JHcmX^PD8e~an@GX&pOWdbV$2%RnE_~58ZDb<{9 literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil new file mode 100644 index 0000000000000000000000000000000000000000..f1e48e1659e60f7d22e6af4278883153380e2f63 GIT binary patch literal 3956 zcmeHKe^3*57XK!@*(_m`ED&lSkuFdu0!J4_q6W{+4-hccp!6!$P7_6>4nG3n5Y)SF zen6r|jY^dtZ7*1L=v{|HakTUtCjn|HqNUY#q~#P&q~265$7#>&c-Q-GlpCkh>pwR) z)0^p=dEf7Q@B7}{@7sOz{p?r2Myp%_j zb&$&;--5gyF0?>iotsbNP@kpz)yvXnb#7ky9o@M47j!+e)3X8JnZJc3c-+PZfZGel zPy}+wC}fT}o#RkWfyC{VBjgd!+GqW_wbezMA~yzr2?)Spm{h+e-@qj&;hF{7wL1UR zSOS1nXbZJd!Tb?$O=yeXrx`@^F*0M(ex8#^5a3ZU=&tpwEQhb%AF4e z4zDcSu+tOjJYpcrt*FV%q9!s{o(l?ScQX$##|Q*yttkH1wx;vH>2`S^kY!DcOrR|u z-x*-}zI`b}0KNiXM606HUg?X`&OEPuVaEr{Dix8?S`f=a*eS+u{#aJnH(7N>U)E2C z?LS$VqtPdUl9dOJxx*7*X;hWRoiw}g{LeZMQ*koTXGMJ=LRRB<)tX#cWv+%Y1j^#y z8zmJZlC5wA(yg?x#wWB=!V0gXdRC~P6{<}_&S3=m^J8vxjBu+-SUZAEk;1x%o+b&* z@X&J#F8PGer(#?J^E}EBv@DiB<&BNc(Sy1yTCJ!b)CKzs76*Xe`##`PK@k8pb}tWQ z9w|TpWjJ!kB}(8(hM@cjSWm+k4 z9y3!}II1z1SK0xsXhLEW@j2%vcAYD=gUkrjKk6liw6snK6#Pwg@5ZOjmY7NtasPZx zS(iAJm*dRLyp+jP@HZoWDFU=6$tm_X+5FvN%_*}yZMS~ynzqkTO-~{YGBx0^xL-vm z@WX%gn{?IHY$WFFo1E;MIeYWs)MsZ0W^N8mCF1PB(}ri$Qoq`Dr@ZU-uHEH(_PExx zyteLzR%KXvFMT@wOJVkJh*9Yt>%@6bed2BN!pmoSgwj3zQG>knCnGEZcv-kqm0HWDAc z*JY?oyM3XefXR&Qw;m7O&;ws&xa^mM)9a>2`UZz4+pZ1{T{(v4MP+2q={?bVdU5aZ z6TR~^&+Q;SY)q4Or4C-Wy*tgrrKl|MeFAgLv%Y!YykqNf>znQE;ZDt*jO;_lPcK%z zo6*}lFQ5S2_bY_oZ$8c{2PTxIGTsumZlSZ^qh*B+^+Ib)H8BOGw@JxeRx+Q^#`Fc| zUhGho(t@CyLotjj8PqNDAWMepJ+&6$HeA?{=dxIYTP;Xky~{$mqK3mHx4e=GT3Sg< zuTM*+jgtA~z}%~`$}6#R`^+(=^bYkIhk8V}=p>ST#*ytr)Wg%VI+Ji)nNU^cs(~Zx zvUpt{G99(V%WoOK^Ore_^w3p&L;*_df$4`oONfjHtW&iGf(*b_=pK$yLX? zqCV0|?vG=Wl;n$X>}ygoc~r?+x=eaxr?*7f3FQIjg#MXG~(r#-MD3C)rVi_<7CeO-Avdud;v5L<+)=ABYHpmO6^F zoF&8jdM7hK)hX649URTP$C6 z`8qW6QG^o1tp_PjvpP@qNIo!i?c$dAerL#UZlyM*JS*X_=`sX04y*3Lq94bqGtfSZ zRk!|6uqqYS`GZ(h`=h}sT0pyl82DqvtTb9Zhr_IY)BhN=xPO5k#H{~-RSb;#Z?OtO zNsmTOh&EUi*Z>8wD#!GFK=oV%I^IfUUTSJ&pNNZ&&fm|911D|uNuY(2@pqi+&O9Ar zPh3HQGCR;KunKt=i5{#_h`!_}cZn43FSaj_E^bx)v6<@BHhr=F`9#&BucxvSs5b9J z@0G^mqpyCjBE++}q(04BW-i%Sv!beGWBF?S5hCi1UF$~2*1kTrW^}BX{jIL)i&ay( zPahKAJIEnb!Z@t|r!)VjGym~3-%J7U3havrv@n&sbBG|LrJ&&Oq$u6@;9F@yN0KPh zCqs6seA|%%m5&V(y+!4{>gVoK@mL73o5r1i_}bzH>%D-&u|0Ce{(f;hhQS5XYqpu) zayh{5wrWfy_sog~@*>}$o5yDq-R_^*rF1T$xSS96Su}b51a-Bd)sL(A8)-8^0hLYFhy0U)=EXn7@m7Z9aUuO-MU8Kej} zikpO@0&&4aE)fmT0N5;0mpS}WV(B!pwB8{qFfV&(uW3|U9OF{Y()*63tfQEZb3ZST F{sl`s@Javx literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl new file mode 100644 index 0000000..27c206a --- /dev/null +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -0,0 +1,162 @@ +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +static const DrawMonoid _88 = { 1u, 0u }; +static const DrawMonoid _90 = { 1u, 1u }; +static const DrawMonoid _92 = { 0u, 1u }; +static const DrawMonoid _94 = { 0u, 0u }; + +ByteAddressBuffer _46 : register(t2); +RWByteAddressBuffer _203 : register(u3); +RWByteAddressBuffer _217 : register(u0); +ByteAddressBuffer _223 : register(t1); + +static uint3 gl_WorkGroupID; +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_WorkGroupID : SV_GroupID; + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +ElementTag Element_tag(ElementRef ref) +{ + uint tag_and_flags = _46.Load((ref.offset >> uint(2)) * 4 + 0); + ElementTag _60 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _60; +} + +DrawMonoid map_tag(uint tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return _88; + } + case 9u: + { + return _90; + } + case 10u: + { + return _92; + } + default: + { + return _94; + } + } +} + +ElementRef Element_index(ElementRef ref, uint index) +{ + ElementRef _39 = { ref.offset + (index * 36u) }; + return _39; +} + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef _110 = { ix * 36u }; + ElementRef ref = _110; + ElementRef param = ref; + uint tag_word = Element_tag(param).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; + DrawMonoid param_8 = agg; + DrawMonoid param_9 = other; + agg = combine_tag_monoid(param_8, param_9); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0u) + { + _203.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix); + _203.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_WorkGroupID = stage_input.gl_WorkGroupID; + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl new file mode 100644 index 0000000..dd2f517 --- /dev/null +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -0,0 +1,169 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct ElementRef +{ + uint offset; +}; + +struct ElementTag +{ + uint tag; + uint flags; +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct SceneBuf +{ + uint scene[1]; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct OutBuf +{ + DrawMonoid_1 outbuf[1]; +}; + +struct Memory +{ + uint mem_offset; + uint mem_error; + uint memory[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +struct Alloc +{ + uint offset; +}; + +struct Config +{ + uint n_elements; + uint n_pathseg; + uint width_in_tiles; + uint height_in_tiles; + Alloc tile_alloc; + Alloc bin_alloc; + Alloc ptcl_alloc; + Alloc pathseg_alloc; + Alloc anno_alloc; + Alloc trans_alloc; + Alloc bbox_alloc; + Alloc drawmonoid_alloc; + uint n_trans; + uint trans_offset; + uint pathtag_offset; + uint linewidth_offset; + uint pathseg_offset; +}; + +struct ConfigBuf +{ + Config conf; +}; + +static inline __attribute__((always_inline)) +ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_46) +{ + uint tag_and_flags = v_46.scene[ref.offset >> uint(2)]; + return ElementTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid map_tag(thread const uint& tag_word) +{ + switch (tag_word) + { + case 4u: + case 5u: + case 6u: + { + return DrawMonoid{ 1u, 0u }; + } + case 9u: + { + return DrawMonoid{ 1u, 1u }; + } + case 10u: + { + return DrawMonoid{ 0u, 1u }; + } + default: + { + return DrawMonoid{ 0u, 0u }; + } + } +} + +static inline __attribute__((always_inline)) +ElementRef Element_index(thread const ElementRef& ref, thread const uint& index) +{ + return ElementRef{ ref.offset + (index * 36u) }; +} + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + ElementRef ref = ElementRef{ ix * 36u }; + ElementRef param = ref; + uint tag_word = Element_tag(param, v_46).tag; + uint param_1 = tag_word; + DrawMonoid agg = map_tag(param_1); + for (uint i = 1u; i < 8u; i++) + { + ElementRef param_2 = ref; + uint param_3 = i; + ElementRef param_4 = Element_index(param_2, param_3); + tag_word = Element_tag(param_4, v_46).tag; + uint param_5 = tag_word; + DrawMonoid param_6 = agg; + DrawMonoid param_7 = map_tag(param_5); + agg = combine_tag_monoid(param_6, param_7); + } + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; + DrawMonoid param_8 = agg; + DrawMonoid param_9 = other; + agg = combine_tag_monoid(param_8, param_9); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + if (gl_LocalInvocationID.x == 0u) + { + _203.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _203.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv new file mode 100644 index 0000000000000000000000000000000000000000..286bd332de696ebae624c53e6906d69400a05eed GIT binary patch literal 6864 zcmaKviIdk6nYPjgohexrGK00G@$>v6Fyw>boT^mX0Cc}V}*2u_Yt&@PvN>1te zmFvyn+P-rqJMF1zXGJ%7#nh@*>!((&$A22LNs=}RtvL%hje%QpPB+)Jl~HS!QVclh z0N8^*&S9ic8J&dpk#RmeFIUFz1v=1oS~}0HeqVALahuwey*paXR(&{IqnO`Br88Ep z?@McnS+m-xPly@0P`x!i26jQpRPZj%KsY3V_ys4m0Ug(K9tIR$zu3; zWg=ZyHp9MT5tudiw%T-Ue1CFYj#Zk&i@mlj3meuS}6l zz8txl}741JM~5_jmd1jwqv#W=vb#~ zjC>_*x|b_ilY#ptbEyM9R=*#7qEl^zA?i;T&$oxomAx@x&F1QCd`DTl8IR4ycR8yO6YtJ!Ha2_qeulN5`)w}Y zSO0!wGYxAuc5WWaE*y*X?fx@(wwKvM?XXLL6e1q)&`EcjzUTo#xc$a+t zw&ET7GIYN|;>Fxo<9)Nvf&4;WitL$C_ib}_i*nsNt$sFSe(#rG;3r5Q{qA z$6_Dq_TgvUKfvu{A!NR|qvrDy^Gx{lV!gfS=iH;0h_x57Sx{ikRm`&#nD(i&iJe=(T)&v-D)QN@m_OPA^FE2KE@0jxv9$%v z`y+N)0dt*V8&a%v>|VsW;-2W^=RS%}OXX5}XNvpi9daMtgHxHhb6<+L{>rDbx6W<= z@>_z%xn-EY2D3Se4dCmn!oHL}cTVd1XoQ(iitd`k^wI9Vh25N|qnktAoM+^+ zxMU9Rp>}K7^NZjEkn1|XprfX``Q1-xF>5*>F6R2&KXq%bfIA1V^||g_Ar^U8<+}bC z<+8c7%hy0hk9PepMR&~+zXdt^)c>_T`gQ1j7qE8Mcs-<#cwjf=n0R0}<~~L2rW`YN z0sU3s)^xw^e;95r;;vWSw@*y_NG=-_>#HNXcc(Fxn+y2+kR8+_;@+RylSIrsAApx2 zaqFo2+es|y-iI7@Ka$&Af7E>t+5HavA>`N>`+FFgK?2XzA!ISh{eBWI=6*i~SGTX9 z!p$LOAL@s5Ec!Zv9(_HX+stL&XW-UY!d^X_%j)9h{~g@?+RUf!TPk(|k)H%6t zl0EvKW_eFd-%;fvY5#vz^bOg(z74*G_VyRJeTdncy6=P7C9Fs8{`h%b{tn_>tFQYo z1bI)@JrBDJy7pE<*FI6uy~mw`uK!ezzOSGge``V4|F(jz{mz1}{jP#;y}Nt#dkVV# z_ZM{iAMMc(^yrW0`bMTQf;lX!%yB6hov#@nOO9*2U*N~ z?vJ|pyq9~S#gMpm_ft&Sz4I*pgEO-q?%9$5jXvK9e-q@V{mpRg@~Hb3WOF=5E#H`1 zATj6mPjaZg0Wz1d$}xktBF79oGwPA|c4YI~qvuC`9XsRf?tuJTZHzX3JX7K^!?z(j zL-Cm5+mXfnjB#FKQS%+hF+h$j(q)f9Lux$XS}pGpnxbdDY*3^?Nt85Yk7R zKE{bVS7+>OR`ToBbS!@eryf(4ozB%JK;~zx64=SGV521@k{~t!S4`Y1C zKMLIo>1PdfeU!a(=Ca<$;Pxx`oO}ZQ0OY6rlW^_wSkI@B%@OD1)5u~ELY@=#k3;4% zR{1&bhv3q~P^|4U$h~X(ESPxI`y8_Mp2+9-2(sr~JZAcNWaG3&Utd5L_nm*9b$8t4WBpy9|4cRztu`gdoc3-r`-hBgE+`V}e zt{%2;BD=5J4#3sJ_AO-nwLK13k8}EMWY4KF+VnAxxceIGmcP*6dy z*}Sp7?<2cD{j903k2N2FJ8QY|{qP?^&M{{8!yJpdp-rrKRy|j-R|k=Md;AfY_#>Q` zo8eDDVxD==!B60Ro&#+^hQ#bmIdcCD`3PiO^!syU@mT*akhAsoCr=~)5;_bGK>Ege z^($n3{mg$6{MV5F1CX;)*DqpzgKW$RP{jNe*_fD*x_-_l)*$~kQEMT8Q%pxc1@fB- zt)bQo_{q6^5_}fq`e(zfCAJjOX8?-zpMxwO`Z8o=<8SASk;Rrnv8OLVHcngoJ$)&% z_mqTK!pp78z z!)jIsmMcj zZXQh{rfn*<(z4D5tsPct4c1kuolZz-i!36o9S8eBurz}XR;I2i>vSLIhT?SG$3Hte zyF0tz%y-UrzVn^$eCILW?;QOGt?qo9?~9|iRNh)Ye@WrG-a3u|0BYj^AYfex*$8

#sM$`A=nQG>o=4bc_$RKF+#mo7o1R%0QjIT)=mRB4M>1Y z*wXfBhOiQxQWZRpILQ@4QBo@Q+-6QzB+xz|t361mWcea_nOOC6kHIISb;X`nL6S;4 zZIJr*EE_aw%5B~$$CVB&vluUj!Sb;wyp#O02v*Q5an11}8U$zYs(D^SeB9%x<~lC| z0{|s(49dr1H*0C5$Q6Si;8_}g6;P*IW{9DycVL~%4mx3R+1cZhEira@W1R<$RJ9c| z`#8)@DdokWl=19E0DFi;f!2zNe%}(B6m)MlyK`&Zt+gnWC2yLf^<&aDSc14VhG_B=R+^~yNgEf1mPMh_Oz;L{_!l>s zHM2yUnb+9u_Pku**+CoBBy9^mD^QRC8C{a-TA-NDrEG8MUHiNEf{` zFhl@?zSBTJM@0ZQQ1ku!0eo$Svg}p~u7_!g7w?ViF6JP0Nee_t@T@VC*Ht&+up4 z3eWgSSpNhy`icb!`a9)Su~s$Om)+T2)^0`jH})f%o>+Bv`<|@LS>gj(-yi6=wXQ6) z##WVmN?{2Yy`KidcqN@H!Zek24R(;DaG}a{(F3QZUXRq+0h8dwF9fJzErou(S`tuN za;YE+5}ev`P$w6aoTrQmm-1!9+dE$EUrPnH*}>`rEchu?9S+#&+l?1Y`lX=W8=9UD zT{|&8aVj)^V({9`a3oz6)MXg&y*=ma=f0}X@qYE(&bnQ2Zq~VAIWrnLzi7w~-nL+P zweCpNZkKyQ*Dp6cwErT8_&lBucJ%M;dEvvg6!&b=feXo(pw@3V*nfuCbch~Knz-e{ zMtM@gG1c)m3y$UWzZD}e_w|-pU+KPJesY3<3&*PfUTwl_EO@OCe>{NKalf%CjwAAl z%BrK=tR-pL&~^#7erRsf?C^Bt?8L=Gm_9*Oc+2}t?)$3#Oij6){Ai2xr{2THBVX)1 z?5!{5_A0~HJ=xvuuXif4yStYZz5vJ0T$>!WU%WUm6Pkf8B8RYw6jkAis=T}U3wz&P zop-E1bKsL5(9!H0qdSin8yZUI;BDd0b}c;HRU$5Va4NY*hOF`E@||I?mLpnQ2y0g( z83A!$QPSUW(p983H8hYi+LK(v2%}~W#R$4)NVmp|t{G|ZZnqHaBBIscwpfTZ3)PddfKH863P=cIEc=_+b4WkQ)eu1pD4rq(des*gL=W4eO3(Zb`7LMN&o znUgh}iS}9|uh!iJOU`Zaxh-a*IfH1P7ecpBaqdS&?nb%0smtBkWy`rbC%rl;{Wd6_ zWzfw-J;~EO$s^J7VrBBMGI`LJGF*v&;?orj=?c1{<$K}$y}Eo4s@@yE15SF~u0DsV z17Yuwl6cyJHc@V?1?AUbc3XzrPxTUQT?2*FTwDV!y=Ib51*E(H8fgHs;ob=;-1b@W zA79#irSf~Z1RM1aWR<6$$ouT-QwK;4e>fl(If75@`qSv?Ys@oVT(EV_{6a?DXWdVH zr(TN-97nnmSBI~tci#%ioz$KbQ&ct;Y4oq6r9A;ni`td zoS@=^dMCRo#~E+nhuobKkC2~FroCH_erox{=;izF(E~@mMkND@ zpT!`E3U6uTq`1nezz6Ac(TjTX0sx)=TL46#S-LNhs|mgwBN#JMU~AYU3FG#zjBUF3 z60p6|V14ECH=jiI;olWM5Gf{9uBGXxYqQ>v?0SQ&p4n2nMd&Qe$f(r?AZl$5`x`I0 zl;0eagui=o{rLFo`AuhshA++>!Ze9|rdRd%zn%APCUV9}esuD7?}so!mzvMPEWckD zo0i?Vtt~CPs(T5&1?Ktb;mE|7)2D`EhQIHxGQ9A{Uj09(|L^4VN*aJpcUSq}eS z;Qt#~4G^#NFCX@^sQBgN(v|+{(memu=;K^P3`oOcfZaSv|Rp(?L&Pv zs}iq@Px5)SlF`JzOCCf3d-P^HX<+pM3fyT?Z{`3k!q3PnO3XXcvEZm~8NQjRW3iK9 zws3hBgNJcFLk4p+j0uqkmy#7gY7wG|TsC;njG%V$JBpR?O^P`@lETdS;sk*oUp*d= zVnD-qCTQ|CmV`RDEnIVmCCqU<%ZUA z$3v6>I54I(-|s|c=TWL+Qk=JWj6avoI}U3zvA5VEd(MzOv%(jvUeJGhN5Gzm9c4#C pIU}LWuFqo`d4uQ?gse&0yFAFjv>N?EY{b*&-y!dv<9|_$^cP%6``Z8j literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/draw_root.hlsl b/piet-gpu/shader/gen/draw_root.hlsl new file mode 100644 index 0000000..7dc68b1 --- /dev/null +++ b/piet-gpu/shader/gen/draw_root.hlsl @@ -0,0 +1,94 @@ +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); + +static const DrawMonoid _18 = { 0u, 0u }; + +RWByteAddressBuffer _57 : register(u0); + +static uint3 gl_LocalInvocationID; +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_LocalInvocationID : SV_GroupThreadID; + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +groupshared DrawMonoid sh_scratch[512]; + +DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +DrawMonoid tag_monoid_identity() +{ + return _18; +} + +void comp_main() +{ + uint ix = gl_GlobalInvocationID.x * 8u; + DrawMonoid _61; + _61.path_ix = _57.Load(ix * 8 + 0); + _61.clip_ix = _57.Load(ix * 8 + 4); + DrawMonoid local[8]; + local[0].path_ix = _61.path_ix; + local[0].clip_ix = _61.clip_ix; + DrawMonoid param_1; + for (uint i = 1u; i < 8u; i++) + { + DrawMonoid param = local[i - 1u]; + DrawMonoid _88; + _88.path_ix = _57.Load((ix + i) * 8 + 0); + _88.clip_ix = _57.Load((ix + i) * 8 + 4); + param_1.path_ix = _88.path_ix; + param_1.clip_ix = _88.clip_ix; + local[i] = combine_tag_monoid(param, param_1); + } + DrawMonoid agg = local[7]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + GroupMemoryBarrierWithGroupSync(); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_2 = other; + DrawMonoid param_3 = agg; + agg = combine_tag_monoid(param_2, param_3); + } + GroupMemoryBarrierWithGroupSync(); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + GroupMemoryBarrierWithGroupSync(); + DrawMonoid row = tag_monoid_identity(); + if (gl_LocalInvocationID.x > 0u) + { + row = sh_scratch[gl_LocalInvocationID.x - 1u]; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_4 = row; + DrawMonoid param_5 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_4, param_5); + uint _178 = ix + i_2; + _57.Store(_178 * 8 + 0, m.path_ix); + _57.Store(_178 * 8 + 4, m.clip_ix); + } +} + +[numthreads(512, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_LocalInvocationID = stage_input.gl_LocalInvocationID; + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/piet-gpu/shader/gen/draw_root.msl b/piet-gpu/shader/gen/draw_root.msl new file mode 100644 index 0000000..2ed7ba2 --- /dev/null +++ b/piet-gpu/shader/gen/draw_root.msl @@ -0,0 +1,128 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wmissing-braces" + +#include +#include + +using namespace metal; + +template +struct spvUnsafeArray +{ + T elements[Num ? Num : 1]; + + thread T& operator [] (size_t pos) thread + { + return elements[pos]; + } + constexpr const thread T& operator [] (size_t pos) const thread + { + return elements[pos]; + } + + device T& operator [] (size_t pos) device + { + return elements[pos]; + } + constexpr const device T& operator [] (size_t pos) const device + { + return elements[pos]; + } + + constexpr const constant T& operator [] (size_t pos) const constant + { + return elements[pos]; + } + + threadgroup T& operator [] (size_t pos) threadgroup + { + return elements[pos]; + } + constexpr const threadgroup T& operator [] (size_t pos) const threadgroup + { + return elements[pos]; + } +}; + +struct DrawMonoid +{ + uint path_ix; + uint clip_ix; +}; + +struct DrawMonoid_1 +{ + uint path_ix; + uint clip_ix; +}; + +struct DataBuf +{ + DrawMonoid_1 data[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); + +static inline __attribute__((always_inline)) +DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) +{ + DrawMonoid c; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + return c; +} + +static inline __attribute__((always_inline)) +DrawMonoid tag_monoid_identity() +{ + return DrawMonoid{ 0u, 0u }; +} + +kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + threadgroup DrawMonoid sh_scratch[512]; + uint ix = gl_GlobalInvocationID.x * 8u; + spvUnsafeArray local; + local[0].path_ix = _57.data[ix].path_ix; + local[0].clip_ix = _57.data[ix].clip_ix; + DrawMonoid param_1; + for (uint i = 1u; i < 8u; i++) + { + uint _82 = ix + i; + DrawMonoid param = local[i - 1u]; + param_1.path_ix = _57.data[_82].path_ix; + param_1.clip_ix = _57.data[_82].clip_ix; + local[i] = combine_tag_monoid(param, param_1); + } + DrawMonoid agg = local[7]; + sh_scratch[gl_LocalInvocationID.x] = agg; + for (uint i_1 = 0u; i_1 < 9u; i_1++) + { + threadgroup_barrier(mem_flags::mem_threadgroup); + if (gl_LocalInvocationID.x >= (1u << i_1)) + { + DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; + DrawMonoid param_2 = other; + DrawMonoid param_3 = agg; + agg = combine_tag_monoid(param_2, param_3); + } + threadgroup_barrier(mem_flags::mem_threadgroup); + sh_scratch[gl_LocalInvocationID.x] = agg; + } + threadgroup_barrier(mem_flags::mem_threadgroup); + DrawMonoid row = tag_monoid_identity(); + if (gl_LocalInvocationID.x > 0u) + { + row = sh_scratch[gl_LocalInvocationID.x - 1u]; + } + for (uint i_2 = 0u; i_2 < 8u; i_2++) + { + DrawMonoid param_4 = row; + DrawMonoid param_5 = local[i_2]; + DrawMonoid m = combine_tag_monoid(param_4, param_5); + uint _178 = ix + i_2; + _57.data[_178].path_ix = m.path_ix; + _57.data[_178].clip_ix = m.clip_ix; + } +} + diff --git a/piet-gpu/shader/gen/draw_root.spv b/piet-gpu/shader/gen/draw_root.spv new file mode 100644 index 0000000000000000000000000000000000000000..acecee3679ae274a8ebf82ee416e177df63eb992 GIT binary patch literal 4684 zcmZ{mcXO0g6o=oqECf*y6#+pEqK+t_C@P4Jq7jW^@49X_Az9sIW3mCU0%Aezy&F3! z_TK(+{2+b`+l-FSv$+RecE$_8Ilt5Hx#!&bE)@1#Fe%I0vi-C6?B{HJP0#kjl4Vn} ziA~wrbw$^bkw(vwWv8BG#MG?N3^Zn1HYqD2CxC%+bx_A5Pz48(uYEdT%jRC@6!?Rh z{em)q%AH-M&h_g%yUr_BhK5Qbjm@>*Qms1JTk5F}_f{I!jlIUT=i;h^mD*@eKA=cM z{tQJx1=bPsVA+ZJ4v6a-+Xgy{S1Wba>fPnqy1|Y0O1V+353XC=jGd4zL)I|XntCxi6}k6RWtd2iwZH3nx{k_9TYwtkjYwz4gFJ#Z6 z57##}_Zfqa?^DQj!Q;Fi!t;CV&gq@h@E)dsx!?kJikU7zcZJQFYV(bncN%gcXa~l` zOe3$8-n12f@y_=s;0k^IxZ2b`yXTmb{68gqHu4a@Hov#|?B;&*8SH8O4&a)H?)Rci z80*=zb>MSvxtPy+4@g|QI3exV{MOMI5o7GZK%21UVaN{92Fy{P30&5hg*4AIg#Spm z@iXxspM3UV{0YgYePzyw4OHOPf5CVXX1UDBaUDu-kY|0 zaPLRmyyZyes?Bq#duQ78uT0$Bn3dmZ8#t7xi;~@WbOX;6as2!>H}bq2l24oa_91;! z+O4bJpKLJ?--dOJ_gx#G-+v?eWDxcEiD{lY^v&q)Y+(L3$>%&4F%{?TTk|~jBTnP) z?b|vo@9Ix1MWko)ju!&Ypzh55y$W6bnw;+Y@^>Ws>Q}eu<(zK5zYF>Jtu6YF7JX-n z?r%fnyWdz&xBjjc-QS6b*YEFy`YO8i4K4w`OTUA&=-K(52()h}Mty0r`4-P+YR*Vo z#Ge7zwu)%q)0x0G=J%t|IN#S#G|VIji92 zw1LQ34L8Sc)Esr=d^9>>GPw877 zMvefN{svONGHQ>)-6P)WCb+hZz_+S?J8&;^<-cLS11UCx=TieV>py^QeeLm$ zJqR~XU);e%aPL50+|$Ew?cT#yqH8Dp$= z?{Ev!zLo!X50B$(ox>B@w8t5qgu8F__Y~azjI*Y?G1l}ASyO5LMC8-JImU0yGsza; zjXrJpcj+5yJ@X3on}q#2Fd10;*`&w4K9Ano&kNYJ?@n*#MYy)n>5G}Z0oNWqsM~+kRq7iF(27>IB7eYm!hfc>h!1>DPAdE|ZoA4|G({|Gp5&-G!_?QshHW3UXE z6Fq*C?8fhgtH;~@6g~~Ok3M5|A+<;S&)^Gl`seU55bx*^iMv7SfWxXAeqZcZDBobTc0#9LN3&in9goT)N;`T;%$qK6;h z+9F?{w)}m2H~&y|Iq?6T-o<3#Y6sTXANeoxU}Qed8M_Vur_+;nbzqK9g`0m6(*LLG zp8>34kF$`ofjQ14`ZzqDc6Vu5#!4A8o9)sQ>@~ literal 0 HcmV?d00001 diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil index 5ad35e75a7f24f16e30db7ccedc2b2ac1cb4792a..4464d9db5599c85b03c1fe932eefc153927385d0 100644 GIT binary patch delta 3660 zcmYjTX;>5I7M?6KSqKoaz_3peL}YV<7`CbjBCE8iVvK+~q_tWKHEOkL#VjDA0tN<< zy4(gq#fmjnP_(p702LK%YVj5|(&$x-)q9IxZy#x~y)zJe`Ui5pbLKtgd%v^UGX^t! zQ`E`xFQt8+IB(6PuqyrDtr?$|?4$qyFnHQ`H7mjY@sb(Ry^J2O{qbrbV^vCuI%DA* z{s4dhAn+Xo0Hh2M=3L^75aK*}zI9l{0f0+q&a>t-++WZEfKT7XbtC>uaF$n)uKulC zg4OZ_K`x6YP9Bsi>etF2LxOzY7)`f891^CaKy02^5QbuAIfX|2u1g#96s(mW$w^<8 zlR|+6D=0#hp+0#mSQ$%IQe!K<^a!*29h8Qq0{8#Y?X|%arV$yh@@nhIJ@FHhh)M#n z03)V7)a@J4jIz_Vq8bx`Fd?6CvqH=^TNp0=kz~KHqZoUeOT90=)uv&$*ae-yO~8%Rz&2pC^=e2qDD`m?~gJi^Xt0TBm+5i zsL&ADXQsy4y=XK*fu?UQ@HB=G%hNa> z##)+4g?5(BrANT|n&2`aC)(!O!-n>aFf?Fk)=NBvzkvQQ^r;B1;)`^juwYIc{dWL5 z!%5**!Z5%zD!Tdi0RALtmXVaXqdnUBTt0?g!vqS}7$U5sRl*2iXV}D_MsYJ_REE;! zUJr>!ViGKyAJa4YlTAF5(8~$d3JR5=d(E8Fyt7lGl{3P3`unF=q7RbHemNx|C$ z=o0fi@0TXtS~3vf3s9w>in>MQGj8Q=m^d$kRHmu()&d_y;qx(h-eKKQ$DKNFYw`(^ z`g}5Be;k(z6Lor7deHks#AW?8G9^XZ7x4b<{!79Px>`{@qMdK{CrCBTP`Dp!>T##6 z)hp(FwNCf|;>5vD5%C_^lczWdI^D!CEh0Gqd#We7e=n&0d;Wsr$upJM-$m;FtBL!XS8#eOu|bc?$E7nxOSwt(PY-i?%$ZxH8~X+9yK`oIyke=(IM=8T$9GBoI@B zw}_x;6ZG_6UWNqwc48Lsh-4;Q)C*9ET4@)Y_q5At<8U@p&mR3(WZzy+KC1TmqsE6L zyEdBgp;BuKcHJ|#a-2~}M`Fk))AudoiYKsALkO%+gQtehdC%7z)+&0#=?P2aL{-wV z)081HqrjJhUi2VRSu+70iL-om>P%OvCP_US8qW(-R8KPD9o1E#(n?$oZy$J(zLh?g zbhyCIoMLV|)*Ym1B<$sZ1ytAdZ(eEaN|T3@1#q=`imJ_;(C1zUuaM1z7CHs_kCu!Q zdSMKozyxbpF3d;Mi;TevTnsOb{7M|Y8`WsZX5@aVa<{k=4j{|(hls=7TnWS=s?GlL zYAf2SEuH7YkH8U8?iac;B`zyG+H!;GvTD6{BDS}}Ms~i~C)5acsl&o^;;&$I+W)#E zs$4FrzJBrg;|(*rZcMpwX5ou`pVGEL@8SwSA+5sw3ck!QO;;=HINe9dAG{>~9@OKpuU_TA`p3x73#4vX{K}D!Z)s2iafi*8@eqMKl-k ztAX1iS);PlnD( z(tnp?0dG!u+n2a;evD$6xdo5gawbMtHFbV0ans@$!|>Cc!y{G0mW3J8DXf#%?=~*9 z#N8QwIxwtj@83nSEV#mPh5lpWmeLr-Pt1}Z;!6G-69-uvI$nqV7{70Ko->oUV=JePV zxnBFJV@AyakLc#4(3m1QH(?f#+ynKRX3@|hIg^lxcO?X+M3bae{5m3IG(J8g@RcFd z0wA-(tCcVHZ$q{Xc28$2z!S~FcBWuYrKbwRhXc~1dGk>CL*@{JDA=IWv-bIH6t z0XN>;{%Cd0{M&ZVSCQ+}5Bz@ovsE7NzccSQJSQ-2DPqGu*?|(aabiK_`o05RJnw`D z?kx8`0nd+?Rjl&Z@=h`+>@jQL98T%(0wtwVlbgQN-{?&(y;pHR>>D06uo^Ypj}Y0H zm7^oZL@Yfbep9E&jNcEG#1Dzk##Kxj%1q^W5To5JY8FyU3wxd9DBmBW5uvRvh4k-= zSsHv!P&CtA`GqlNh~0;82ntgd;j!LgPaK>%o}uRxW<1_8&PYlsDI&RrWKOru0%!cx~R;qIg%wZg}fwLE4vPZ<5{CFsHz9ea>>R1Hu_MGq)CQY2Wu0Ip%ixg)r$4LbjzsnZbSadV)$fCsJ>uxALC6P?UIQ5G z5Kh{=2S(kAKlm>qe!L-%q#&S=pkQJg3@jhZfpc8l-Ffq^9g5kLitH7CQ;giiXNzSz zbSZ{9xNrSQhlI?n6zc#FOsS|6l zb4ygm;Ye(T?xkxYN%B74vB(Bm>qJW@S~$^EKlpcjCa9Y>eskrkB@3%2wolu9!fY9L z@u=@)x;#2N&OT~%(;uJKj`fM-?T?t2$!vxKCg%LyS-mI>&$^C!x=m+P1vl=fLzjMv^Kc=ntNSF^S z%8$V~;*~S<+27|+GYz|R2z%#@RF?HjPKu*Ou5cNn|ib#{x*9*EEAi{>= z8AzpXBfLOr&cs?~xp5IJwBbvpvwy0B~LmeA8pmV%pi&7y7nDOEQt}uevgxC#y2D z;)4MI1t8!S6#xhsz};E+8$4W}m}DR1GXbFM(q|q?G_T(&0N_h0WqIQNvoQ}Ju7;C0 zVb0z`H79wnXD9Z{-vGZlZm zwlfiKY_HUTS{@vJkwra&{pK@InJy5r26N~A4$DMbY5C5Dx8n9&z!bucO#hG+#)Qsy66x^D`lE8+o&)%o%@Z+pN@ zDP;nnUScM*j4%w)_41E>hsW5{q*+HOOCNQplK@{7`5N_;dyMlqMzRs+;eNU|e8WLc zd4@59BJ;VAe$X*GOaJsj=(!~CRb*>ZV(tOk#ndWz3MO$)= zh7u!wmj!%lq_{C7{1JzzJPXIPN9n{0kY3)xB6GB>Y8kkYUfxOwN(&)W%A16pYI1xd z1oy`B(iFoowpFj2@0_i2*aNom&!Iz}mtiF`)|JN+!Q=X9hEOYqkQRW-!RdzA#3{PS zYmzamAF@W%=RaP{E+!J;j6q86OxDdl0nhF1k}18Gs;r&SpII5e69?=j^p~%en^`mZ zvvmRC{D6H^{+F?ZoCFOjN#}0GGcN0`lPExSPsrAVJ=ZxI6cxCARGm~CjFVa$Dfc?v z)ZyhQ*2UO={jg zSE`C)BT@)puG8GyHj~?L7lSwZj9tgSKtA#)itM|D^Ni|-_Yb>P=MV02LzTaTI$%s& zM;V!hTI_Om(QuyQRw@SGn!%jZaZ7woc28jz?HvK#H(-Lc|89zti{Ai^+F`v!>2qHK z{!{K=K6Px09(#8#D=g54T4(<(m=70q0w7*1aefzPIc0|b!P#^z_?dSh{|WO zE`TZddMvqEq1J}E&lzMKrxj9OOkq9m&IM*qVI_y-ST@}(l)`)?&@!)u|H|E2uvmsy zB{@3n)o=;PFOYy%j6|i^O+kw=uYf}u;|$7!`$3u?Od=C_m_pDB=_L05jp87Q2AezbVFCqVF z$v8mS8taQ=LM9J4=AF6Q^kE8hYx6(9Jrly^i75jg1@HTLBk5nz`r0G+6JvV1N#N4y1~FE4vj5|5}JSUwl^RWq`*l~aeT1R z^aZ6*&|(^B$-d@V46N(?#$G8WX5uBDFKQCw~;k&IJcBG<$>oWwcS7=^CX(&|`A-WZ%dS z1=jTr%tSj$mhya^CYf|+l63CdyMdxbF=ThJcWPXfS*TS1!(Ef+hm*ZOfL5q)CfcEd zok_9aAbpQPy5~|X=kSdFSi-K=vGT!3X@eu#gVwAJ@eEjN%@2)P7U_F~qo%>O-ov0Z zbK+zrH*5^*Y1XF7LxpCF(#|WL=7P42B&G z;YP1otOEZtDxozFpYRKm@JO40<&{1yzT)6>Kxw~yE?o|N*vx68`&C!YQYx^YLegVM z%MiGjeusva<0s*&ky^=~vCm9Awu z(Ea5I?knZ{MV0gf`>_2NjohSrRfjySD68D;y?1pY#ObgUz&T7q)g}dKEKsE%3MToJ z48xT_&HIu~3avqEe-iPXxfRH$K4C+;D1Nt%--!JjDu{RTk;ct*GD1&fdgFuJ5-8x1 z427LT#Gnp}&Eq32ZbRhX4GZ!xIX8x`HGZUzz00_c{ev5k>!efhmJRD1Q3hD;U{GIK z-uxu;X{4ZaAp`X>a4)Vzie!{qK>viLu0I&ERyG|OoCo8SZ2^`axRd_CL{fDE%S-&2 z!7N=I@a@v#a3~FoDu7QVV1II#YV2IHxRSOIo?a+UnR`Z!A0E*M7u(*~;!O5ynfmrH ze8$^uhe2~5)#c2?1Aq<5krou*{JhQ_iHo|1EJ8eGJlao);ISM+ynp;?j*ze(%O^xX zu$h#5d$>Z@U`OK%9#&l-$qxzQFO0K=tep*fudjG5A2qbqpofC>`B+z2L_BcZj^cVJ z4;IH0n@+?D^u{`akAYZ;w)96fe1c~@Wq9KR>haQWHxd)@;uyCwL|Vgn(}|Br*e0Ho zGQRa#DR!JE(2RNzOwPxd>ttuP+)WzkcA1rELGv2WJ3(vDLf`@?$m|4;*E;;-cN0PKajB(I7%}nllLT zc->JUAr6_BV@vsBjjoZp4app$;*&b0%~3e&kU{8adGNUkZd3_6=Us;XBI~MbkLnQB zjnfq=-sg6%?u?}=+gtVc{Sz~V74j~8tw>$C>=b^G6*`AHd57Od51n@!oc^SqH5%0s za#gN4q}mE~drK*AK-2(0{53$^!DFVq{@$1u{)0W{`^L-H5fp^<;1o=CeAD`8IdG1< zfxGHgl*<=R8@x9B%`j4rEt)OS@YbS;i?!k}3YA0O2eQ8XtsW1xAHtW2-7=c|ynN$& zsL0AGb10o_6I?897Gi+L zTz>H3N&VHJr@{+Qt4^g(ZN5W0qT4=*LL)Wf?xEwmZg=@A2V^()i=CzdPEnahywGPQdY2;9? zf+iHxgmU~5a8Y*+(cb2L3Wnl)?`PO)LAa*&*v9iZr~rE3eU)!jUoI8efW}n^9}3mc zuvkT$U-(6;=v=3$<@`X9`|!CJZdZ{#Z!_>oI5mrH-xbxbQG>`Xo4*l`BIe4o4XwMB zqnt-~iap#^cLr>~r-R;l8ESOfr>-}NScRpYsZ4*oDQQ2v@Li$kJ4U> uint(2)) + (ix >> uint(2))) * 4 + 0); + uint tag_word = _574.Load(((_639.Load(56) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint param = tag_word; TagMonoid local_tm = reduce_tag(param); sh_tag[gl_LocalInvocationID.x] = local_tm; @@ -393,13 +394,13 @@ void comp_main() TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; tm = combine_tag_monoid(param_3, param_4); } - uint ps_ix = (_639.Load(60) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(56) >> uint(2)) + tm.linewidth_ix; + uint ps_ix = (_639.Load(64) >> uint(2)) + tm.pathseg_offset; + uint lw_ix = (_639.Load(60) >> uint(2)) + tm.linewidth_ix; uint save_path_ix = tm.path_ix; - TransformSegRef _769 = { _639.Load(36) + (tm.trans_ix * 24u) }; - TransformSegRef trans_ref = _769; - PathSegRef _779 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _779; + TransformSegRef _768 = { _639.Load(36) + (tm.trans_ix * 24u) }; + TransformSegRef trans_ref = _768; + PathSegRef _778 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; + PathSegRef ps_ref = _778; float2 p0; float2 p1; float2 p2; @@ -449,9 +450,9 @@ void comp_main() } } float linewidth = asfloat(_574.Load(lw_ix * 4 + 0)); - Alloc _865; - _865.offset = _639.Load(36); - param_13.offset = _865.offset; + Alloc _864; + _864.offset = _639.Load(36); + param_13.offset = _864.offset; TransformSegRef param_14 = trans_ref; TransformSeg transform = TransformSeg_read(param_13, param_14); p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; @@ -460,25 +461,25 @@ void comp_main() if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _935 = bbox; - float2 _938 = min(_935.xy, p2); - bbox.x = _938.x; - bbox.y = _938.y; - float4 _943 = bbox; - float2 _946 = max(_943.zw, p2); - bbox.z = _946.x; - bbox.w = _946.y; + float4 _934 = bbox; + float2 _937 = min(_934.xy, p2); + bbox.x = _937.x; + bbox.y = _937.y; + float4 _942 = bbox; + float2 _945 = max(_942.zw, p2); + bbox.z = _945.x; + bbox.w = _945.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _971 = bbox; - float2 _974 = min(_971.xy, p3); - bbox.x = _974.x; - bbox.y = _974.y; - float4 _979 = bbox; - float2 _982 = max(_979.zw, p3); - bbox.z = _982.x; - bbox.w = _982.y; + float4 _970 = bbox; + float2 _973 = min(_970.xy, p3); + bbox.x = _973.x; + bbox.y = _973.y; + float4 _978 = bbox; + float2 _981 = max(_978.zw, p3); + bbox.z = _981.x; + bbox.w = _981.y; } else { @@ -509,9 +510,9 @@ void comp_main() cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.stroke = stroke; uint fill_mode = uint(linewidth >= 0.0f); - Alloc _1071; - _1071.offset = _639.Load(28); - param_15.offset = _1071.offset; + Alloc _1070; + _1070.offset = _639.Load(28); + param_15.offset = _1070.offset; PathSegRef param_16 = ps_ref; uint param_17 = fill_mode; PathCubic param_18 = cubic; @@ -567,17 +568,17 @@ void comp_main() Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1241 = i_4 == 3u; - bool _1248; - if (_1241) + bool _1240 = i_4 == 3u; + bool _1247; + if (_1240) { - _1248 = gl_LocalInvocationID.x == 511u; + _1247 = gl_LocalInvocationID.x == 511u; } else { - _1248 = _1241; + _1247 = _1240; } - if (_1248) + if (_1247) { do_atomic = true; } @@ -603,30 +604,30 @@ void comp_main() } if (do_atomic) { - bool _1300 = m.bbox.z > m.bbox.x; - bool _1309; - if (!_1300) + bool _1299 = m.bbox.z > m.bbox.x; + bool _1308; + if (!_1299) { - _1309 = m.bbox.w > m.bbox.y; + _1308 = m.bbox.w > m.bbox.y; } else { - _1309 = _1300; + _1308 = _1299; } - if (_1309) + if (_1308) { float param_29 = m.bbox.x; - uint _1318; - _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1318); + uint _1317; + _111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1317); float param_30 = m.bbox.y; - uint _1326; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1326); + uint _1325; + _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1325); float param_31 = m.bbox.z; - uint _1334; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1334); + uint _1333; + _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1333); float param_32 = m.bbox.w; - uint _1342; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1342); + uint _1341; + _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1341); } bbox_out_ix += 4u; } diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl index 25d001f..71299bd 100644 --- a/piet-gpu/shader/gen/pathseg.msl +++ b/piet-gpu/shader/gen/pathseg.msl @@ -128,6 +128,7 @@ struct Config Alloc_1 anno_alloc; Alloc_1 trans_alloc; Alloc_1 bbox_alloc; + Alloc_1 drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -530,25 +531,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 if (seg_type >= 2u) { p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _935 = bbox; - float2 _938 = fast::min(_935.xy, p2); - bbox.x = _938.x; - bbox.y = _938.y; - float4 _943 = bbox; - float2 _946 = fast::max(_943.zw, p2); - bbox.z = _946.x; - bbox.w = _946.y; + float4 _934 = bbox; + float2 _937 = fast::min(_934.xy, p2); + bbox.x = _937.x; + bbox.y = _937.y; + float4 _942 = bbox; + float2 _945 = fast::max(_942.zw, p2); + bbox.z = _945.x; + bbox.w = _945.y; if (seg_type == 3u) { p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _971 = bbox; - float2 _974 = fast::min(_971.xy, p3); - bbox.x = _974.x; - bbox.y = _974.y; - float4 _979 = bbox; - float2 _982 = fast::max(_979.zw, p3); - bbox.z = _982.x; - bbox.w = _982.y; + float4 _970 = bbox; + float2 _973 = fast::min(_970.xy, p3); + bbox.x = _973.x; + bbox.y = _973.y; + float4 _978 = bbox; + float2 _981 = fast::max(_978.zw, p3); + bbox.z = _981.x; + bbox.w = _981.y; } else { @@ -635,17 +636,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 Monoid param_24 = local[i_4]; Monoid m = combine_monoid(param_23, param_24); bool do_atomic = false; - bool _1241 = i_4 == 3u; - bool _1248; - if (_1241) + bool _1240 = i_4 == 3u; + bool _1247; + if (_1240) { - _1248 = gl_LocalInvocationID.x == 511u; + _1247 = gl_LocalInvocationID.x == 511u; } else { - _1248 = _1241; + _1247 = _1240; } - if (_1248) + if (_1247) { do_atomic = true; } @@ -671,26 +672,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6 } if (do_atomic) { - bool _1300 = m.bbox.z > m.bbox.x; - bool _1309; - if (!_1300) + bool _1299 = m.bbox.z > m.bbox.x; + bool _1308; + if (!_1299) { - _1309 = m.bbox.w > m.bbox.y; + _1308 = m.bbox.w > m.bbox.y; } else { - _1309 = _1300; + _1308 = _1299; } - if (_1309) + if (_1308) { float param_29 = m.bbox.x; - uint _1318 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); + uint _1317 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); float param_30 = m.bbox.y; - uint _1326 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); + uint _1325 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); float param_31 = m.bbox.z; - uint _1334 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); + uint _1333 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); float param_32 = m.bbox.w; - uint _1342 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); + uint _1341 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); } bbox_out_ix += 4u; } diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv index 2ac684df95b29911ff6e00f1707287c9abf47852..bc165ac544b202659b7c62175b161a7100b2d61a 100644 GIT binary patch literal 33960 zcmb822bf(|8Lm&5Nul=u0wJM>PAH+5&_eGWVVES7Ffy45$s~XvT~L|=f(2sjCT{r_73zsfFW&zYfPfz^h$T0>e3 zwMMjd8rrIkC0h%iv|00}cX3ylo-Z?$`jVSzj`zQC!o>Dpt#iPta&fr=T`uYYYQx79kxyQiNsk3?pTPO=L zo>pscVD{u=yQcR{A3JMs=IqIX>sO7|pFM8e*0aZLtzTzr5&XLAzT}I}*U!SOW#A{w z>>cd+(0;0}s=l0$vahA)>#L)+G<;5XUzOhq;F&$$Q$Dm??B=r^b=l|g@Ok4L+FB8Q zQ1`UG2l@wkr+|m?pPI+u%X}wq zoLk$tGt?E+k82X&dHIZJt&Bd{J*{hcl~7mjl%D><-ocZ`BGcRQt<|U}4@{rrHkjv^ z1)4h&7ODIT^LzRbTr+rGol z8SKDuOwiEQO08|+b93sd&cqL$owN#&zZXMe5Jk1P`w-wucNgkb>?v^ zaC;s*TH0q-K0{mEQQK#|HL7u{dCZ&7m0P>OYrn2){{J8HZ}(Yc&mR5%8-V#wAZGI4 z5!}wdv$Yd=YG3!X_82-^JHy+!vo#T%3y5nb_n0(kpuTprHP?M!A4|6Og6CXrpYK_k zc5h7ersX;`)M~!>ppWFcr!gsB(f6fp_u0IL3~wEXt-5wj-DKmg83VojgE^l&S_feB zif12(#IJYU_@>{%bNz<3j?`~pcK?*FDFY|;kDc1SH*~a)Kz9N>kB*+}GkZqcqrG2_ zn(Ncqnhc)NJ+pgy)uyxMIbL%0y?dq)%sk0+V^~Y>!5ykA`zE?`t==!QEs z8hX`->pcMX@Ln;8x{PP;^LR(=bhtjYJCE9Yhw|SY)YUY0O=cDbEBoDr{YT*b_5`|R zD*Ln4S-020?K$sky$PP#eM0q2VNUPB=jG7Zdbh#<*Y@|4MZa7r7EFv{ zs#_IePk zL(zFqjyyNM&qy-{C-=pW_`}uPS;aKB%?NDW{rv+mB*ud1?dPy|@aPMnd;g#3KcX7r z!st_2u=?IyXWX7w=fKZXjL+xPx%0B<+!&ehd2{CHG;?;%Vjae3Li<`A+q};zvCeC| zMBUcUE3vu==;w3ct`7gxS9Cu@c@`O zhYrj$ue)=)jXI-bo1_LJyvVZ!}H*Bj$dr>m*n+_SpEMF?>nLTcuHTx zT5rH-bZ4iJ$MR_I zt-+IKco{?MXsvD^Jl&Yf8sfRndmXK@a-L+gt=q7Tlk?P~ZPSKrD>+Xn+P0m~*4f&j z!FOr!y&L@C`S{S*q42qDUX5XB>!WDZ-1zyno|~iQYsa%}gP#M>=K=HlCw$)Lw9eLI zY>Mjqsry?7KH1Nlbi{rcEopHI3)R z?W~?%G%rsv<~DZDkv!~QE&clpvufL=>Q8R}R2BCfsrSc~DE61QyWc5e+$;y>V@Vrf_`HQ z&GDd_-}s)}aro|2{y@z?0yl@;W7B`M<{Ik9zbep3^j%*Oe=zHj;$r3Ym-{7}% zUY0h-A5HNXg~h$n)_iN<-aBfe@pXU1z6`y&H^wkm#t`eT&lq}ejK+)B)`zx6qS=4+ zQS|5|qbH^|W?z>)n)Q!C%wVLCwUt_t?6U&zc#!?%_tc<6&ZCNs2HOIF(wQclu4n0RQ zhxVu4IKGEWobAE3Q5yr=fjVupIS=i2?RS8VA#Ha9+jb(wG3{Psb&K=APpvt(`-9EZ zKF#+qYTG+++7G9;Z}qil>z+7^J;k zc=A`j0`6YZ?)a{R8!!Ghz};i9-v}@Jy9>T>ZFi4u=HNKiBUAGl4_7P8dPJCrZF6+( z?iKfMo+~{6wn3LWu5GFL*Vbz8GlQCW8Fz|=(=7aB@()g%=yhfm*qohb`x}IlcBOkB z$bIHe(>|xhwlNobXN)7VZDHQu+qlulG`)6O4H(HUBEjoaEk5)iU>|Ll&#K z{amP#vJ(CmQG5QW+23cV-AC&7C--x=TE^`CIb*)8w%LdA?Eh*s-wVgz`?vnyPrTP= zEM@IFde7DF7`>Ou&FN8U{#BZN%DtbeX@8=|w#}Gd1()aFJMipR3#=TlA{oOaeN4w?bo40dMdfvPS&t8qsn`j){JI{+Sf$m+;Q~O*LZaj7E zXV>-c{M{(xN8gSpRr z9x=Ap9m{ye<(P4+4u|p%is!ibIG4T`G)B1Z{=!#5_nl6-{=UyCx$kGf_4gf3$$dvt za^KOE+;=o3_gzNGeU}k#eBWi1+;#KcHd)!Yxg}yxOU%VlzfK<_gzM5_gzNGeU}mL zbA#_P!o8>X9wXd)gYPiHoj>1UgnQ5N9Y(nM`VJ%9^TBr*;XW7n4kO(0_zol7cbw)J)F#9BWaAFpdC!PQ2x zzqaGL=QT=g0#=_x&j5RWJRf{Ib^19ItnHz?pR>SfKF7xAY`Fc!=aXPHeNM02oddV6 zw!}Oa>~n`PwWq&Nfh!w%dhL|Qc0O3mF?ijSZ%GXAvloEZqE^@Lbyh9&bP3pdbU9C# z!qx5HK0XJwoo&2_UrzBpub+L&^$~lWwy*zi{r>{k`+)UZ^z|j`D<~e?zg*j`GnOmC z_HhTXyboOkR`VJ1UHXuJkz!x=Eq(#rXI_iX6B*kz;O5w_MN?0_uYisB2=Uw#*MYsK zsoz%T@Kvzww7tz3Z=m)tA8prD)XYbm_&0)^@oz#?PyCy~W&B&<##cYH&goXL?X+bL zz7BQ`v}K)c1FO3Z-XG<$-2qn9=DkuL+g)JSSDW`u`IgLvYyS=KYSgyTrjNeru3^S) z{bB2HH@4ft!?kR-3N9ab8dYLtmbDw+iO!R z_oaKNne&$P=eWO(&F{zA_I|KD>-rtA&#C1(^B`RP)_Tu;7p$g_d*=IK5BH3=?@`no zhd6yd4sQ1S12pyQhaZB~+$UZ)o&Y~dd5q$9L%Z|x5cN|OZTr`I>FHWC7h`(ue+JDl z+Sc*P^-G%{f$fLWYi^q#!)=rKmFt%_KLOi@e{<4OCN8+)%AaaT0WsQyn0sn4>`bn;4|hQa7i!spZM31Du@v%v#3lglkVuL&4_cJSOj9XzIyn1X#__ zyUA%muzhJWr|oFJ5Ln%u{2U|~`x(gd$33?Q*mK3&XM@G47pHh=U!t~KC!Zz3_K|0U zrNC-Fzj+?Y7p2&jeT&m}8Suv$U-+_c`^wlxg4KL?mA$nb+;-Z`-_LAn>0<@3+T-=L z^&?<4-?!=SXF9d`uLxGV1%IDyRsyT#nlcLP;aIe-Oxd@yJr;5LSp{6i9Sv7A_w+pm zuAaVE1E+6ot5UY7I8WmAy(ZXm(fs|)Dfd~@_ZDk`52RMt?q^iBv|Sr)+w6^X;A%dX z>F;M$we+_h*#6YD`m{f!5k*Uj}hZVpy+-OX!Du!nhR+k&EIUgGq#HMq=c8@QV9 zQ;q9qa<$~O9oW3owfh-f&GRMwTgx*K)^gY1_^x^4PXK4FcL2*1Ye(=Q#8NkwYps^H zJAupGcZRFIPaFMr0js6IUBULJuHCg(OMer=|tKo_NS&Z%_R6y6yw{sTtEBpzBUEy{Y=|r zu-xYve=gAj-k93<+VuG-wYvTL{%9&#E%!WsPB0CwzAyJ|uV=@C)hr$~tJ?eiq8Dv- ziq}tbaxOjwSHGyf9v%m_5B0?D12^MNM^jJSez2N_A(H*PxC0R5dLAY247hsY9uKw; z^)u>RW`fN}o4NSTY!=vD@*L!t2jS{zHydnQb?4F8Y8lTQaC1B-ps8m(CxX>19_2iq zgf^B^&f~}7>KV_;VEa%{+*82KxSv2%Pux?%YL@0ao`z=Javo2It0(RmVEa(de4Gh3 zA8qF1Je~zMmvSD@hO4LDC&9K=cOH$c<{CQh=YpNv@K1sDS)|^Jp9VW`Yt+8l<9i;s zYctrbbt?9QPd3p|AjAozqVHW@k%-PQgmw=b2+(sTg>s$&}%k}&+u!q-8ZJ(pOMDe;Q&OW#t+}sDBM^n!}_ySnX z;$gnF_dNb0+BizN|GorQ&px;UY#-{0`()+xy!6;O4&kHkx|o<^iyp#lx{D z_wS%>MJdmR2jS}3m)`~3hkD|E58RCVeKhsNeF&^($(khY!)V4W&xc3g>WTX(*gn+H zV7y*89|N0@Hv95?cpPjl<@xXfxO&?C5Num@*TdLq*-uY^UBl%5Bv{Sj;hbeZJ%wib zoDWZfwR=9;PJ71r4A_3l^WjHu+jy9ZK5A+6W3X+?^Wi7(#IcP&YRUhn;4=T8!PP98 z`{e&~v@-u+z|G(D!FJk{%d=qnEzgHv!foSWF8ZjY&9A_=DbI&r!xP6g`l$JQ>RxwW zKS!M$e*+#z(brt%vHcc2w&1@5>$_jQN1g}&zAlUK%@g;{3vkEi^P=r-qo4iTR(tw? z5nT5F2l(og^#4b2v;RM#rGMMoMnC(vFYV=69kcv0YRCQ(b>hDawoS$^kL}N3=Og?t z;4-(r!kfAM4K2CZ-ZuJW?Ap`cD`5Kz|2x=t8T+f?X8->{OaHdFjeZ%s_HwL_S=&FU zU!xeyXOq{#hf*G79eqal7g%jGHn5+g-vE1fP1N>pikkB$PCsvf?dOy9W4~{M-=wI! zN4;*o18!cw{)48TYrwl;HH(Mm(IQ;E?eo8Aj(JCle(Cpr;AX$?p{b|e_rYow`=FZJ z?+3MJzxrj~8FCd!)4)~AFj9e zo{@0BYmgXneKN<(fjyVwzdT(3^e@*Z{wsjJ9>xD7aQ*W$61hI{UlHv28UK~w`se2( za(%Qr#+AXo*Dc>;k3v(A&njU1@O`~=AlFCx9NLZs&!*12xE^EB)bsvyRj}Gf<|y;M z8r=EL`@q%V`l%=O8en7loj_yD^~-qI1iPp8cmCIc>+g32^_T0Tzvt=N;PRTX4qVM^ z%1x}hxvvXX_kQb`?8onmsOR0r`ry3VSb+HU@4Jl+(6xCU*w=<&HIIyGBe>r;$sUpG zXCK?)w=sAKeHz#Ap~$t_-=<)5(dKtj$4SDZFoK3?w_sUX=hxye(7Ud@Y49_9r$+IC_W3sXZzYG@6~sJ ztGUP1ZUS80HAy}@!tGC+vHcE?TKe4?tQNjYgYODAx5S+YSMyv*+}+^n#?4;d9j@;8 za^trL-2SyCzdga`pw0YtqE<^jdx6!$_Xd~m0QZ5b>1TUwYF-0Jk+=7`ec@@pA2`=9 zuX+2!7o(_Wuc?ivpR_p;Ue3WmaJ3tXu^kLocMidA2mSk3ln*8@&FbCm0s9H)ZI9H+t6yrw0`W8v!N zn0CEz-{B_q$KY!E8b_O2xqsY8^2O`@WS#zw!?(OX_rcY&hT7C@zewGFF>1%@cf|U! zx$c>tiD(0Gb@Q1{Ef>$Ab}aUFJlMT$?a%&ZQ4dl)w9l^X*2#4a*gozcw$F+ufYrR; z_*e_U~{ten({dH4=5hm ze^}eCvzAYQ?IYKeC&6mDU&tS$*q42a)AnhwZIknlz|OaMJwq*bjNSu(0$z+_oB00} ztgio$spUDheg^j3vL9{wJVmW8{sr~3l;rYDu({;D@vp#Yd2jq2*uyo{_G^loV-jZ# ze*;#&9paNS!?Nd4Zy49^#Dc7@X`o zHfwYAv&J95&V}c1#{WmSdY+5^1XgpO<-0B~!PT>`{|xqUPPDyDQ8O2D;{65OjQ3YG z^~C!dSk3!P=IIr9xohwo}tLvEKw6JJ+GNz-rEM@_7fI zeAJWAf6%qr@7vV!Ckb4&dZ7 z0&G6&$!9@u@-dF>)bve03xUgg7KW>NZPQ$#&dcx zu!rZ7`l1vy_pUf&UmTpVmus;Ey0-W&S@@kwzge@T(6!yqVwuCzVB4zSSm(72Slzrv zQhS(}`mz)?^AacCa^U1uuF3M~+TyoD;aBeMkDzNyUMqratM2?Irgg55D}l|^`O)8e z%y|@ba@J>Mikdl#v$m^%T|aa4{2dL?e$bYjR|RKmX@>>U-{PfRSuZynD{QR9BdGcEiY<}v= zZ+);alAk`wZv$|d--d9tTqm5rjo`^od-B^Doczi?y9v5B^YeF%z>}Z$ zd#^QwznL(k`n^)G2mU^?$HJ7&8TSb4MQZGG!=e;pFHZeF^)b|G&*OaccR8r4-=R1%Fk`jvC<^})Y6 z;xUqvxm&Kr&c*T+;bEYkRXMweu-|5uy*vhZY@Z0yqK za=3b8e;#Z*^|bo}*x0r+wp_p1zX&$=@sz~=5?DPxSAdOO`g|F#p4eA{ZKs}gSAmUf zJ7dfBi~Sm~vCE&kT?A*%e}l7++z)jd)e#9niTi% zSnB+3fIGnM-|XkR3vQbqHTbhN&sgt-+iyO@eFLm^7sZ_IUoHOM1gquU%)MZ>dnoDS zTVVD3C{Hui8PxYv)cvmXFzRpDTIT8jxVdMK$n{Cw?|{oZzYAA;kdheR1FI*-_rYq3 z@etS;IS1tWB*w$wGRC8DwMQt4@fcV=F&+o2CB_fH#>jq?>ysEi1eYWT3*SS>N00UP6difbg-Coz5uc8$V+0ybvex&0KZ=3HmZehyau86|uA7hwC+W*qmD zTKfGZSS|cl4gPC*8TUE3nsbx5zX7WoH+$-LVD;Zp;`e*7{cB5p&x6fDoB6r7)soK( zV6}|<4`8(yDe?IuSfBLyC$O5n=~FHKFM-QG{|r}qx$yZ*?UO$L3RlxNeX6C;zk%&D zWBof=?G;Ma{8g}Pegnlm-W{UD<>> z*Ok}7!wddq!AF4KDfoimcN_eF1=pWPulf30pusx}uD^fhri}03KQFm||2$lO|NeQn z`S|zGOYYx457*zne;%&CfB(GX{{8cC{YMvE|5XdVI(UtmXTAP~kMrs|nsxp+SUvmk z4X~Q$p=;`#tHu8iRYe_+n4)M|HNBS8~*xe)5krno_H&R%Xq86)%=@fi8ltE zc-3AUFx;qYO#+6yXLX44OfeO9k6Q@`?_GY z*w+I)r?IaOR?BzjHUJyber>DW^Uv>lY((*#%so=BfBGH=cK*`$#$dJBHvv1|*f#~M zuoFC{+m%-)1Ujv>$lgdoeS>uYS#vzSaALKDY*XoHTeDo*Z;7B>wiRpA6any zCly@(DGlCJaQ*uVu77`n4-{PgIR)4M#0K~OyHL*083ot>tOh^3;P!uh!S%ne!7nPf z{+AY9|H~Tu@`CH{|92tz>3>awUt4hfZ!Ng~w>9|f1=s)Hg6r@9ccF}Tf6a6M+6L~P zEAL<1qN(TpwH;XPc>LV&?!ii94|>np9Tbu)W#8eO|xBcodGsi{q@tHT#pBr zxz2>EjiMyiS^Ctumh(D@t}VIF2CF646Tr!J4p?q}UQ>-D_Ihf&2x$3W<_T+jp zxXkqvaJ5q?$@NryXn!@OoY&LPwI$cn!D`9%OmK2N11vYcv#5u12` zs=t2Plj~=}Wv-XN)h@1myiR@&oS2t_<;J;;TK`<{KM!_He!fiqUqDmW|8i=1^7#v{oeU|DMi1m*?Qog)p~q>4!4~? z<|@}G^Y#m{aeXeznm&uBp7(FR1gp7z`5x1+;Obrz-lpH@z#jgrOxv$1YK}pibM!ah z-iM(b#MF~O+7xZg3G-j*TI#{jTEB}J4XRo{g_HYc^{!LLco;dN|1Uv8L-h2y9J@MWKtGSo5H{SuP8_%(% zZ|ltOf567B=8oF9&hLMzJe)KmVeKT*?@m)w8+_kWs{W!iws68B?J_}RS9G^IQc2RJ~mwmk$ntFa` zc5$$p#e-0D+b@BZJ)ibVqN%6-QeZWUhwbzG`%9zA*Ps|j-^}SU;PmZU=l5NgMYo-Y z*I@nB(rzR;?OcnrTMj+(Y^R@E=3;qpb1qguQ_omF0#>u|k6z}rUlAd3GZ!nNsi*zQ zU^NT>s`i8-?Up5J@IU(pPKtD z`@~wFYnOGdC97d`uF7l4>S*e@maG9*GvBmZ6W+WAt%atZ=bN!$+p4GC+F-|FJoA<7 z7yCNkW*6wlH9sP)^t@LM14K#Iryl+B6Zv+e;D+Z;sgv&~`DThY#E zoA8}#p7CrCx1IZBka`Dr{sx)0_)P$7^I6|}?~Y(K$7c+Ae0K)hXPystfvY*+iL)y_ zakQuHM6kBR*$u2_9CMfJYYw}EjUB!RSf9K*-xI8lx^pPcTswz{P&~>xJQ(gA9!j0h z1pCmgoWlbe{IHs54Ew_ECu7|Yp0R3+-~Qmr#@LJ@kL^IPTK4BbU^Sorotqid2gB7L z=9x`?2v|*@fqIS(g{wP%<}FW*!@I}a=ETyW1CKHfIfHAA>Fw>GKs|^|e_ZS-qzER0`Q)t7vcdHPwXu{8Yn z={*D8pW08et7c!$N7>hs^Yqo$SPDL?tGAio3gGG8T@ydGTkPhu9Cg{}^6}ae6 zKd@`^-u-?3Jrlu$_)pDaV0u^IjLx1D#Dkb6b=S!?IKF4%z?3<*RvXuDTGzmo8Qqhc z7U$MB?hJLs^y8YucWypI8mpiWbWQG@+DxdkXJU8XK+nL5Ba!KC`NnG06Z)r)cN@(0 z%L2_A35!(z+jyI51wY-P*?(JOICWQrM^KM%;z5nIsHb;NoH?Nxc4W2w9gQ{7&Exp~ z=}sRx4r;7TeV}tUsek&^1G*>g-#rPnqp9mNcf5ld8`ONYdv>UMOzj$gxBQ&@=9=_& z4Y2j;ammIe)MczrH$|pn?tabi=gq3QmF=gk;YplvZw#JJ&W>d>@Hp0PrCO8ld z$D-~rcPt%^QSdpwV8@~6F^vG>j$;}@!khbsfrK~r%ck;HpV=#|{WGZ9r`MNx_h-v@ z5ITeHKb8sVXsq1W7CtAZ&gPl;sk1Z3k8w7qrkb@m`uejbZ8A@3?J`s^2gGY@Y(<@U z+#1}P$F_#{8BL###%OB$thPpToN69(=W~_DF7V2)vpN6&5BazHY-Z0M{r?+)`HvxH z^4}5M%D=s_6L?Z@*W}h1+8R5k$TyCB3 zS(;XFO!a2vI@HwWeD6UY$#+j{O**Ib_w)_qd~R#( zkIgHdeHh!he*H81CU#EjKfZ6|q}IKmt#LTI6X1Dt)Eu9g(^?*_ z{c_|SpZ3NC@U*V!T~nKF+8dtZC0F0Oduspm6FoNuHRK-Lp*pi~qC3~>-P5P{PY1g$ z`g^XSH~VnC`{5qmD`rub@yvZLZ)=EIumjCAIe^|{vVjkYsSOeZV z@3^LbGjBW3dzA#2W9e@!lHmtMUFCpH<^0 z)cA=tenyR-RpV#Z`0N@#ug1@>@r!Ew;u^oC#;>UHt7`n}8o#N=Z?5rMYW%Jmzo*9U zt?>tH{Gl3uxW=ET@h5Bisd;!?;~9ACdePR{nu}2Dy4ca!7JR^j?!N9FXU=)HQ1h6f zPVMz(bft-R0&@v$>id8>pPP7paI4?8#-WAJba3lD8Pfb5GP$>NY;XVguHN1IX7x|# z8tCco+ui$_*RqrFo%eaJy>T*H^HB;PucLC0Tys6oPeI$Mzi(2{X-=CV*t+`q`eR6p1<+g1VXffN7ex2|Ki9vb zImU(1$=~PDIpg*-RI887=-fG31e-azCb16UGoSP2XSBJWMPi-XcCo6hpGRVKjXz7< z)>^eToolM*F@ZI-cAuSAX}0+ym`{vdy`2Ni+pcSVE?Rl7`3ii-6hD^Oen|84E_-b+ zo7J{jv;VK*-_wlM-uN!Ke_%?t_iFpTrPlXt^YFIDy>K(}+2TGhaSrL9e(YGDL#DZz z&G`ZJ18bk{)I8>Wb{^dHbzjb%PtTavp0$n5vNyCiXX@@t^?bHdb3ewnocD*Yjq94u z$uM``)jXI-bn|X&JW^@S!^_}uj$f_u*W}fQRQ=zC_a5K;_(@-b8t=eobj|8+o^CA} zy`%9ydTS1R!fnm@{0xS^nu2*O3~#MZM`LNQdvN@T&5vHqK2|_;Zw;I{&C3)T_hBU^u6wi5{Yiq16=SfA|dNtc7a-K@GZC z!so1ca||7gBhi|3FlX`l4`QSgXb^LpAplRLl(&HnE}o8I5L_dN`5<=WPG5}b8=8eHc6%sjlK z@f>_2SM`DZsnuGvH{PDdhiC8_|D?tTaib|?jHvN-YJ8I#-@4#q**m?=x%Vi-GXvd8Lu&XCQwVxYQ|F=MoEkXDsA;b^BF+Dk%i{?)z&LCb5-+0 zU;105()=J5&HKH6TNax4cs1Yi?8X}BKAtgz>*L{?skJq6d-EPYqG@TZk@tMp%=vbY z_|Dp#eUB->ui`7h%`5lQ^dGIhUb%Dsi`s=bkWX2uwv z92}!sTh-qC=?Jig{mC6;>^rd;9HZ})mtt38#_rqr)*f4iHpU-D@feE5ebLf<1K-*w zYQyk#FT}nyy}1vDGgrnC>#xsndT@-!i`LRR8q1>DfApdB=o+Zii@CZ#wHtp`YU9g| zJFLR$mf`rUQEA3=JZn>DJnMjMueL0$5a!)6gAN-A_Hk4TzMQz)%WV~vQZwqSM=<6JMK4cE?hc4n?hA(%)=s9^n^)%XnA9J$GWi7VLgWyxX;Ngx?8o%}*;&1N=$BKL%%f&etG_^BMm|;Tf;? zW#Q#`hr+$zIRC4I9joVre8Y;n-{hkz?*2*tr=Zzy_yzFfuYM`qy{6sqT@E*1{I7+( z$6~(@UiNo8e4)zj9^Kr*ajZwC<~0hgR+jaMFc#Yu=-S;Y?%zB^c>ZmRE_YnpQS+~* zRov$THS;p=LXo@25i+skr@|r;)NU{^wJB?y1?|m#N)D z>h>r1^R-&W>%BSSy|}X3hw<$HN;Kc^#@~Cl{@zQx&t@!T?HYQ&)$SO*kIK#IVQT(0 zHT#r%FICh2c!h17F~1Hj&%L+ci{c;q2k`7&^Y|FPd|_{6!OLsI5V-dW<8^{@Z?2pD zxrbgxyYYq_M!P>fhu(%~-^S-%G>+|^_eGdc_p#@!eJ%<&p1StWRrajQ=i&To>dt}O z&pv9lpIu@5%ouM+^Ze8PYq0&x*I|CGpGVWz@xKKAgk08xx#xX0F}Bwr%P7X>m~m?! z4&{9m&v)~2E`8r;jBwxSg|CY4yPRzMCnz?`BHwJB*V1 z4kO(7zQZW_n>GGU!PmjxcNg*B9`1XKaQpMUMag}4QF7l~gnPa9okh6qeQy!2-S-yZ z+I@EsuHAPR;o5zF5w6|$7vb7{hf(q|HSRl%((XHqlKT!L+-C^iVT5~s@%=@(_X*!! zggbw}y9oFGz>yrwJ}(ICOre} z{c<+=bn5hTCRp18RX=Be)qJLn&)IPMi_ho4YWkdBwfj8Ww%QW&9I($I#?+qvz5s68 z$kS`5Jhs_jHOJs}Q@#~3yzhPqycV^(cCWK)nWqcE-m}Yjx(Kdr|Mu}!u$H7+!1e#@VDAIg@6y*dsV}8?XuqtoTW2hngYDy1VtF6B0<7lq|6W_y3fBBpD!}DtHAZKU5%!mc;5mW?;+y3C%z5#o~C|tmBTe)+i81`Fhd2F|XU0-e9H|1L~8?OEL!K+c*Mw>qRs=J06xAmv3!yVY_^Y8;S_2h6T z*nTsvlKTdCa+W4_M95fwtGCR_;spP(A0Z=+ANg5S!n7 zv+ccLdDituV4qvdbLM`y`c2iI`7u~cANR~p!5;1zZ9k!?ISz69eiU5q`!O{2?1!I$ z)!ZjuHy#H+L3xDYbwj)J@&NVEDcbg{_R^D;W-i9`+W!=qW3;W~mFt%_PlN4;(`!zf zU%+jX`IYOJHqU@68|6I z#D5E%e*Ov8miTXj)ynw)f*aqxnRfq%t0(?{z_wL4{+rZl8N<6^`*7c-kN4o}`oBXh zAJZ7zd{+1noP0k3J8xY+u^UX*BH@0;`*o zpM&ILKLdIGxaSrDd#+gfY_J&h;uH_aDXW4#9E-N0lyU8?v53>pFmM@nI9$!#)AtCtdiq`+oW8ZKM%jVlJc-ly zT42vb^Y=5S+-FJOTZ{xBK&`Ib&!}o?yAIg4*&FM^)qF0~-_NLO>2H0o{i$pBGp$o;e2V^h@$40#OY^KaGBR;aJ4-5``K76d5r>_m%4fRSz0aq zZ2>l~8>)5O60GLBo7dK05A)Kt6-CXw#OY^SaGBS3a5dkp8rRR{YRPMRuz9I#_cOej z=S%vxmS-NU<*vW+cc4!E9l=@aF<^OO?F2rUSn9@dt<}2Ei1xz@YG)w0%mf<2s5ZF^AE9FsWx>;*3K+8eHxJ-828EqUz=HZOJa za;??U-#Bo&*89QLvepNHJJnLfn3|P$^4yBgcMOdd=i*qn`uWxMuor9} z>WMoQT#ws_rk=R{U^NRvB>TB>r$LPCd6c-v!POIYI@mtc&!}>l0X83P=HffE0kFB` zImj{3gsZ3BEU<0WokwG*F~AO+Di|5v*qMDChAcw6!VaJe~|!&v;G&+lPAM zeimGhdn%fG;+_Upv()GDbTs3Z^LPeaJ#o(j+lPAQ<1DcGXfqe*@occUl=JvGxO&=s z9&B56=h4_|uA%e(1+a4){zb4p3s-ybT(I-DM&+wLzO%t)+b_Y(w&zuCN77bb?Vfw? z+4I5n?Gf$Em1ewXUqQ1Uk7yTEnttQR({t!TH2bs<+vwwX&!N_4&VC-h2)qL2X7cb^ z=c{0~T+c57dwAW{b}{93iq}nX_QBV{^?mSlH1+I*Z-CV-9_DL%&*N{RZ9*yc-=%Q% z?1Rg|_Mx7*mxJqZuRv2z+$+Iqmiqp?3eC9X{<|8kp19uv+lPAg(YL|oqs?60f7gJ` zrQCntfvcz8wP4$-yB@|?b3I(Q?}A;U>d# zJwI%xJ-K`zY`^Zy+JREy+zaMQIN_jr~7_Odu`4g~x zs3-1E!S%Qgps6SBgJ3mF)+BKsLNji8K0FLpPuxeq_Mv_T&ENCEcG{E6 z^I-ce&xhZ@ZR24s`lzMNZ^5=H&xhZ^6UR3CsQG;AUUy%=K%E?41aCso*IebX{T{q_ z!CwOFJFeOzuYg~!%EEjL#eMTC+%fvRXnWh}XaBavA<58_FO<2{vBF{ua32|3A>uzwK?KU&gMz9IIp2_D|}!DaP{IYWojG&G{3jpZCD_^Evvl-}k}qQqgo3ru$sj_sOI#`(B|~3U*>&2bYp7I^>KcE zDUL7msg|{C13Sm3(a3Y49j+F8M`h1FYY+QW~Ik?|7NDR3?nd9Zbp3CuH z0j_`gm+KS%6~SJQ;=dAH|NM+Zu21||277+Se-*g?`T2-kAMK8DDA@P9<$LT^(bVHJ z3~V31uXhgQ`e>g;+u`7u)R`C8V+5Ld-k+`pR$G=i%6zX5cfRvJa1FSA>WRH3*w}tQ z(AaYQGTybo?rHs<|B-P0{l1|7a((poJY5G|UQ^bEt9ebio^?0(_2BB>Zyl5U_(bi;Q8s(xPA{suFd{7 z2b+sFzmp=@HVWGoV8^b_@2AMO;(kY3jV-~uP#f1iZL3dm+6rt;b4skO(bTg(+kn-E zRP*ir*%qF5#+B=rK1PF=#y{`Cw}-3wED)a^DxbVp9|Ko&kEh*^aCO%t`RoL@KW)bL zJ2-0TcNcNNcdhZUaC1xC-Qa4T3yHfsT-~_Y%X`4p{f=(@_JrHNw&b@L*c`N(-_F!( z$!~A4TKGQT@*Uv5a5epGuT9Ns;Hu>9eQq2)?e_!c`sFomfB0e)_3SmZQS_5G2g1ua zI0&wGZ85fk;p)zTc^m*%OCN`TbM1AH9tzhd*U`hk=AmD%b87mfkHf*OHm&Q@5%BtT z=`(2RncpM9YPQWjJ_?+6<|@}OaXP`}c#npwd40)RcEQ!lH6IUG_vdYiH39BB$nu<- zh^C$#yTNL*Nbm?eVz(e%NlA^v;87f`^Bgor{59l$L6|ce#W9rgR7fQAGKV39JOPy zujyd-wzWU|8=#&^@z6f2vRfzD_oB8RZ5g+E;-3yKpHl}{I zm+RwPI4_RJI{lpmw!iFyv%zXvH*IR=Tstqu^M3vXaQ!<8Uqn;Sn9l_#$JyX7QO=?0 zXO41x#OG0;${ZN)e6aaiyS87UzJTJP{ldy_o$*`*wvVjsSHWue&X@el6#KGo@hkN0 zXH?6Tj45Ng7+Za8m!PR9-q*m!d#JjHe;w@JRKK~}U*7=RPMe=aE~WNxuW9=xMa_J~ ziGMk`9{&n7^~AptT*ki&ZhZAKtDLR|+fJK(y9VE)xCYv?PT#I{*Wog-JhtzE)wEqh zEsyQHVAogMwbb(L>FdDmY1?SiM_+Z1TYVmGKvPc+H-haqYq_Ov_D?it+SR-fbApKl%Ipua=(y2 zO0h5d7N_k~VB03=Ux1x&^LmL%tM^99gUNH$7XGge%AOS*tzih&G`QWSI=|N zYhX3^S-$J?I$S;b`Y&J)=S16|DQe~-PQ1T@>+$}Erk;3jfYrRuWS;&GS5LgRz=@~r zP0Bwg#uL}auD>>YGR}X3J>Pw%_U9`90-KxH8RO`;F7?0BwAts|)N;Qw;C1Lf;3KHj z?Z@x1zXSGnF})TU!**)=Cic5vW9K^b9$3vePCg&NlaG4x`4C;3{k~5vPd@(zn~!?( z`3RhRjAJ`BeUs1sz-2xk!_{1aJ^3sMPCmx5otnPMXCZKz z&%$suuWkBkQxoTU;yIUnXD!b;VQt?&S1&?4+jved2KMkgQeTv!=H3-&?2Cgl_Hr$j zK-U((B@4e(={IY(6uP#1SvYf88f;tj>#Dq#0jrzWa?~E?rM@gh&Ah~kw>&s`m20vB zy0-YOSooEDdnI&j$!le>ZPlH>#I(-!aTTz6IzRfGk2$YOot*UMXsan^Pi*!43v z&)?zj><4Yhc{OmxR?g+>=-OO+e}_h%{k8_!^HKdxuIFAe)&v{FaodkR$z?5Yxz9$z z)$~n%Yr~VD_T;xNIQi+HwO$WhoB8=WJ@VwYKG^)!livnlV`E3bT%XP#2 zwt^==?a6N&aPlkn?zZUK%+KF>k|)3Iz~-l({6=f1BtL!JyV-lz$#;8jnePs8weq^6 zX8T+RtmQtBxtFce-xzE@yX3yIBU~-d=i1b4pJ%+s(Ei6yM7E^(d!=3v{M}rSg(zDv z?jh6*SJ>xp=%8x;K7HCR0T` zcE6ljarMMK18m&P*_q(XnYQ?y1=ePMr&G&gI~%NK3~~14N??Dt$YW)SxeTS&?;PsV zZ#B5bFpBfPD)sOJj{w_#b!z8d`?(b7KhL>e0^7!C$>eq(SUo=HgYB#I`7&HR=kHg* zwo^~L3&6&+opT`9FZPST#=ek}*k1*!$LC_Ou}hyz;OdF}HL&f})9&kFW82Qya{Xfe z2H4ogQ4;%`VDa~WJcu`dVPPCf0e02|wO#+K_B`&D3Lmp^yA8m=CnZ-I?n z`g|L%p4iubZKs}g-vJxjcE*_wrhBk2NUnWv?G=Qry2Isq?o1ZUwu4v!CxMxNV-U@#iX@ zvEBx^-+YGqK3MH`iaFcATKseBXF7LkKtrlGyfCTKI2j{I~Ej?(g7g&Q0RJ z09H3{_SEme>Mv5__Y&CtwI#oo!RDaN{M_4W$>$ZYTE_hcu-dDX`1}#9Px|~5SWVyb zsTTj&z-6C*hO50^`23~vNuPg(tLd9Q)zatR!1kH3{vE9L1|@6$CfGHbEG`y_Se`E!hCP3B_YWN}fwLs_;708&kZlY)YN$%G=<<1%IdDL%{DBd;#!>HU3e- z_2q@%Do2@87*CnmXrd@qZVrmVNXdSnX7b zKIZoU#kI7(Ya$oB9{T(IH;DSblocrYXw%0vRZqPCfy;QGz|}ske2h0A<4io)RBm6c zr~Zl81~;BQ+VpWxs3%?rxQsU#u9nZ3##;cMc7z{_&k6O! zTNCUa@;w%rq*w+WE0+pKlhW@Z?9K7 z7u@UBt~EZk;QH@VaQ(;C_go7hL~|HQrrt{d)_pe_xIF7hL~Y z1=s(C8u$OFP|nX81=s(q8b7<>_CLGe`kzwj0l_4og$P{zBr;<J zz2|HXSIhlt2e6v=ue`e(qfZql_cgC2JECjz{Ijo}z-pdL=DiEp^TW9Ab-CDmZXB_D z+;&^h!gFjl@Y2-!>!&^E`|jW}*FE5B-usg4p8CL@liU-{buV;n$#rkAT5{bNoLoK6 zw$2!L6qcrkUq5cT2#*K!RXqO>mgvZ12$Lv_0yhQj|7*w9tBrhl#*OK^{H|#=k;iGZOOF@td?9S zfRpQZu-yE-wirk3b;WkcbrRTI_18~(a-9q=bDaWLTb7bsd-S2bd%m34W6-rF*JHtI z$#p6?xq5Arn_nNbal~HJY?oZ8fz4Ha{j?|7q%5wQlgH?axLfe zi|E>t>$zaHxFQ&3o0M4lV1fV z=0#w+aW1CTKiB)OfgO{dFVp|m(bV<7gj$}wz5zBb`?N3ZY4=TVnb&1-wM#1>^STn8 zysiMtjdK;X{>kfGVDq}1I{kketgipn)biwY4cNTw)4sH)-FLucUf+eQT}#Q@Uat@B zucI#a;SF$YS=$@IYWD4Tz6Z{DZUW2A?`CS_h;N~`U2?q@Y_9t2r#-pe1}<~`K3wf~ zN^-qJpDNdKAN~McTXMYU`|BL%vqdV{s>Oai-MEOpTOGU z_gdkXz3$jvN7t5|{|vURx;ZDNb#nd-*qj`P{^n!Of1~y=XMO%kd5dDs;;j1{VApjv z#eCfxe@9b~&zsL5!PT=@{sZ=K4BGxp zQ8S)6@!kPD@8#Zn7fn6!-UF+-m$EnC2df*;v7~S7%joH+PKc|f2lppO`i`b zYUU=+T7Lx2{FeLpf9Trc_i^E8KG{2;pli$bgXq3tTlM@bdOkFD^J)`wn3wwe6gBe_ zCtf=^d6j#$16^DE1{Hp;Rq`5)t}S^D0ozvHxlBy!%;f@L^K^dnHy?d7Z`Sc$P#fH} zu$}!lzJ;kh9G^Z5QPdotID2*xaK@K?y(pS`erI+uu$sk#P;=TZj+Q;2_Di6tr~Q&( zHH(Ms^ZWZtp~=^v7)RgC>C)iz?ONyeU6(<(orl+8{nXNKS#a997HPK}dg9qmKef!o z^5FVhtbnGTv8)JIv+$2z=C)r6A#pPoE2F8W{VHHJ3;&w!GZ#YbaJz z0ai2Lv|AHizXq*^rk>}Ukzm`Zr`_6M$6-A4mFpM#I^cRf>!PVApY_0M?uk5Ktq)hX z-}JQsJh5CypSdGmS z)^2X%GPk3!dz85y2{*S+>g0AT?bMT7FWh*ZpW2e!RIs)(x4z2Pd*Cr(eUe*0Si8B2 H&F%jHs}wK5 diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil index 81448e70c67c30ad935a58050ef5752e800ec0d8..02a4750c6b787e11dbfdeba9239086b8ca9d7322 100644 GIT binary patch delta 57 zcmV-90LK5^B-|tvL|8&Ydl5;|)6{fx{j62tqeH;ju@o=}5bpOqNFO6zsS=ebLG-Ym P=Cf1?s|Eq!vJYGX`?48= delta 57 zcmV-90LK5^B-|tvL|8&Y`5D{_RpC>~N!IeZrwPSnu@o=}5dDu3uhFph?*0g?`FJWR P@Uv71s|EqkvJYGX87&(6 diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl index 5ed84b8..5e98362 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ b/piet-gpu/shader/gen/pathtag_reduce.hlsl @@ -25,6 +25,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; @@ -81,7 +82,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 4u; - uint scene_ix = (_139.Load(52) >> uint(2)) + ix; + uint scene_ix = (_139.Load(56) >> uint(2)) + ix; uint tag_word = _151.Load(scene_ix * 4 + 0); uint param = tag_word; TagMonoid agg = reduce_tag(param); diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl index edb6d03..38451d4 100644 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ b/piet-gpu/shader/gen/pathtag_reduce.msl @@ -32,6 +32,7 @@ struct Config Alloc anno_alloc; Alloc trans_alloc; Alloc bbox_alloc; + Alloc drawmonoid_alloc; uint n_trans; uint trans_offset; uint pathtag_offset; diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv index 44cd9384713fe63c71a37771ec7769871840891e..eef46a24459484ae0ac3583f17afc40053fd5eb5 100644 GIT binary patch delta 138 zcmX?M(_p*d8M8G9Lk9x`12+(-6eX7D=I7;Sro<=aW<}nkER4dFp9>mJ T77${YEFt8>$h$dJNLda5L~0rc delta 80 zcmX?**pay56T37kgD@il12+Q$LtcDIQDR;(0~-T7Sd3?~E{8ND?_^IP$u~KbL!Oa; gb1TO|md#PT2Ur+|CQAz$P7Vx!#oBC25uluDM~EQ&CkovOo>m-$;nR!3Nf%Tu!7}zfb6{ZlA^@C zVxSm1Sd4daCyTT!A5f+su_U7;F+Dy%Ev-1U1f&aMD*xnzEb@#3n;){=Waft$Bmh*d l48#tbw{o0iW)^0cH+dtE@Z=dhGLtv(1TgY$X5@_*1OQY$B$)sJ delta 102 zcmbPX`@m*H9*Z<9!#oBC25unEi!Uik%qwPKV_*l1@l0OHA}z}clqpCo$tX!okIzp_ xD^4w80IJ{stKggbkVT%6e={TNP3Fz-I8HD#3QgY5V>tN@j}IfyW=r09K>!Be8ZH0; diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 4db2c3a971dfd506946f377c8f0d0bf49c559236..04b6364ccf608fe0086b189a14d827bec4935a87 100644 GIT binary patch delta 185 zcmX@Jfoa7?rVXn^)HxU~FflN2GcYiu6eX7D=I7;Sro<=a5@4tms;9eu!29pekh`c9?ur zVe{q|MLTA7W`;YgAiWF>?hMQfcahllkl6P(?^MaoR&*`*nsOZVOSxPm9gHdR5y{_TrExKQ_0NLal6#xJL diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index 2fc59fe716919d33634cf7d60c3ae74e654e8fa7..240f8f717e522719baa00836d819ff5f7aac99b5 100644 GIT binary patch delta 146 zcmca`iK*i<(}sUS)*K81%nS_N3=9k@MTzCP`FZ)7De;LpIr+&zAqF-ER0Gri}6mD6qc6d1IiR6mSmJ9rpM=}r4^@^fOJ7j<)0iVEYB#gxlni|7eB-x b0ibeaAa>X+D)*9&QF!uy6~oOsswyo2f@~i+ delta 98 zcmeCU%yi)r(}sUS(yR;u%nS_N3=9l;@g+rxdBqHD4D4Vrp2?QN(z3ijnS#WUjFQCk u`24iA;?xobpb8GK3cksO!t#v#n DrawCode { + let reduce_code = include_shader!(session, "../../shader/gen/draw_reduce"); + let reduce_pipeline = session + .create_compute_pipeline( + reduce_code, + &[ + BindType::Buffer, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + ) + .unwrap(); + let root_code = include_shader!(session, "../../shader/gen/draw_root"); + let root_pipeline = session + .create_compute_pipeline(root_code, &[BindType::Buffer]) + .unwrap(); + let leaf_code = include_shader!(session, "../../shader/gen/draw_leaf"); + let leaf_pipeline = session + .create_compute_pipeline( + leaf_code, + &[ + BindType::Buffer, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + ], + ) + .unwrap(); + DrawCode { + reduce_pipeline, + root_pipeline, + leaf_pipeline, + } + } +} + +impl DrawStage { + pub unsafe fn new(session: &Session, code: &DrawCode) -> DrawStage { + // We're limited to DRAW_PART_SIZE^2 + // Also note: size here allows padding + let root_buf_size = DRAW_PART_SIZE * 8; + let root_buf = session + .create_buffer(root_buf_size, BufferUsage::STORAGE) + .unwrap(); + let root_ds = session + .create_simple_descriptor_set(&code.root_pipeline, &[&root_buf]) + .unwrap(); + DrawStage { root_buf, root_ds } + } + + pub unsafe fn bind( + &self, + session: &Session, + code: &DrawCode, + config_buf: &Buffer, + scene_buf: &Buffer, + memory_buf: &Buffer, + ) -> DrawBinding { + let reduce_ds = session + .create_simple_descriptor_set( + &code.reduce_pipeline, + &[memory_buf, config_buf, scene_buf, &self.root_buf], + ) + .unwrap(); + let leaf_ds = session + .create_simple_descriptor_set( + &code.leaf_pipeline, + &[memory_buf, config_buf, scene_buf, &self.root_buf], + ) + .unwrap(); + DrawBinding { reduce_ds, leaf_ds } + } + + pub unsafe fn record( + &self, + cmd_buf: &mut CmdBuf, + code: &DrawCode, + binding: &DrawBinding, + size: u64, + ) { + if size > DRAW_PART_SIZE.pow(2) { + panic!("very large scan not yet implemented"); + } + let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE; + if n_workgroups > 1 { + cmd_buf.dispatch( + &code.reduce_pipeline, + &binding.reduce_ds, + (n_workgroups as u32, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &code.root_pipeline, + &self.root_ds, + (1, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + cmd_buf.memory_barrier(); + } + cmd_buf.dispatch( + &code.leaf_pipeline, + &binding.leaf_ds, + (n_workgroups as u32, 1, 1), + (DRAW_WG as u32, 1, 1), + ); + } +} diff --git a/tests/src/draw.rs b/tests/src/draw.rs new file mode 100644 index 0000000..ca19312 --- /dev/null +++ b/tests/src/draw.rs @@ -0,0 +1,147 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Tests for the piet-gpu draw object stage. + +use piet_gpu_hal::{BufWrite, BufferUsage}; +use rand::Rng; + +use crate::{Config, Runner, TestResult}; + +use piet_gpu::stages::{self, DrawCode, DrawMonoid, DrawStage}; + +const ELEMENT_SIZE: usize = 36; + +const ELEMENT_FILLCOLOR: u32 = 4; +const ELEMENT_FILLLINGRADIENT: u32 = 5; +const ELEMENT_FILLIMAGE: u32 = 6; +const ELEMENT_BEGINCLIP: u32 = 9; +const ELEMENT_ENDCLIP: u32 = 10; + +struct DrawTestData { + tags: Vec, +} + +pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { + let mut result = TestResult::new("draw"); + let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24); + let data = DrawTestData::new(n_tag); + let stage_config = data.get_config(); + + let config_buf = runner + .session + .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) + .unwrap(); + let scene_size = n_tag * ELEMENT_SIZE as u64; + let scene_buf = runner + .session + .create_buffer_with(scene_size, |b| data.fill_scene(b), BufferUsage::STORAGE) + .unwrap(); + let memory = runner.buf_down(data.memory_size(), BufferUsage::STORAGE); + + let code = DrawCode::new(&runner.session); + let stage = DrawStage::new(&runner.session, &code); + let binding = stage.bind( + &runner.session, + &code, + &config_buf, + &scene_buf, + &memory.dev_buf, + ); + + let mut total_elapsed = 0.0; + let n_iter = config.n_iter; + for i in 0..n_iter { + let mut commands = runner.commands(); + commands.write_timestamp(0); + stage.record(&mut commands.cmd_buf, &code, &binding, n_tag); + commands.write_timestamp(1); + if i == 0 || config.verify_all { + commands.cmd_buf.memory_barrier(); + commands.download(&memory); + } + total_elapsed += runner.submit(commands); + if i == 0 || config.verify_all { + let dst = memory.map_read(..); + if let Some(failure) = data.verify(&dst) { + result.fail(failure); + } + } + } + let n_elements = n_tag; + result.timing(total_elapsed, n_elements * n_iter); + + result +} + +impl DrawTestData { + fn new(n: u64) -> DrawTestData { + let mut rng = rand::thread_rng(); + let tags = (0..n).map(|_| rng.gen_range(0, 12)).collect(); + DrawTestData { tags } + } + + fn get_config(&self) -> stages::Config { + let n_tags = self.tags.len(); + + // Layout of memory + let drawmonoid_alloc = 0; + let stage_config = stages::Config { + n_elements: n_tags as u32, + drawmonoid_alloc, + ..Default::default() + }; + stage_config + } + + fn memory_size(&self) -> u64 { + 8 + self.tags.len() as u64 * 8 + } + + fn fill_scene(&self, buf: &mut BufWrite) { + let mut element = [0u32; ELEMENT_SIZE / 4]; + for tag in &self.tags { + element[0] = *tag; + buf.push(element); + } + } + + fn verify(&self, buf: &[u8]) -> Option { + let size = self.tags.len() * 8; + let actual = bytemuck::cast_slice::(&buf[8..8 + size]); + let mut expected = DrawMonoid::default(); + for (i, (tag, actual)) in self.tags.iter().zip(actual).enumerate() { + // We compute an inclusive prefix sum, but for this application + // exclusive would be slightly better. We can adapt though. + let (path_ix, clip_ix) = Self::reduce_tag(*tag); + expected.path_ix += path_ix; + expected.clip_ix += clip_ix; + if *actual != expected { + return Some(format!("draw mismatch at {}", i)); + } + } + None + } + + fn reduce_tag(tag: u32) -> (u32, u32) { + match tag { + ELEMENT_FILLCOLOR | ELEMENT_FILLLINGRADIENT | ELEMENT_FILLIMAGE => (1, 0), + ELEMENT_BEGINCLIP => (1, 1), + ELEMENT_ENDCLIP => (0, 1), + _ => (0, 0), + } + } +} diff --git a/tests/src/main.rs b/tests/src/main.rs index 9aab351..e52ce85 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -18,6 +18,7 @@ mod clear; mod config; +mod draw; mod linkedlist; mod message_passing; mod prefix; @@ -137,6 +138,7 @@ fn main() { if config.groups.matches("piet") { report(&transform::transform_test(&mut runner, &config)); report(&path::path_test(&mut runner, &config)); + report(&draw::draw_test(&mut runner, &config)); } } }