From 10a624ee7584fc78878a3c507fd45f027b7d6fe5 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 11 Nov 2021 16:17:04 -0800 Subject: [PATCH] Add message passing litmus test This is our version of the standard message passing litmus test for atomics. It does a bunch in parallel and permutes the reads and writes extensively, so it's been more sensitive than existing tests. --- tests/shader/build.ninja | 8 + tests/shader/gen/message_passing.dxil | Bin 0 -> 3116 bytes tests/shader/gen/message_passing.hlsl | 54 +++++++ tests/shader/gen/message_passing.msl | 54 +++++++ tests/shader/gen/message_passing.spv | Bin 0 -> 2196 bytes tests/shader/gen/message_passing_vkmm.spv | Bin 0 -> 2300 bytes tests/shader/message_passing.comp | 60 ++++++++ tests/src/clear.rs | 2 +- tests/src/main.rs | 15 ++ tests/src/message_passing.rs | 180 ++++++++++++++++++++++ 10 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 tests/shader/gen/message_passing.dxil create mode 100644 tests/shader/gen/message_passing.hlsl create mode 100644 tests/shader/gen/message_passing.msl create mode 100644 tests/shader/gen/message_passing.spv create mode 100644 tests/shader/gen/message_passing_vkmm.spv create mode 100644 tests/shader/message_passing.comp create mode 100644 tests/src/message_passing.rs diff --git a/tests/shader/build.ninja b/tests/shader/build.ninja index 8a25473..fbdddb5 100644 --- a/tests/shader/build.ninja +++ b/tests/shader/build.ninja @@ -57,3 +57,11 @@ build gen/prefix_scan.spv: glsl prefix_scan.comp build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl build gen/prefix_scan.msl: msl gen/prefix_scan.spv + +build gen/message_passing.spv: glsl message_passing.comp +build gen/message_passing.hlsl: hlsl gen/message_passing.spv +build gen/message_passing.dxil: dxil gen/message_passing.hlsl +build gen/message_passing.msl: msl gen/message_passing.spv + +build gen/message_passing_vkmm.spv: glsl message_passing.comp + flags = -DVKMM diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil new file mode 100644 index 0000000000000000000000000000000000000000..2be73da19d9a41131935c2c68a6e89477f90dcd0 GIT binary patch literal 3116 zcmeH}eN0=|6~M3k9)8dGVIINj>|n?Pic6`LJq+PvhRk0fW|+>zjHMH7HUwrgK5gpp zQEmRhrir6IQb%(Q|Y{l)X-Holbd&*yN0043V01WCPCn5&H3(dj=r(I81McH8;0RGiq}fsYRFCiXZ;ktzFG#DeS2gwgoY)%3=ABhIv#pB>=L(Zd&iCRYJ?u#rZuw>tr~J)i_IhCL5O z%8*EsU|<4#*}+*8!t%M3i$KG>P$}H_6H*pXH){qWA1%IVrQc>1PwY>Kk-ZXxv39 zj;|OEV(R7G>3g?s{$W;zho-f1!;5?0Gr$^X-OT#*>g>||#^JHa#pT6v(&y8iPWq$t z*rnY3OOyQRaFcmIjU(A&kBgWO5)(NpPgYZtqj_)Rk@}VnlY1mqa^_ggsnzP6!z%+$ ztHyV?_M5W}Kak~TWd4O&n_L*}?H|0d?{fd(g>pWlIMTV-R&Q^8*;ZF?H@VO4lAICL zq*TAe46h{Bh-+Yq)q&uU5-`0k8w;_&Be;ehOEF=gw8Q~fo-$KXV-V$yZg*oI*@~u?q z_0;5POG*R%s`5jRa!Fe_ffRk{De@x9g)Nc8jP+Qte5>CEd#AsR@wb^VM<(WY90Sw- z%<2Cz?r#_SZA1R9A(#BYmheGX_<2ycNh7s$qtf+J=|ZGmwMgevr8BPNc?<6oMq4Lav=0Axak*e<;RF}JVeZ-Vz0Cz-I;!e(=S=p3Ll4g>y+?rnD77~#y_Gxt~t)0RTaC@v?ZA}*n8bh0gwQ-%oz8^!=n9kA(7Uk_w3(qTgh7@ zpy%g6m!Q#=Q0(k)$%LTJtICmkWAvdr%GbV=oWLRe6i)w5Ge8k44rT#BuQusL5etAc z4*lrw+|W95>66E^IgT2Zz2~=C{8PC3P8JXBU~w&+`=4NO<$s;UmRR_1zg8^XK5MA{ zZ5H$4V#s3hU$gj23bPC@hOz%CgCPVjsKrEVojr*+LqP^>%@OuCe97K%i)5fz%zLMC zx7iaeO2Prx$#(9Cu)MCAMPf98Gtm}j8?*az=qBC3_M|CH>nNp1r;nlrLKKSIUk zj^ME;-w8kY#dq^Z2-p0RCr$$&r1SW|Z4OtLnO|D!Q*)h^xImnKxGJ~S3@Dt3{-pXF zx%su@tX+!5m6Hc<_PIDPNL~~Acw8TItctncH^r`c)}l(TFn<3>$$`q?xtM+;t8c>H zdXRVqpi@*K@}9(FUYRw|1c8bqPUd_df8k data) + { + uint _73; + control_buf.InterlockedAdd(0, 1u, _73); + } +} + +[numthreads(256, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl new file mode 100644 index 0000000..e48f48a --- /dev/null +++ b/tests/shader/gen/message_passing.msl @@ -0,0 +1,54 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Element +{ + uint data; + uint flag; +}; + +struct DataBuf +{ + Element data[1]; +}; + +struct ControlBuf +{ + uint failures; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); + +static inline __attribute__((always_inline)) +uint permute_flag_ix(thread const uint& data_ix) +{ + return (data_ix * 419u) & 65535u; +} + +kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed); + threadgroup_barrier(mem_flags::mem_device); + uint param = gl_GlobalInvocationID.x; + uint write_flag_ix = permute_flag_ix(param); + atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); + uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; + uint param_1 = read_ix; + uint read_flag_ix = permute_flag_ix(param_1); + uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); + uint flag = _58; + threadgroup_barrier(mem_flags::mem_device); + uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed); + uint data = _62; + if (flag > data) + { + uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed); + } +} + diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv new file mode 100644 index 0000000000000000000000000000000000000000..e5f56d6d6a95620755d8f6954abdaf4042835412 GIT binary patch literal 2196 zcmZ9O*-jKu5QdKoGk}Od5OD(;P(VQx*+r1m0hud(fKD?sWRh9PEH3xHiMKw5H$I2a zD__7RQT)H|uGpDl%BufAb!t1OM^kO%9nM{F7hR{zyJ+>hHZjh1yG*Ea`B(YbdgFL@ zVet_jmt87!#HUw2Y3X)J*)LYuxFzWm57!SNM|_c#{f_E|>QX_g{MDpj@rJ z^Xi4_S>V-!vR`Qw3w8OW<&}(`b{(qKxwD{FZZ-n%q~xD^#kW()xC!Yuwc`KEL%!WjN}u^PzZ~NS(zU=pj{3;BdBbtTf?=Ht z(#dnn!pT0?gp)Pz3GY-Zjas!tYw9iI;8-VqvDB;ub!_^}Nf#pDu*H3GHotM4w2WhZ zaNJc+e_xT#NIE6>*!=8VPVWGYmxMpF9FwpNXCoM5=GrYm6VGZ+#)GqX)Siq7CLVPX z|4^EE*Chjbk-W*QPIDf6P(IW@Dq)QZcSs}fSz&s^hMSK4dt`r562ra1=jYCiXrI~P zj|?jlGlCnD%`Cv=xhfsi;M`h_nUB?VE%b|eArF{7SVO`$q{%@q#Jef2l0u$%VCD#C zbq>pB;XfkH9x=dKX>x`R^n!es?AN@jjRzj#M8#wz)0Y^vr!(j_0z;0Y{Bs z?hMXyzK~7Q8J(g>?huYzkR`nf{CR^c`u`~=Elo`BX+nZFJlTTz?vnfSEtu~uxzG2O z#C&hyEoEhnw;b^SC={!7h@yEIt8H2~oYgiX+iGKvdZ-OPS)0`}E55a!hvMMh$2H8! zhT~0qj=9IO;rU*ui+g~xx)x=_vBw>Nt*0f~^aRg7cLrx?S7gIkPph)6C-&&$tER(z zo=DiY-iXH?!zbr&v7d^kE{nY`+hViFom%V-+3Y7{^Ijr8dK=G#$!GC5W#jWvbK<+% zmf*7~`Jvi&rCGoAw%FT|!13;}xfk9T-(pTeJ>T{2==DGX&zrJ0_DkpNJo=A5xD^TC v7H60EDYBpu{+n#f&MQM-T7ag2b1d`H|C)K literal 0 HcmV?d00001 diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv new file mode 100644 index 0000000000000000000000000000000000000000..8527c2bae4567003bae7f8e8db8578114729a358 GIT binary patch literal 2300 zcmZ9M*-~6p6o$7zLkJp3B{5Dx$V4;|jb;=R8$!}xP%~6{+jRm>s>>dzraS7rZ{n>_ z;f>GXaODd)RB8OabIz)qVpFTv`v1N5n)W`7kDZxH>7{fy9ZC;Vv5uxOn3UAN953{* zXnh_(_^Nq-<5Bb3sPlE!Yj#`R{@}Z2x4+Zsq?t6CpKEMA-df%t?kr!q`hgLTr13nk zzq9z%&=W{EYxmrE3z>tLJ1RkP^DE>0jiG-6Or+`5*viwFdH1*bd#&bvtDE(P?d^U1 zYIs#+*U}Vqr?l4^bVtKhbGMT{ZMMI?FuJ^w`%bgBlMOSt93KPU?XWr`EID(YK(FJoHrma7#I873!|P;;n@@0J)tINfKkYOdo&J-ov)Oyr-_C~Zes6O< z-=9q9(ccW(FDlRZiL`{?%LZAu#14AU%65uACeyXRWy8R{&gg??BEsq`$K~jkp3Y7BI)obp}zJGit8JlZ$xPUX3RCSh>hAg6&OP{$lC>3ib~q2|Npa{ye35_MM&n$|*8ABe{8OXCXGv ztLUPJlvYaYe4?(`a=)S%^N8)kbrSpr+8p*`yf@L5l=H?DJ4d;w^AxsA|9P}~#t<)} z%`Fap=dMrWKU=~2okRS;5c}_8*DH7#+qV+&-pBSE3H%ZE_}q*0x4_mA`?s;zkmIoT z(9Y2NkDBje>n~@|V&6bD#+P85xWp-E=9`eSH{}`~=x?1a=lK_meHqhxKaZ#bFCD;s z$JKqm<0|(1tzy65Dz2A!m8{;&+lY7PechslqThmF<)n9Y8u7jOCKgKlJLl!SC(qhn z#JqrgFy=dO^0(=F2`#=!4D-E<97g2bcLs7%&qZvxsOJ)P)Z?BtSdYB6cVDgNGWcLU zAHvChU)FF1Th4d#bID!BmY+qe%h}6CT{p1h+;i^Y=;Pca=@abe z$vyk{jp;a_PZ9U!e5c0qUgaa+Dz>#myfy5I=bm>L@$O)|Z#?hLcy+XV#9PNUN8r2I zb>v58a}V7>^pXFQ^P6bbKfEFLHV`@Ax3+J?xAZyUcVj(&@V@Nt3q<}L5^w2m&c%84 yzkFCX5WgYMI9t~t data) + { + atomicAdd(control_buf.failures, 1u); + } +} diff --git a/tests/src/clear.rs b/tests/src/clear.rs index f691928..d643161 100644 --- a/tests/src/clear.rs +++ b/tests/src/clear.rs @@ -74,7 +74,7 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult impl ClearCode { pub unsafe fn new(runner: &mut Runner) -> ClearCode { - let code = include_shader!(&runner.session, "../shader/gen/Clear"); + let code = include_shader!(&runner.session, "../shader/gen/clear"); let pipeline = runner .session .create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer]) diff --git a/tests/src/main.rs b/tests/src/main.rs index 186ce25..c8f57bd 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -18,6 +18,7 @@ mod clear; mod config; +mod message_passing; mod prefix; mod prefix_tree; mod runner; @@ -105,5 +106,19 @@ fn main() { } report(&prefix_tree::run_prefix_test(&mut runner, &config)); } + if config.groups.matches("atomic") { + report(&message_passing::run_message_passing_test( + &mut runner, + &config, + message_passing::Variant::Atomic, + )); + if runner.session.gpu_info().has_memory_model { + report(&message_passing::run_message_passing_test( + &mut runner, + &config, + message_passing::Variant::Vkmm, + )); + } + } } } diff --git a/tests/src/message_passing.rs b/tests/src/message_passing.rs new file mode 100644 index 0000000..8accc25 --- /dev/null +++ b/tests/src/message_passing.rs @@ -0,0 +1,180 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode}; +use piet_gpu_hal::{Buffer, Pipeline}; + +use crate::clear::{ClearBinding, ClearCode, ClearStage}; +use crate::config::Config; +use crate::runner::{Commands, Runner}; +use crate::test_result::TestResult; + +const N_ELEMENTS: u64 = 65536; + +/// The shader code forMessagePassing sum example. +struct MessagePassingCode { + pipeline: Pipeline, + clear_code: Option, +} + +/// The stage resources for the prefix sum example. +struct MessagePassingStage { + data_buf: Buffer, + clear_stages: Option<(ClearStage, ClearBinding, ClearStage)>, +} + +/// The binding for the prefix sum example. +struct MessagePassingBinding { + descriptor_set: DescriptorSet, + clear_binding: Option, +} + +#[derive(Debug)] +pub enum Variant { + Atomic, + Vkmm, +} + +pub unsafe fn run_message_passing_test( + runner: &mut Runner, + config: &Config, + variant: Variant, +) -> TestResult { + let mut result = TestResult::new(format!("message passing litmus, {:?}", variant)); + let out_buf = runner.buf_down(4); + let code = MessagePassingCode::new(runner, variant); + let stage = MessagePassingStage::new(runner, &code); + let binding = stage.bind(runner, &code, &out_buf.dev_buf); + let n_iter = config.n_iter; + let mut total_elapsed = 0.0; + let mut failures = 0; + for _ in 0..n_iter { + let mut commands = runner.commands(); + commands.write_timestamp(0); + stage.record(&mut commands, &code, &binding, &out_buf.dev_buf); + commands.write_timestamp(1); + commands.cmd_buf.memory_barrier(); + commands.download(&out_buf); + total_elapsed += runner.submit(commands); + let mut dst: Vec = Default::default(); + out_buf.read(&mut dst); + failures += dst[0]; + } + if failures > 0 { + result.fail(format!("{} failures", failures)); + } + result.timing(total_elapsed, N_ELEMENTS * n_iter); + result +} + +impl MessagePassingCode { + unsafe fn new(runner: &mut Runner, variant: Variant) -> MessagePassingCode { + let code = match variant { + Variant::Atomic => include_shader!(&runner.session, "../shader/gen/message_passing"), + Variant::Vkmm => { + ShaderCode::Spv(include_bytes!("../shader/gen/message_passing_vkmm.spv")) + } + }; + let pipeline = runner + .session + .create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer]) + .unwrap(); + // Currently, DX12 and Metal backends don't support buffer clearing, so use a + // compute shader as a workaround. + let clear_code = if runner.backend_type() != BackendType::Vulkan { + Some(ClearCode::new(runner)) + } else { + None + }; + MessagePassingCode { + pipeline, + clear_code, + } + } +} + +impl MessagePassingStage { + unsafe fn new(runner: &mut Runner, code: &MessagePassingCode) -> MessagePassingStage { + let data_buf_size = 8 * N_ELEMENTS; + let data_buf = runner + .session + .create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST) + .unwrap(); + let clear_stages = if let Some(clear_code) = &code.clear_code { + let stage0 = ClearStage::new(runner, N_ELEMENTS * 2); + let binding0 = stage0.bind(runner, clear_code, &data_buf); + let stage1 = ClearStage::new(runner, 1); + Some((stage0, binding0, stage1)) + } else { + None + }; + MessagePassingStage { + data_buf, + clear_stages, + } + } + + unsafe fn bind( + &self, + runner: &mut Runner, + code: &MessagePassingCode, + out_buf: &Buffer, + ) -> MessagePassingBinding { + let descriptor_set = runner + .session + .create_simple_descriptor_set(&code.pipeline, &[&self.data_buf, out_buf]) + .unwrap(); + let clear_binding = if let Some(clear_code) = &code.clear_code { + Some( + self.clear_stages + .as_ref() + .unwrap() + .2 + .bind(runner, clear_code, out_buf), + ) + } else { + None + }; + MessagePassingBinding { + descriptor_set, + clear_binding, + } + } + + unsafe fn record( + &self, + commands: &mut Commands, + code: &MessagePassingCode, + bindings: &MessagePassingBinding, + out_buf: &Buffer, + ) { + if let Some((stage0, binding0, stage1)) = &self.clear_stages { + let code = code.clear_code.as_ref().unwrap(); + stage0.record(commands, code, binding0); + stage1.record(commands, code, bindings.clear_binding.as_ref().unwrap()); + } else { + commands.cmd_buf.clear_buffer(&self.data_buf, None); + commands.cmd_buf.clear_buffer(out_buf, None); + } + commands.cmd_buf.memory_barrier(); + commands.cmd_buf.dispatch( + &code.pipeline, + &bindings.descriptor_set, + (256, 1, 1), + (256, 1, 1), + ); + } +}