diff --git a/tests/shader/build.ninja b/tests/shader/build.ninja
index 8a25473..fbdddb5 100644
--- a/tests/shader/build.ninja
+++ b/tests/shader/build.ninja
@@ -57,3 +57,11 @@ build gen/prefix_scan.spv: glsl prefix_scan.comp
 build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
 build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl
 build gen/prefix_scan.msl: msl gen/prefix_scan.spv
+
+build gen/message_passing.spv: glsl message_passing.comp
+build gen/message_passing.hlsl: hlsl gen/message_passing.spv
+build gen/message_passing.dxil: dxil gen/message_passing.hlsl
+build gen/message_passing.msl: msl gen/message_passing.spv
+
+build gen/message_passing_vkmm.spv: glsl message_passing.comp
+  flags = -DVKMM
diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil
new file mode 100644
index 0000000..2be73da
Binary files /dev/null and b/tests/shader/gen/message_passing.dxil differ
diff --git a/tests/shader/gen/message_passing.hlsl b/tests/shader/gen/message_passing.hlsl
new file mode 100644
index 0000000..ba8ce5f
--- /dev/null
+++ b/tests/shader/gen/message_passing.hlsl
@@ -0,0 +1,54 @@
+struct Element
+{
+    uint data;
+    uint flag;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
+
+RWByteAddressBuffer data_buf : register(u0);
+RWByteAddressBuffer control_buf : register(u1);
+
+static uint3 gl_GlobalInvocationID;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+};
+
+uint permute_flag_ix(uint data_ix)
+{
+    return (data_ix * 419u) & 65535u;
+}
+
+void comp_main()
+{
+    uint _76;
+    data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
+    DeviceMemoryBarrier();
+    uint param = gl_GlobalInvocationID.x;
+    uint write_flag_ix = permute_flag_ix(param);
+    uint _77;
+    data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
+    uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
+    uint param_1 = read_ix;
+    uint read_flag_ix = permute_flag_ix(param_1);
+    uint _58;
+    data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
+    uint flag = _58;
+    DeviceMemoryBarrier();
+    uint _62;
+    data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
+    uint data = _62;
+    if (flag > data)
+    {
+        uint _73;
+        control_buf.InterlockedAdd(0, 1u, _73);
+    }
+}
+
+[numthreads(256, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+    comp_main();
+}
diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl
new file mode 100644
index 0000000..e48f48a
--- /dev/null
+++ b/tests/shader/gen/message_passing.msl
@@ -0,0 +1,54 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Element
+{
+    uint data;
+    uint flag;
+};
+
+struct DataBuf
+{
+    Element data[1];
+};
+
+struct ControlBuf
+{
+    uint failures;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
+
+static inline __attribute__((always_inline))
+uint permute_flag_ix(thread const uint& data_ix)
+{
+    return (data_ix * 419u) & 65535u;
+}
+
+kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
+    threadgroup_barrier(mem_flags::mem_device);
+    uint param = gl_GlobalInvocationID.x;
+    uint write_flag_ix = permute_flag_ix(param);
+ atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); + uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; + uint param_1 = read_ix; + uint read_flag_ix = permute_flag_ix(param_1); + uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); + uint flag = _58; + threadgroup_barrier(mem_flags::mem_device); + uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed); + uint data = _62; + if (flag > data) + { + uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed); + } +} + diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv new file mode 100644 index 0000000..e5f56d6 Binary files /dev/null and b/tests/shader/gen/message_passing.spv differ diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv new file mode 100644 index 0000000..8527c2b Binary files /dev/null and b/tests/shader/gen/message_passing_vkmm.spv differ diff --git a/tests/shader/message_passing.comp b/tests/shader/message_passing.comp new file mode 100644 index 0000000..e5e53b6 --- /dev/null +++ b/tests/shader/message_passing.comp @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Our version of the message passing atomic litmus test. + +#version 450 + +#extension GL_KHR_memory_scope_semantics : enable + +#ifdef VKMM +#pragma use_vulkan_memory_model +#define ACQUIRE gl_StorageSemanticsBuffer, gl_SemanticsAcquire +#define RELEASE gl_StorageSemanticsBuffer, gl_SemanticsRelease +#else +#define ACQUIRE 0, 0 +#define RELEASE 0, 0 +#endif + +layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + +struct Element +{ + uint data; + uint flag; +}; + +layout(binding = 0) buffer DataBuf +{ + Element data[]; +} data_buf; + +layout(binding = 1) buffer ControlBuf +{ + uint failures; +} control_buf; + +uint permute_flag_ix(uint data_ix) +{ + return (data_ix * 419u) & 65535u; +} + +void main() +{ + atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0); +#ifndef VKMM + memoryBarrierBuffer(); +#endif + uint write_flag_ix = permute_flag_ix(gl_GlobalInvocationID.x); + atomicStore(data_buf.data[write_flag_ix].flag, 1u, gl_ScopeDevice, RELEASE); + uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; + uint read_flag_ix = permute_flag_ix(read_ix); + uint flag = atomicLoad(data_buf.data[read_flag_ix].flag, gl_ScopeDevice, ACQUIRE); +#ifndef VKMM + memoryBarrierBuffer(); +#endif + uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0); + if (flag > data) + { + atomicAdd(control_buf.failures, 1u); + } +} diff --git a/tests/src/clear.rs b/tests/src/clear.rs index f691928..d643161 100644 --- a/tests/src/clear.rs +++ b/tests/src/clear.rs @@ -74,7 +74,7 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult impl ClearCode { pub unsafe fn new(runner: &mut Runner) -> ClearCode { - let code = include_shader!(&runner.session, "../shader/gen/Clear"); + let code = include_shader!(&runner.session, "../shader/gen/clear"); let pipeline = runner .session .create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer]) diff --git a/tests/src/main.rs b/tests/src/main.rs index 186ce25..c8f57bd 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -18,6 +18,7 @@ mod clear; mod config; +mod message_passing; mod prefix; mod prefix_tree; mod runner; 
@@ -105,5 +106,19 @@ fn main() {
             }
             report(&prefix_tree::run_prefix_test(&mut runner, &config));
         }
+        if config.groups.matches("atomic") {
+            report(&message_passing::run_message_passing_test(
+                &mut runner,
+                &config,
+                message_passing::Variant::Atomic,
+            ));
+            if runner.session.gpu_info().has_memory_model {
+                report(&message_passing::run_message_passing_test(
+                    &mut runner,
+                    &config,
+                    message_passing::Variant::Vkmm,
+                ));
+            }
+        }
     }
 }
diff --git a/tests/src/message_passing.rs b/tests/src/message_passing.rs
new file mode 100644
index 0000000..8accc25
--- /dev/null
+++ b/tests/src/message_passing.rs
@@ -0,0 +1,180 @@
+// Copyright 2021 The piet-gpu authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Also licensed under MIT license, at your choice.
+
+use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode};
+use piet_gpu_hal::{Buffer, Pipeline};
+
+use crate::clear::{ClearBinding, ClearCode, ClearStage};
+use crate::config::Config;
+use crate::runner::{Commands, Runner};
+use crate::test_result::TestResult;
+
+const N_ELEMENTS: u64 = 65536;
+
+/// The shader code for the message passing example.
+struct MessagePassingCode {
+    pipeline: Pipeline,
+    clear_code: Option<ClearCode>,
+}
+
+/// The stage resources for the message passing example.
+struct MessagePassingStage {
+    data_buf: Buffer,
+    clear_stages: Option<(ClearStage, ClearBinding, ClearStage)>,
+}
+
+/// The binding for the message passing example.
+struct MessagePassingBinding {
+    descriptor_set: DescriptorSet,
+    clear_binding: Option<ClearBinding>,
+}
+
+#[derive(Debug)]
+pub enum Variant {
+    Atomic,
+    Vkmm,
+}
+
+pub unsafe fn run_message_passing_test(
+    runner: &mut Runner,
+    config: &Config,
+    variant: Variant,
+) -> TestResult {
+    let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
+    let out_buf = runner.buf_down(4);
+    let code = MessagePassingCode::new(runner, variant);
+    let stage = MessagePassingStage::new(runner, &code);
+    let binding = stage.bind(runner, &code, &out_buf.dev_buf);
+    let n_iter = config.n_iter;
+    let mut total_elapsed = 0.0;
+    let mut failures = 0;
+    for _ in 0..n_iter {
+        let mut commands = runner.commands();
+        commands.write_timestamp(0);
+        stage.record(&mut commands, &code, &binding, &out_buf.dev_buf);
+        commands.write_timestamp(1);
+        commands.cmd_buf.memory_barrier();
+        commands.download(&out_buf);
+        total_elapsed += runner.submit(commands);
+        let mut dst: Vec<u32> = Default::default();
+        out_buf.read(&mut dst);
+        failures += dst[0];
+    }
+    if failures > 0 {
+        result.fail(format!("{} failures", failures));
+    }
+    result.timing(total_elapsed, N_ELEMENTS * n_iter);
+    result
+}
+
+impl MessagePassingCode {
+    unsafe fn new(runner: &mut Runner, variant: Variant) -> MessagePassingCode {
+        let code = match variant {
+            Variant::Atomic => include_shader!(&runner.session, "../shader/gen/message_passing"),
+            Variant::Vkmm => {
+                ShaderCode::Spv(include_bytes!("../shader/gen/message_passing_vkmm.spv"))
+            }
+        };
+        let pipeline = runner
+            .session
+            .create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
+            .unwrap();
+        // Currently, DX12 and Metal backends don't support buffer clearing, so use a
+        // compute shader as a workaround.
+        let clear_code = if runner.backend_type() != BackendType::Vulkan {
+            Some(ClearCode::new(runner))
+        } else {
+            None
+        };
+        MessagePassingCode {
+            pipeline,
+            clear_code,
+        }
+    }
+}
+
+impl MessagePassingStage {
+    unsafe fn new(runner: &mut Runner, code: &MessagePassingCode) -> MessagePassingStage {
+        let data_buf_size = 8 * N_ELEMENTS;
+        let data_buf = runner
+            .session
+            .create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
+            .unwrap();
+        let clear_stages = if let Some(clear_code) = &code.clear_code {
+            let stage0 = ClearStage::new(runner, N_ELEMENTS * 2);
+            let binding0 = stage0.bind(runner, clear_code, &data_buf);
+            let stage1 = ClearStage::new(runner, 1);
+            Some((stage0, binding0, stage1))
+        } else {
+            None
+        };
+        MessagePassingStage {
+            data_buf,
+            clear_stages,
+        }
+    }
+
+    unsafe fn bind(
+        &self,
+        runner: &mut Runner,
+        code: &MessagePassingCode,
+        out_buf: &Buffer,
+    ) -> MessagePassingBinding {
+        let descriptor_set = runner
+            .session
+            .create_simple_descriptor_set(&code.pipeline, &[&self.data_buf, out_buf])
+            .unwrap();
+        let clear_binding = if let Some(clear_code) = &code.clear_code {
+            Some(
+                self.clear_stages
+                    .as_ref()
+                    .unwrap()
+                    .2
+                    .bind(runner, clear_code, out_buf),
+            )
+        } else {
+            None
+        };
+        MessagePassingBinding {
+            descriptor_set,
+            clear_binding,
+        }
+    }
+
+    unsafe fn record(
+        &self,
+        commands: &mut Commands,
+        code: &MessagePassingCode,
+        bindings: &MessagePassingBinding,
+        out_buf: &Buffer,
+    ) {
+        if let Some((stage0, binding0, stage1)) = &self.clear_stages {
+            let code = code.clear_code.as_ref().unwrap();
+            stage0.record(commands, code, binding0);
+            stage1.record(commands, code, bindings.clear_binding.as_ref().unwrap());
+        } else {
+            commands.cmd_buf.clear_buffer(&self.data_buf, None);
+            commands.cmd_buf.clear_buffer(out_buf, None);
+        }
+        commands.cmd_buf.memory_barrier();
+        commands.cmd_buf.dispatch(
+            &code.pipeline,
+            &bindings.descriptor_set,
+            (256, 1, 1),
+            (256, 1, 1),
+        );
+    }
+}
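
Note on the pattern under test: each invocation plays both writer and reader. As writer it stores 1u to its own data slot and then to a permuted flag slot; as reader it loads the flag of a distant invocation (indices scrambled by the multiplications with 419 and 4099) and then the matching data. When the flag store/load pair carries release/acquire semantics (explicit in the VKMM variant, approximated with memoryBarrierBuffer() otherwise), observing flag == 1 together with data == 0 is forbidden, which is exactly what the flag > data branch counts into control_buf.failures. For reference, here is a minimal CPU-side sketch of the same message-passing shape using Rust's std::sync::atomic; it is illustrative only and not part of this patch:

    use std::sync::atomic::{AtomicU32, Ordering};
    use std::sync::Arc;
    use std::thread;

    fn main() {
        let data = Arc::new(AtomicU32::new(0));
        let flag = Arc::new(AtomicU32::new(0));

        let (d, f) = (Arc::clone(&data), Arc::clone(&flag));
        // Writer: publish the payload, then release the flag.
        let writer = thread::spawn(move || {
            d.store(1, Ordering::Relaxed);
            f.store(1, Ordering::Release); // pairs with the Acquire load below
        });

        // Reader: acquire the flag, then read the payload.
        let reader = thread::spawn(move || {
            let flag = flag.load(Ordering::Acquire);
            let data = data.load(Ordering::Relaxed);
            // With the Release/Acquire pairing, flag == 1 implies data == 1;
            // this is the invariant the shader checks with its flag > data test.
            assert!(flag <= data, "message passing violation");
        });

        writer.join().unwrap();
        reader.join().unwrap();
    }

The shader differs from this two-thread sketch in scale rather than shape: 65536 invocations play both roles at once, and the index permutations scatter each flag away from its data, making it more likely that a genuine weak-memory reordering, where the hardware allows one, is actually observed and counted.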