Add message passing litmus test

This is our version of the standard message passing litmus test for atomics. It does a bunch in parallel and permutes the reads and writes extensively, so it's been more sensitive than existing tests.
2025-01-10 20:51:29 +11:00 · 2021-11-11 16:17:04 -08:00 · 2021-11-11 16:17:04 -08:00 · 10a624ee75
parent 825a1eb04c
commit 10a624ee75
10 changed files with 372 additions and 1 deletions
--- a/tests/shader/build.ninja
+++ b/tests/shader/build.ninja
@ -57,3 +57,11 @@ build gen/prefix_scan.spv: glsl prefix_scan.comp
 build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
 build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl
 build gen/prefix_scan.msl: msl gen/prefix_scan.spv
 build gen/message_passing.spv: glsl message_passing.comp
 build gen/message_passing.hlsl: hlsl gen/message_passing.spv
 build gen/message_passing.dxil: dxil gen/message_passing.hlsl
 build gen/message_passing.msl: msl gen/message_passing.spv
 build gen/message_passing_vkmm.spv: glsl message_passing.comp
  flags = -DVKMM
--- a/tests/shader/gen/message_passing.dxil
+++ b/tests/shader/gen/message_passing.dxil
--- a/tests/shader/gen/message_passing.hlsl
+++ b/tests/shader/gen/message_passing.hlsl
@ -0,0 +1,54 @@
 struct Element
 {
    uint data;
    uint flag;
 };
 static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
 RWByteAddressBuffer data_buf : register(u0);
 RWByteAddressBuffer control_buf : register(u1);
 static uint3 gl_GlobalInvocationID;
 struct SPIRV_Cross_Input
 {
    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
 };
 uint permute_flag_ix(uint data_ix)
 {
    return (data_ix * 419u) & 65535u;
 }
 void comp_main()
 {
    uint _76;
    data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
    DeviceMemoryBarrier();
    uint param = gl_GlobalInvocationID.x;
    uint write_flag_ix = permute_flag_ix(param);
    uint _77;
    data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
    uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
    uint param_1 = read_ix;
    uint read_flag_ix = permute_flag_ix(param_1);
    uint _58;
    data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
    uint flag = _58;
    DeviceMemoryBarrier();
    uint _62;
    data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
    uint data = _62;
    if (flag > data)
    {
        uint _73;
        control_buf.InterlockedAdd(0, 1u, _73);
    }
 }
 [numthreads(256, 1, 1)]
 void main(SPIRV_Cross_Input stage_input)
 {
    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
    comp_main();
 }
--- a/tests/shader/gen/message_passing.msl
+++ b/tests/shader/gen/message_passing.msl
@ -0,0 +1,54 @@
 #pragma clang diagnostic ignored "-Wmissing-prototypes"
 #pragma clang diagnostic ignored "-Wunused-variable"
 #include <metal_stdlib>
 #include <simd/simd.h>
 #include <metal_atomic>
 using namespace metal;
 struct Element
 {
    uint data;
    uint flag;
 };
 struct DataBuf
 {
    Element data[1];
 };
 struct ControlBuf
 {
    uint failures;
 };
 constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
 static inline __attribute__((always_inline))
 uint permute_flag_ix(thread const uint& data_ix)
 {
    return (data_ix * 419u) & 65535u;
 }
 kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
    atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
    threadgroup_barrier(mem_flags::mem_device);
    uint param = gl_GlobalInvocationID.x;
    uint write_flag_ix = permute_flag_ix(param);
    atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
    uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
    uint param_1 = read_ix;
    uint read_flag_ix = permute_flag_ix(param_1);
    uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
    uint flag = _58;
    threadgroup_barrier(mem_flags::mem_device);
    uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
    uint data = _62;
    if (flag > data)
    {
        uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
    }
 }
--- a/tests/shader/gen/message_passing.spv
+++ b/tests/shader/gen/message_passing.spv
--- a/tests/shader/gen/message_passing_vkmm.spv
+++ b/tests/shader/gen/message_passing_vkmm.spv
--- a/tests/shader/message_passing.comp
+++ b/tests/shader/message_passing.comp
@ -0,0 +1,60 @@
 // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
 // Our version of the message passing atomic litmus test.
 #version 450
 #extension GL_KHR_memory_scope_semantics : enable
 #ifdef VKMM
 #pragma use_vulkan_memory_model
 #define ACQUIRE gl_StorageSemanticsBuffer, gl_SemanticsAcquire
 #define RELEASE gl_StorageSemanticsBuffer, gl_SemanticsRelease
 #else
 #define ACQUIRE 0, 0
 #define RELEASE 0, 0
 #endif
 layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
 struct Element
 {
    uint data;
    uint flag;
 };
 layout(binding = 0) buffer DataBuf
 {
    Element data[];
 } data_buf;
 layout(binding = 1) buffer ControlBuf
 {
    uint failures;
 } control_buf;
 uint permute_flag_ix(uint data_ix)
 {
    return (data_ix * 419u) & 65535u;
 }
 void main()
 {
    atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0);
 #ifndef VKMM
    memoryBarrierBuffer();
 #endif
    uint write_flag_ix = permute_flag_ix(gl_GlobalInvocationID.x);
    atomicStore(data_buf.data[write_flag_ix].flag, 1u, gl_ScopeDevice, RELEASE);
    uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
    uint read_flag_ix = permute_flag_ix(read_ix);
    uint flag = atomicLoad(data_buf.data[read_flag_ix].flag, gl_ScopeDevice, ACQUIRE);
 #ifndef VKMM
    memoryBarrierBuffer();
 #endif
    uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0);
    if (flag > data)
    {
        atomicAdd(control_buf.failures, 1u);
    }
 }
--- a/tests/src/clear.rs
+++ b/tests/src/clear.rs
@ -74,7 +74,7 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult
 impl ClearCode {
    pub unsafe fn new(runner: &mut Runner) -> ClearCode {
-        let code = include_shader!(&runner.session, "../shader/gen/Clear");
+        let code = include_shader!(&runner.session, "../shader/gen/clear");
        let pipeline = runner
            .session
            .create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer])
--- a/tests/src/main.rs
+++ b/tests/src/main.rs
@ -18,6 +18,7 @@
 mod clear;
 mod config;
 mod message_passing;
 mod prefix;
 mod prefix_tree;
 mod runner;
@ -105,5 +106,19 @@ fn main() {
            }
            report(&prefix_tree::run_prefix_test(&mut runner, &config));
        }
        if config.groups.matches("atomic") {
            report(&message_passing::run_message_passing_test(
                &mut runner,
                &config,
                message_passing::Variant::Atomic,
            ));
            if runner.session.gpu_info().has_memory_model {
                report(&message_passing::run_message_passing_test(
                    &mut runner,
                    &config,
                    message_passing::Variant::Vkmm,
                ));
            }
        }
    }
 }
--- a/tests/src/message_passing.rs
+++ b/tests/src/message_passing.rs
@ -0,0 +1,180 @@
 // Copyright 2021 The piet-gpu authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Also licensed under MIT license, at your choice.
 use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode};
 use piet_gpu_hal::{Buffer, Pipeline};
 use crate::clear::{ClearBinding, ClearCode, ClearStage};
 use crate::config::Config;
 use crate::runner::{Commands, Runner};
 use crate::test_result::TestResult;
 const N_ELEMENTS: u64 = 65536;
 /// The shader code forMessagePassing sum example.
 struct MessagePassingCode {
    pipeline: Pipeline,
    clear_code: Option<ClearCode>,
 }
 /// The stage resources for the prefix sum example.
 struct MessagePassingStage {
    data_buf: Buffer,
    clear_stages: Option<(ClearStage, ClearBinding, ClearStage)>,
 }
 /// The binding for the prefix sum example.
 struct MessagePassingBinding {
    descriptor_set: DescriptorSet,
    clear_binding: Option<ClearBinding>,
 }
 #[derive(Debug)]
 pub enum Variant {
    Atomic,
    Vkmm,
 }
 pub unsafe fn run_message_passing_test(
    runner: &mut Runner,
    config: &Config,
    variant: Variant,
 ) -> TestResult {
    let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
    let out_buf = runner.buf_down(4);
    let code = MessagePassingCode::new(runner, variant);
    let stage = MessagePassingStage::new(runner, &code);
    let binding = stage.bind(runner, &code, &out_buf.dev_buf);
    let n_iter = config.n_iter;
    let mut total_elapsed = 0.0;
    let mut failures = 0;
    for _ in 0..n_iter {
        let mut commands = runner.commands();
        commands.write_timestamp(0);
        stage.record(&mut commands, &code, &binding, &out_buf.dev_buf);
        commands.write_timestamp(1);
        commands.cmd_buf.memory_barrier();
        commands.download(&out_buf);
        total_elapsed += runner.submit(commands);
        let mut dst: Vec<u32> = Default::default();
        out_buf.read(&mut dst);
        failures += dst[0];
    }
    if failures > 0 {
        result.fail(format!("{} failures", failures));
    }
    result.timing(total_elapsed, N_ELEMENTS * n_iter);
    result
 }
 impl MessagePassingCode {
    unsafe fn new(runner: &mut Runner, variant: Variant) -> MessagePassingCode {
        let code = match variant {
            Variant::Atomic => include_shader!(&runner.session, "../shader/gen/message_passing"),
            Variant::Vkmm => {
                ShaderCode::Spv(include_bytes!("../shader/gen/message_passing_vkmm.spv"))
            }
        };
        let pipeline = runner
            .session
            .create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
            .unwrap();
        // Currently, DX12 and Metal backends don't support buffer clearing, so use a
        // compute shader as a workaround.
        let clear_code = if runner.backend_type() != BackendType::Vulkan {
            Some(ClearCode::new(runner))
        } else {
            None
        };
        MessagePassingCode {
            pipeline,
            clear_code,
        }
    }
 }
 impl MessagePassingStage {
    unsafe fn new(runner: &mut Runner, code: &MessagePassingCode) -> MessagePassingStage {
        let data_buf_size = 8 * N_ELEMENTS;
        let data_buf = runner
            .session
            .create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
            .unwrap();
        let clear_stages = if let Some(clear_code) = &code.clear_code {
            let stage0 = ClearStage::new(runner, N_ELEMENTS * 2);
            let binding0 = stage0.bind(runner, clear_code, &data_buf);
            let stage1 = ClearStage::new(runner, 1);
            Some((stage0, binding0, stage1))
        } else {
            None
        };
        MessagePassingStage {
            data_buf,
            clear_stages,
        }
    }
    unsafe fn bind(
        &self,
        runner: &mut Runner,
        code: &MessagePassingCode,
        out_buf: &Buffer,
    ) -> MessagePassingBinding {
        let descriptor_set = runner
            .session
            .create_simple_descriptor_set(&code.pipeline, &[&self.data_buf, out_buf])
            .unwrap();
        let clear_binding = if let Some(clear_code) = &code.clear_code {
            Some(
                self.clear_stages
                    .as_ref()
                    .unwrap()
                    .2
                    .bind(runner, clear_code, out_buf),
            )
        } else {
            None
        };
        MessagePassingBinding {
            descriptor_set,
            clear_binding,
        }
    }
    unsafe fn record(
        &self,
        commands: &mut Commands,
        code: &MessagePassingCode,
        bindings: &MessagePassingBinding,
        out_buf: &Buffer,
    ) {
        if let Some((stage0, binding0, stage1)) = &self.clear_stages {
            let code = code.clear_code.as_ref().unwrap();
            stage0.record(commands, code, binding0);
            stage1.record(commands, code, bindings.clear_binding.as_ref().unwrap());
        } else {
            commands.cmd_buf.clear_buffer(&self.data_buf, None);
            commands.cmd_buf.clear_buffer(out_buf, None);
        }
        commands.cmd_buf.memory_barrier();
        commands.cmd_buf.dispatch(
            &code.pipeline,
            &bindings.descriptor_set,
            (256, 1, 1),
            (256, 1, 1),
        );
    }
 }