mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
Add message passing litmus test
This is our version of the standard message passing litmus test for atomics. It does a bunch in parallel and permutes the reads and writes extensively, so it's been more sensitive than existing tests.
This commit is contained in:
parent
825a1eb04c
commit
10a624ee75
|
@ -57,3 +57,11 @@ build gen/prefix_scan.spv: glsl prefix_scan.comp
|
||||||
build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
|
build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
|
||||||
build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl
|
build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl
|
||||||
build gen/prefix_scan.msl: msl gen/prefix_scan.spv
|
build gen/prefix_scan.msl: msl gen/prefix_scan.spv
|
||||||
|
|
||||||
|
build gen/message_passing.spv: glsl message_passing.comp
|
||||||
|
build gen/message_passing.hlsl: hlsl gen/message_passing.spv
|
||||||
|
build gen/message_passing.dxil: dxil gen/message_passing.hlsl
|
||||||
|
build gen/message_passing.msl: msl gen/message_passing.spv
|
||||||
|
|
||||||
|
build gen/message_passing_vkmm.spv: glsl message_passing.comp
|
||||||
|
flags = -DVKMM
|
||||||
|
|
BIN
tests/shader/gen/message_passing.dxil
Normal file
BIN
tests/shader/gen/message_passing.dxil
Normal file
Binary file not shown.
54
tests/shader/gen/message_passing.hlsl
Normal file
54
tests/shader/gen/message_passing.hlsl
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
struct Element
|
||||||
|
{
|
||||||
|
uint data;
|
||||||
|
uint flag;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
RWByteAddressBuffer data_buf : register(u0);
|
||||||
|
RWByteAddressBuffer control_buf : register(u1);
|
||||||
|
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint permute_flag_ix(uint data_ix)
|
||||||
|
{
|
||||||
|
return (data_ix * 419u) & 65535u;
|
||||||
|
}
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint _76;
|
||||||
|
data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
|
||||||
|
DeviceMemoryBarrier();
|
||||||
|
uint param = gl_GlobalInvocationID.x;
|
||||||
|
uint write_flag_ix = permute_flag_ix(param);
|
||||||
|
uint _77;
|
||||||
|
data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
|
||||||
|
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
|
||||||
|
uint param_1 = read_ix;
|
||||||
|
uint read_flag_ix = permute_flag_ix(param_1);
|
||||||
|
uint _58;
|
||||||
|
data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
|
||||||
|
uint flag = _58;
|
||||||
|
DeviceMemoryBarrier();
|
||||||
|
uint _62;
|
||||||
|
data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
|
||||||
|
uint data = _62;
|
||||||
|
if (flag > data)
|
||||||
|
{
|
||||||
|
uint _73;
|
||||||
|
control_buf.InterlockedAdd(0, 1u, _73);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
54
tests/shader/gen/message_passing.msl
Normal file
54
tests/shader/gen/message_passing.msl
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
#include <metal_atomic>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct Element
|
||||||
|
{
|
||||||
|
uint data;
|
||||||
|
uint flag;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DataBuf
|
||||||
|
{
|
||||||
|
Element data[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ControlBuf
|
||||||
|
{
|
||||||
|
uint failures;
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
static inline __attribute__((always_inline))
|
||||||
|
uint permute_flag_ix(thread const uint& data_ix)
|
||||||
|
{
|
||||||
|
return (data_ix * 419u) & 65535u;
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
|
||||||
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
uint param = gl_GlobalInvocationID.x;
|
||||||
|
uint write_flag_ix = permute_flag_ix(param);
|
||||||
|
atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
|
||||||
|
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
|
||||||
|
uint param_1 = read_ix;
|
||||||
|
uint read_flag_ix = permute_flag_ix(param_1);
|
||||||
|
uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
|
||||||
|
uint flag = _58;
|
||||||
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
|
uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
|
||||||
|
uint data = _62;
|
||||||
|
if (flag > data)
|
||||||
|
{
|
||||||
|
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
BIN
tests/shader/gen/message_passing.spv
Normal file
BIN
tests/shader/gen/message_passing.spv
Normal file
Binary file not shown.
BIN
tests/shader/gen/message_passing_vkmm.spv
Normal file
BIN
tests/shader/gen/message_passing_vkmm.spv
Normal file
Binary file not shown.
60
tests/shader/message_passing.comp
Normal file
60
tests/shader/message_passing.comp
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// Our version of the message passing atomic litmus test.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
#extension GL_KHR_memory_scope_semantics : enable
|
||||||
|
|
||||||
|
#ifdef VKMM
|
||||||
|
#pragma use_vulkan_memory_model
|
||||||
|
#define ACQUIRE gl_StorageSemanticsBuffer, gl_SemanticsAcquire
|
||||||
|
#define RELEASE gl_StorageSemanticsBuffer, gl_SemanticsRelease
|
||||||
|
#else
|
||||||
|
#define ACQUIRE 0, 0
|
||||||
|
#define RELEASE 0, 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
struct Element
|
||||||
|
{
|
||||||
|
uint data;
|
||||||
|
uint flag;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(binding = 0) buffer DataBuf
|
||||||
|
{
|
||||||
|
Element data[];
|
||||||
|
} data_buf;
|
||||||
|
|
||||||
|
layout(binding = 1) buffer ControlBuf
|
||||||
|
{
|
||||||
|
uint failures;
|
||||||
|
} control_buf;
|
||||||
|
|
||||||
|
uint permute_flag_ix(uint data_ix)
|
||||||
|
{
|
||||||
|
return (data_ix * 419u) & 65535u;
|
||||||
|
}
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
atomicStore(data_buf.data[gl_GlobalInvocationID.x].data, 1u, gl_ScopeDevice, 0, 0);
|
||||||
|
#ifndef VKMM
|
||||||
|
memoryBarrierBuffer();
|
||||||
|
#endif
|
||||||
|
uint write_flag_ix = permute_flag_ix(gl_GlobalInvocationID.x);
|
||||||
|
atomicStore(data_buf.data[write_flag_ix].flag, 1u, gl_ScopeDevice, RELEASE);
|
||||||
|
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
|
||||||
|
uint read_flag_ix = permute_flag_ix(read_ix);
|
||||||
|
uint flag = atomicLoad(data_buf.data[read_flag_ix].flag, gl_ScopeDevice, ACQUIRE);
|
||||||
|
#ifndef VKMM
|
||||||
|
memoryBarrierBuffer();
|
||||||
|
#endif
|
||||||
|
uint data = atomicLoad(data_buf.data[read_ix].data, gl_ScopeDevice, 0, 0);
|
||||||
|
if (flag > data)
|
||||||
|
{
|
||||||
|
atomicAdd(control_buf.failures, 1u);
|
||||||
|
}
|
||||||
|
}
|
|
@ -74,7 +74,7 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult
|
||||||
|
|
||||||
impl ClearCode {
|
impl ClearCode {
|
||||||
pub unsafe fn new(runner: &mut Runner) -> ClearCode {
|
pub unsafe fn new(runner: &mut Runner) -> ClearCode {
|
||||||
let code = include_shader!(&runner.session, "../shader/gen/Clear");
|
let code = include_shader!(&runner.session, "../shader/gen/clear");
|
||||||
let pipeline = runner
|
let pipeline = runner
|
||||||
.session
|
.session
|
||||||
.create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer])
|
.create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer])
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
mod clear;
|
mod clear;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod message_passing;
|
||||||
mod prefix;
|
mod prefix;
|
||||||
mod prefix_tree;
|
mod prefix_tree;
|
||||||
mod runner;
|
mod runner;
|
||||||
|
@ -105,5 +106,19 @@ fn main() {
|
||||||
}
|
}
|
||||||
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
||||||
}
|
}
|
||||||
|
if config.groups.matches("atomic") {
|
||||||
|
report(&message_passing::run_message_passing_test(
|
||||||
|
&mut runner,
|
||||||
|
&config,
|
||||||
|
message_passing::Variant::Atomic,
|
||||||
|
));
|
||||||
|
if runner.session.gpu_info().has_memory_model {
|
||||||
|
report(&message_passing::run_message_passing_test(
|
||||||
|
&mut runner,
|
||||||
|
&config,
|
||||||
|
message_passing::Variant::Vkmm,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
180
tests/src/message_passing.rs
Normal file
180
tests/src/message_passing.rs
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
// Copyright 2021 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet, ShaderCode};
|
||||||
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::runner::{Commands, Runner};
|
||||||
|
use crate::test_result::TestResult;
|
||||||
|
|
||||||
|
const N_ELEMENTS: u64 = 65536;
|
||||||
|
|
||||||
|
/// The shader code forMessagePassing sum example.
|
||||||
|
struct MessagePassingCode {
|
||||||
|
pipeline: Pipeline,
|
||||||
|
clear_code: Option<ClearCode>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The stage resources for the prefix sum example.
|
||||||
|
struct MessagePassingStage {
|
||||||
|
data_buf: Buffer,
|
||||||
|
clear_stages: Option<(ClearStage, ClearBinding, ClearStage)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The binding for the prefix sum example.
|
||||||
|
struct MessagePassingBinding {
|
||||||
|
descriptor_set: DescriptorSet,
|
||||||
|
clear_binding: Option<ClearBinding>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Variant {
|
||||||
|
Atomic,
|
||||||
|
Vkmm,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn run_message_passing_test(
|
||||||
|
runner: &mut Runner,
|
||||||
|
config: &Config,
|
||||||
|
variant: Variant,
|
||||||
|
) -> TestResult {
|
||||||
|
let mut result = TestResult::new(format!("message passing litmus, {:?}", variant));
|
||||||
|
let out_buf = runner.buf_down(4);
|
||||||
|
let code = MessagePassingCode::new(runner, variant);
|
||||||
|
let stage = MessagePassingStage::new(runner, &code);
|
||||||
|
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||||
|
let n_iter = config.n_iter;
|
||||||
|
let mut total_elapsed = 0.0;
|
||||||
|
let mut failures = 0;
|
||||||
|
for _ in 0..n_iter {
|
||||||
|
let mut commands = runner.commands();
|
||||||
|
commands.write_timestamp(0);
|
||||||
|
stage.record(&mut commands, &code, &binding, &out_buf.dev_buf);
|
||||||
|
commands.write_timestamp(1);
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
commands.download(&out_buf);
|
||||||
|
total_elapsed += runner.submit(commands);
|
||||||
|
let mut dst: Vec<u32> = Default::default();
|
||||||
|
out_buf.read(&mut dst);
|
||||||
|
failures += dst[0];
|
||||||
|
}
|
||||||
|
if failures > 0 {
|
||||||
|
result.fail(format!("{} failures", failures));
|
||||||
|
}
|
||||||
|
result.timing(total_elapsed, N_ELEMENTS * n_iter);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MessagePassingCode {
|
||||||
|
unsafe fn new(runner: &mut Runner, variant: Variant) -> MessagePassingCode {
|
||||||
|
let code = match variant {
|
||||||
|
Variant::Atomic => include_shader!(&runner.session, "../shader/gen/message_passing"),
|
||||||
|
Variant::Vkmm => {
|
||||||
|
ShaderCode::Spv(include_bytes!("../shader/gen/message_passing_vkmm.spv"))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let pipeline = runner
|
||||||
|
.session
|
||||||
|
.create_compute_pipeline(code, &[BindType::Buffer, BindType::Buffer])
|
||||||
|
.unwrap();
|
||||||
|
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
||||||
|
// compute shader as a workaround.
|
||||||
|
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||||
|
Some(ClearCode::new(runner))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
MessagePassingCode {
|
||||||
|
pipeline,
|
||||||
|
clear_code,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MessagePassingStage {
|
||||||
|
unsafe fn new(runner: &mut Runner, code: &MessagePassingCode) -> MessagePassingStage {
|
||||||
|
let data_buf_size = 8 * N_ELEMENTS;
|
||||||
|
let data_buf = runner
|
||||||
|
.session
|
||||||
|
.create_buffer(data_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||||
|
.unwrap();
|
||||||
|
let clear_stages = if let Some(clear_code) = &code.clear_code {
|
||||||
|
let stage0 = ClearStage::new(runner, N_ELEMENTS * 2);
|
||||||
|
let binding0 = stage0.bind(runner, clear_code, &data_buf);
|
||||||
|
let stage1 = ClearStage::new(runner, 1);
|
||||||
|
Some((stage0, binding0, stage1))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
MessagePassingStage {
|
||||||
|
data_buf,
|
||||||
|
clear_stages,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn bind(
|
||||||
|
&self,
|
||||||
|
runner: &mut Runner,
|
||||||
|
code: &MessagePassingCode,
|
||||||
|
out_buf: &Buffer,
|
||||||
|
) -> MessagePassingBinding {
|
||||||
|
let descriptor_set = runner
|
||||||
|
.session
|
||||||
|
.create_simple_descriptor_set(&code.pipeline, &[&self.data_buf, out_buf])
|
||||||
|
.unwrap();
|
||||||
|
let clear_binding = if let Some(clear_code) = &code.clear_code {
|
||||||
|
Some(
|
||||||
|
self.clear_stages
|
||||||
|
.as_ref()
|
||||||
|
.unwrap()
|
||||||
|
.2
|
||||||
|
.bind(runner, clear_code, out_buf),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
MessagePassingBinding {
|
||||||
|
descriptor_set,
|
||||||
|
clear_binding,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn record(
|
||||||
|
&self,
|
||||||
|
commands: &mut Commands,
|
||||||
|
code: &MessagePassingCode,
|
||||||
|
bindings: &MessagePassingBinding,
|
||||||
|
out_buf: &Buffer,
|
||||||
|
) {
|
||||||
|
if let Some((stage0, binding0, stage1)) = &self.clear_stages {
|
||||||
|
let code = code.clear_code.as_ref().unwrap();
|
||||||
|
stage0.record(commands, code, binding0);
|
||||||
|
stage1.record(commands, code, bindings.clear_binding.as_ref().unwrap());
|
||||||
|
} else {
|
||||||
|
commands.cmd_buf.clear_buffer(&self.data_buf, None);
|
||||||
|
commands.cmd_buf.clear_buffer(out_buf, None);
|
||||||
|
}
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
commands.cmd_buf.dispatch(
|
||||||
|
&code.pipeline,
|
||||||
|
&bindings.descriptor_set,
|
||||||
|
(256, 1, 1),
|
||||||
|
(256, 1, 1),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue