From c6965de557d9eb9ec0b39242bc4d7492a43e278f Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Sat, 6 Nov 2021 21:03:20 -0700 Subject: [PATCH] Add linked list test Measure bandwidth of building linked lists with atomics. --- tests/shader/build.ninja | 5 ++ tests/shader/gen/linkedlist.hlsl | 39 +++++++++ tests/shader/gen/linkedlist.msl | 36 ++++++++ tests/shader/gen/linkedlist.spv | Bin 0 -> 1936 bytes tests/shader/linkedlist.comp | 31 +++++++ tests/src/linkedlist.rs | 145 +++++++++++++++++++++++++++++++ tests/src/main.rs | 7 +- 7 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 tests/shader/gen/linkedlist.hlsl create mode 100644 tests/shader/gen/linkedlist.msl create mode 100644 tests/shader/gen/linkedlist.spv create mode 100644 tests/shader/linkedlist.comp create mode 100644 tests/src/linkedlist.rs diff --git a/tests/shader/build.ninja b/tests/shader/build.ninja index fbdddb5..49e0260 100644 --- a/tests/shader/build.ninja +++ b/tests/shader/build.ninja @@ -65,3 +65,8 @@ build gen/message_passing.msl: msl gen/message_passing.spv build gen/message_passing_vkmm.spv: glsl message_passing.comp flags = -DVKMM + +build gen/linkedlist.spv: glsl linkedlist.comp +build gen/linkedlist.hlsl: hlsl gen/linkedlist.spv +build gen/linkedlist.dxil: dxil gen/linkedlist.hlsl +build gen/linkedlist.msl: msl gen/linkedlist.spv diff --git a/tests/shader/gen/linkedlist.hlsl b/tests/shader/gen/linkedlist.hlsl new file mode 100644 index 0000000..614791a --- /dev/null +++ b/tests/shader/gen/linkedlist.hlsl @@ -0,0 +1,39 @@ +static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); + +RWByteAddressBuffer _56 : register(u0); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + uint rng = gl_GlobalInvocationID.x + 1u; + for (uint i = 0u; i < 100u; i++) + { + rng ^= (rng << uint(13)); + rng ^= (rng >> uint(17)); + rng ^= (rng << uint(5)); + uint bucket = rng % 65536u; + if (bucket != 0u) + { + uint _61; + _56.InterlockedAdd(0, 2u, _61); + uint alloc = _61 + 65536u; + uint _67; + _56.InterlockedExchange(bucket * 4 + 0, alloc, _67); + uint old = _67; + _56.Store(alloc * 4 + 0, old); + _56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x); + } + } +} + +[numthreads(256, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/tests/shader/gen/linkedlist.msl b/tests/shader/gen/linkedlist.msl new file mode 100644 index 0000000..0461d79 --- /dev/null +++ b/tests/shader/gen/linkedlist.msl @@ -0,0 +1,36 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct MemBuf +{ + uint mem[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); + +kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint rng = gl_GlobalInvocationID.x + 1u; + for (uint i = 0u; i < 100u; i++) + { + rng ^= (rng << uint(13)); + rng ^= (rng >> uint(17)); + rng ^= (rng << uint(5)); + uint bucket = rng % 65536u; + if (bucket != 0u) + { + uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed); + uint alloc = _61 + 65536u; + uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed); + uint old = _67; + _56.mem[alloc] = old; + _56.mem[alloc + 1u] = gl_GlobalInvocationID.x; + } + } +} + diff --git a/tests/shader/gen/linkedlist.spv b/tests/shader/gen/linkedlist.spv new file mode 100644 index 0000000000000000000000000000000000000000..a7232834d87bb475ba0928dea6e121c1ec952af4 GIT binary patch literal 1936 zcmZ9M*-lhJ5QYz{2r3{TZr}heD2S+lD1w6MfPzbW05dES;w*`R*WUUp-uMi}_$J=? z0B(ti-*@Iza$-*^_1E8jSJ&yTo~Fi;LrKz*v?p!Ji=;lflSY^%=}1~rUtD{+Ht}w6 zYhr48QpK^PDHW>eMAL$A#ww-VsuLYp7rb1z1VXi16Te3M0YM9j{JIwA+L(J=-C?dR z&9hT}RV>#wO6Aq+``Tt{Z?{%mT}Gq!IDR+9&B-`^V}J9__Fg^j6mh9su5D)XGx+uG z%F_PpB(2QYQQ5Afxn}EOitmAI<*lShb$*L>7Bim4w_?`4f;#ey5Ic(0CFcpLH}5d! zh> z_6X)se-!VWe9U)cT*TJ5h4oU^X}om};0svfH6Fyw=Q?B0#U6-w6l}jl-U+Z72B<#H zb`b0t8e88jyk{Zj_ZWMwa*_8o*rEOo-nsnu+|9VquMYiZ||vdqnEVGc#-6R5 zH7-z#lXb4%Z}*9< zE%sOh#~#kbH!Oi&`z_n^0a$J9>Ac@=k2@FZmcg!z_#s$L?Dq&9`>8dDoVr+31e+ti zX$36j-tm@J!T-Od$8hqo*AsB;We)d}Q|B9rGuIpYe!K&H)UpQl%|tCv!E*lJn9n)6 O{G0Us{N>MQ7W)TUwQb1& literal 0 HcmV?d00001 diff --git a/tests/shader/linkedlist.comp b/tests/shader/linkedlist.comp new file mode 100644 index 0000000..87e051b --- /dev/null +++ b/tests/shader/linkedlist.comp @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense + +// Linked list building. + +#version 450 + +#define N_BUCKETS 65536 +#define N_ITER 100 + +layout(local_size_x = 256, local_size_y = 1) in; + +layout(set = 0, binding = 0) buffer MemBuf { + uint[] mem; +}; + +void main() { + uint rng = gl_GlobalInvocationID.x + 1; + for (uint i = 0; i < N_ITER; i++) { + // xorshift32 + rng ^= rng << 13; + rng ^= rng >> 17; + rng ^= rng << 5; + uint bucket = rng % N_BUCKETS; + if (bucket != 0) { + uint alloc = atomicAdd(mem[0], 2) + N_BUCKETS; + uint old = atomicExchange(mem[bucket], alloc); + mem[alloc] = old; + mem[alloc + 1] = gl_GlobalInvocationID.x; + } + } +} diff --git a/tests/src/linkedlist.rs b/tests/src/linkedlist.rs new file mode 100644 index 0000000..d755fd2 --- /dev/null +++ b/tests/src/linkedlist.rs @@ -0,0 +1,145 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +use piet_gpu_hal::{include_shader, BackendType, BindType, DescriptorSet}; +use piet_gpu_hal::{Buffer, Pipeline}; + +use crate::clear::{ClearBinding, ClearCode, ClearStage}; +use crate::runner::{Commands, Runner}; +use crate::test_result::TestResult; +use crate::Config; + +const WG_SIZE: u64 = 256; +const N_BUCKETS: u64 = 65536; + +struct LinkedListCode { + pipeline: Pipeline, + clear_code: Option, +} + +struct LinkedListStage { + clear_stage: Option, +} + +struct LinkedListBinding { + descriptor_set: DescriptorSet, + clear_binding: Option, +} + +pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestResult { + let mut result = TestResult::new("linked list"); + let mem_buf = runner.buf_down(256 * N_BUCKETS); + let code = LinkedListCode::new(runner); + let stage = LinkedListStage::new(runner, &code, N_BUCKETS); + let binding = stage.bind(runner, &code, &mem_buf.dev_buf); + let n_iter = config.n_iter; + let mut total_elapsed = 0.0; + for i in 0..n_iter { + let mut commands = runner.commands(); + // Might clear only buckets to save time. + commands.write_timestamp(0); + stage.record(&mut commands, &code, &binding, &mem_buf.dev_buf); + commands.write_timestamp(1); + if i == 0 { + commands.cmd_buf.memory_barrier(); + commands.download(&mem_buf); + } + total_elapsed += runner.submit(commands); + if i == 0 { + let mut dst: Vec = Default::default(); + mem_buf.read(&mut dst); + } + } + result.timing(total_elapsed, N_BUCKETS * 100 * n_iter); + result +} + +impl LinkedListCode { + unsafe fn new(runner: &mut Runner) -> LinkedListCode { + let code = include_shader!(&runner.session, "../shader/gen/linkedlist"); + let pipeline = runner + .session + .create_compute_pipeline(code, &[BindType::Buffer]) + .unwrap(); + let clear_code = if runner.backend_type() != BackendType::Vulkan { + Some(ClearCode::new(runner)) + } else { + None + }; + LinkedListCode { + pipeline, + clear_code, + } + } +} + +impl LinkedListStage { + unsafe fn new(runner: &mut Runner, code: &LinkedListCode, n_buckets: u64) -> LinkedListStage { + let clear_stage = if code.clear_code.is_some() { + Some(ClearStage::new(runner, n_buckets)) + } else { + None + }; + LinkedListStage { clear_stage } + } + + unsafe fn bind( + &self, + runner: &mut Runner, + code: &LinkedListCode, + mem_buf: &Buffer, + ) -> LinkedListBinding { + let descriptor_set = runner + .session + .create_simple_descriptor_set(&code.pipeline, &[mem_buf]) + .unwrap(); + let clear_binding = if let Some(stage) = &self.clear_stage { + Some(stage.bind(runner, &code.clear_code.as_ref().unwrap(), mem_buf)) + } else { + None + }; + LinkedListBinding { + descriptor_set, + clear_binding, + } + } + + unsafe fn record( + &self, + commands: &mut Commands, + code: &LinkedListCode, + bindings: &LinkedListBinding, + out_buf: &Buffer, + ) { + if let Some(stage) = &self.clear_stage { + stage.record( + commands, + code.clear_code.as_ref().unwrap(), + bindings.clear_binding.as_ref().unwrap(), + ); + } else { + commands.cmd_buf.clear_buffer(out_buf, None); + } + commands.cmd_buf.memory_barrier(); + let n_workgroups = N_BUCKETS / WG_SIZE; + commands.cmd_buf.dispatch( + &code.pipeline, + &bindings.descriptor_set, + (n_workgroups as u32, 1, 1), + (WG_SIZE as u32, 1, 1), + ); + } +} diff --git a/tests/src/main.rs b/tests/src/main.rs index c8f57bd..dd6f4bd 100644 --- a/tests/src/main.rs +++ b/tests/src/main.rs @@ -18,6 +18,7 @@ mod clear; mod config; +mod linkedlist; mod message_passing; mod prefix; mod prefix_tree; @@ -28,8 +29,9 @@ use clap::{App, Arg}; use piet_gpu_hal::InstanceFlags; use crate::config::Config; -use crate::runner::Runner; -use crate::test_result::{ReportStyle, TestResult}; +pub use crate::runner::Runner; +use crate::test_result::ReportStyle; +pub use crate::test_result::TestResult; fn main() { let matches = App::new("piet-gpu-tests") @@ -119,6 +121,7 @@ fn main() { message_passing::Variant::Vkmm, )); } + report(&linkedlist::run_linkedlist_test(&mut runner, &config)); } } }