mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
Add linked list test
Measure bandwidth of building linked lists with atomics.
This commit is contained in:
parent
10a624ee75
commit
c6965de557
|
@ -65,3 +65,8 @@ build gen/message_passing.msl: msl gen/message_passing.spv
|
||||||
|
|
||||||
build gen/message_passing_vkmm.spv: glsl message_passing.comp
|
build gen/message_passing_vkmm.spv: glsl message_passing.comp
|
||||||
flags = -DVKMM
|
flags = -DVKMM
|
||||||
|
|
||||||
|
build gen/linkedlist.spv: glsl linkedlist.comp
|
||||||
|
build gen/linkedlist.hlsl: hlsl gen/linkedlist.spv
|
||||||
|
build gen/linkedlist.dxil: dxil gen/linkedlist.hlsl
|
||||||
|
build gen/linkedlist.msl: msl gen/linkedlist.spv
|
||||||
|
|
39
tests/shader/gen/linkedlist.hlsl
Normal file
39
tests/shader/gen/linkedlist.hlsl
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
RWByteAddressBuffer _56 : register(u0);
|
||||||
|
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint rng = gl_GlobalInvocationID.x + 1u;
|
||||||
|
for (uint i = 0u; i < 100u; i++)
|
||||||
|
{
|
||||||
|
rng ^= (rng << uint(13));
|
||||||
|
rng ^= (rng >> uint(17));
|
||||||
|
rng ^= (rng << uint(5));
|
||||||
|
uint bucket = rng % 65536u;
|
||||||
|
if (bucket != 0u)
|
||||||
|
{
|
||||||
|
uint _61;
|
||||||
|
_56.InterlockedAdd(0, 2u, _61);
|
||||||
|
uint alloc = _61 + 65536u;
|
||||||
|
uint _67;
|
||||||
|
_56.InterlockedExchange(bucket * 4 + 0, alloc, _67);
|
||||||
|
uint old = _67;
|
||||||
|
_56.Store(alloc * 4 + 0, old);
|
||||||
|
_56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
36
tests/shader/gen/linkedlist.msl
Normal file
36
tests/shader/gen/linkedlist.msl
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
#include <metal_atomic>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct MemBuf
|
||||||
|
{
|
||||||
|
uint mem[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
uint rng = gl_GlobalInvocationID.x + 1u;
|
||||||
|
for (uint i = 0u; i < 100u; i++)
|
||||||
|
{
|
||||||
|
rng ^= (rng << uint(13));
|
||||||
|
rng ^= (rng >> uint(17));
|
||||||
|
rng ^= (rng << uint(5));
|
||||||
|
uint bucket = rng % 65536u;
|
||||||
|
if (bucket != 0u)
|
||||||
|
{
|
||||||
|
uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed);
|
||||||
|
uint alloc = _61 + 65536u;
|
||||||
|
uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed);
|
||||||
|
uint old = _67;
|
||||||
|
_56.mem[alloc] = old;
|
||||||
|
_56.mem[alloc + 1u] = gl_GlobalInvocationID.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
BIN
tests/shader/gen/linkedlist.spv
Normal file
BIN
tests/shader/gen/linkedlist.spv
Normal file
Binary file not shown.
31
tests/shader/linkedlist.comp
Normal file
31
tests/shader/linkedlist.comp
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// Linked list building.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
#define N_BUCKETS 65536
|
||||||
|
#define N_ITER 100
|
||||||
|
|
||||||
|
layout(local_size_x = 256, local_size_y = 1) in;
|
||||||
|
|
||||||
|
layout(set = 0, binding = 0) buffer MemBuf {
|
||||||
|
uint[] mem;
|
||||||
|
};
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint rng = gl_GlobalInvocationID.x + 1;
|
||||||
|
for (uint i = 0; i < N_ITER; i++) {
|
||||||
|
// xorshift32
|
||||||
|
rng ^= rng << 13;
|
||||||
|
rng ^= rng >> 17;
|
||||||
|
rng ^= rng << 5;
|
||||||
|
uint bucket = rng % N_BUCKETS;
|
||||||
|
if (bucket != 0) {
|
||||||
|
uint alloc = atomicAdd(mem[0], 2) + N_BUCKETS;
|
||||||
|
uint old = atomicExchange(mem[bucket], alloc);
|
||||||
|
mem[alloc] = old;
|
||||||
|
mem[alloc + 1] = gl_GlobalInvocationID.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
145
tests/src/linkedlist.rs
Normal file
145
tests/src/linkedlist.rs
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
// Copyright 2021 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
use piet_gpu_hal::{include_shader, BackendType, BindType, DescriptorSet};
|
||||||
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||||
|
use crate::runner::{Commands, Runner};
|
||||||
|
use crate::test_result::TestResult;
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
|
const WG_SIZE: u64 = 256;
|
||||||
|
const N_BUCKETS: u64 = 65536;
|
||||||
|
|
||||||
|
struct LinkedListCode {
|
||||||
|
pipeline: Pipeline,
|
||||||
|
clear_code: Option<ClearCode>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct LinkedListStage {
|
||||||
|
clear_stage: Option<ClearStage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct LinkedListBinding {
|
||||||
|
descriptor_set: DescriptorSet,
|
||||||
|
clear_binding: Option<ClearBinding>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
|
let mut result = TestResult::new("linked list");
|
||||||
|
let mem_buf = runner.buf_down(256 * N_BUCKETS);
|
||||||
|
let code = LinkedListCode::new(runner);
|
||||||
|
let stage = LinkedListStage::new(runner, &code, N_BUCKETS);
|
||||||
|
let binding = stage.bind(runner, &code, &mem_buf.dev_buf);
|
||||||
|
let n_iter = config.n_iter;
|
||||||
|
let mut total_elapsed = 0.0;
|
||||||
|
for i in 0..n_iter {
|
||||||
|
let mut commands = runner.commands();
|
||||||
|
// Might clear only buckets to save time.
|
||||||
|
commands.write_timestamp(0);
|
||||||
|
stage.record(&mut commands, &code, &binding, &mem_buf.dev_buf);
|
||||||
|
commands.write_timestamp(1);
|
||||||
|
if i == 0 {
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
commands.download(&mem_buf);
|
||||||
|
}
|
||||||
|
total_elapsed += runner.submit(commands);
|
||||||
|
if i == 0 {
|
||||||
|
let mut dst: Vec<u32> = Default::default();
|
||||||
|
mem_buf.read(&mut dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.timing(total_elapsed, N_BUCKETS * 100 * n_iter);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LinkedListCode {
|
||||||
|
unsafe fn new(runner: &mut Runner) -> LinkedListCode {
|
||||||
|
let code = include_shader!(&runner.session, "../shader/gen/linkedlist");
|
||||||
|
let pipeline = runner
|
||||||
|
.session
|
||||||
|
.create_compute_pipeline(code, &[BindType::Buffer])
|
||||||
|
.unwrap();
|
||||||
|
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||||
|
Some(ClearCode::new(runner))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
LinkedListCode {
|
||||||
|
pipeline,
|
||||||
|
clear_code,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LinkedListStage {
|
||||||
|
unsafe fn new(runner: &mut Runner, code: &LinkedListCode, n_buckets: u64) -> LinkedListStage {
|
||||||
|
let clear_stage = if code.clear_code.is_some() {
|
||||||
|
Some(ClearStage::new(runner, n_buckets))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
LinkedListStage { clear_stage }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn bind(
|
||||||
|
&self,
|
||||||
|
runner: &mut Runner,
|
||||||
|
code: &LinkedListCode,
|
||||||
|
mem_buf: &Buffer,
|
||||||
|
) -> LinkedListBinding {
|
||||||
|
let descriptor_set = runner
|
||||||
|
.session
|
||||||
|
.create_simple_descriptor_set(&code.pipeline, &[mem_buf])
|
||||||
|
.unwrap();
|
||||||
|
let clear_binding = if let Some(stage) = &self.clear_stage {
|
||||||
|
Some(stage.bind(runner, &code.clear_code.as_ref().unwrap(), mem_buf))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
LinkedListBinding {
|
||||||
|
descriptor_set,
|
||||||
|
clear_binding,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn record(
|
||||||
|
&self,
|
||||||
|
commands: &mut Commands,
|
||||||
|
code: &LinkedListCode,
|
||||||
|
bindings: &LinkedListBinding,
|
||||||
|
out_buf: &Buffer,
|
||||||
|
) {
|
||||||
|
if let Some(stage) = &self.clear_stage {
|
||||||
|
stage.record(
|
||||||
|
commands,
|
||||||
|
code.clear_code.as_ref().unwrap(),
|
||||||
|
bindings.clear_binding.as_ref().unwrap(),
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
commands.cmd_buf.clear_buffer(out_buf, None);
|
||||||
|
}
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
let n_workgroups = N_BUCKETS / WG_SIZE;
|
||||||
|
commands.cmd_buf.dispatch(
|
||||||
|
&code.pipeline,
|
||||||
|
&bindings.descriptor_set,
|
||||||
|
(n_workgroups as u32, 1, 1),
|
||||||
|
(WG_SIZE as u32, 1, 1),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
mod clear;
|
mod clear;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod linkedlist;
|
||||||
mod message_passing;
|
mod message_passing;
|
||||||
mod prefix;
|
mod prefix;
|
||||||
mod prefix_tree;
|
mod prefix_tree;
|
||||||
|
@ -28,8 +29,9 @@ use clap::{App, Arg};
|
||||||
use piet_gpu_hal::InstanceFlags;
|
use piet_gpu_hal::InstanceFlags;
|
||||||
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::runner::Runner;
|
pub use crate::runner::Runner;
|
||||||
use crate::test_result::{ReportStyle, TestResult};
|
use crate::test_result::ReportStyle;
|
||||||
|
pub use crate::test_result::TestResult;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let matches = App::new("piet-gpu-tests")
|
let matches = App::new("piet-gpu-tests")
|
||||||
|
@ -119,6 +121,7 @@ fn main() {
|
||||||
message_passing::Variant::Vkmm,
|
message_passing::Variant::Vkmm,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
report(&linkedlist::run_linkedlist_test(&mut runner, &config));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue