diff --git a/tests/shader/build.ninja b/tests/shader/build.ninja
index 93a0b66..c135fa2 100644
--- a/tests/shader/build.ninja
+++ b/tests/shader/build.ninja
@@ -14,6 +14,10 @@ rule hlsl
 rule msl
   command = $spirv_cross --msl $in --output $out
 
+build gen/clear.spv: glsl clear.comp
+build gen/clear.hlsl: hlsl gen/clear.spv
+build gen/clear.msl: msl gen/clear.spv
+
 build gen/prefix.spv: glsl prefix.comp
 build gen/prefix.hlsl: hlsl gen/prefix.spv
 build gen/prefix.msl: msl gen/prefix.spv
diff --git a/tests/shader/clear.comp b/tests/shader/clear.comp
new file mode 100644
index 0000000..62a5fb2
--- /dev/null
+++ b/tests/shader/clear.comp
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
+
+// Clear a buffer.
+
+#version 450
+
+layout(local_size_x = 256) in;
+
+// This should probably be uniform rather than readonly,
+// but we haven't done the binding work yet.
+layout(binding = 0) readonly buffer ConfigBuf {
+    // size is in uint (4 byte) units
+    uint size;
+    uint value;
+};
+
+layout(binding = 1) buffer TargetBuf {
+    uint[] data;
+};
+
+void main() {
+    uint ix = gl_GlobalInvocationID.x;
+    if (ix < size) {
+        data[ix] = value;
+    }
+}
diff --git a/tests/shader/gen/clear.hlsl b/tests/shader/gen/clear.hlsl
new file mode 100644
index 0000000..f6a576c
--- /dev/null
+++ b/tests/shader/gen/clear.hlsl
@@ -0,0 +1,26 @@
+static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
+
+ByteAddressBuffer _19 : register(t0);
+RWByteAddressBuffer _32 : register(u1);
+
+static uint3 gl_GlobalInvocationID;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+};
+
+void comp_main()
+{
+    uint ix = gl_GlobalInvocationID.x;
+    if (ix < _19.Load(0))
+    {
+        _32.Store(ix * 4 + 0, _19.Load(4));
+    }
+}
+
+[numthreads(256, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+    comp_main();
+}
diff --git a/tests/shader/gen/clear.msl b/tests/shader/gen/clear.msl
new file mode 100644
index 0000000..d89853b
--- /dev/null
+++ b/tests/shader/gen/clear.msl
@@ -0,0 +1,27 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct ConfigBuf
+{
+    uint size;
+    uint value;
+};
+
+struct TargetBuf
+{
+    uint data[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
+
+kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    uint ix = gl_GlobalInvocationID.x;
+    if (ix < _19.size)
+    {
+        _32.data[ix] = _19.value;
+    }
+}
+
diff --git a/tests/shader/gen/clear.spv b/tests/shader/gen/clear.spv
new file mode 100644
index 0000000..0e8d1d7
Binary files /dev/null and b/tests/shader/gen/clear.spv differ
diff --git a/tests/src/clear.rs b/tests/src/clear.rs
new file mode 100644
index 0000000..a7934d1
--- /dev/null
+++ b/tests/src/clear.rs
@@ -0,0 +1,146 @@
+// Copyright 2021 The piet-gpu authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Also licensed under MIT license, at your choice.
+
+//! Utilities (and a benchmark) for clearing buffers with compute shaders.
+
+use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
+use piet_gpu_hal::{Buffer, Pipeline};
+
+use crate::config::Config;
+use crate::runner::{Commands, Runner};
+use crate::test_result::TestResult;
+
+const WG_SIZE: u64 = 256;
+
+/// The shader code for clearing buffers.
+pub struct ClearCode {
+    pipeline: Pipeline,
+}
+
+/// The stage resources for clearing buffers.
+pub struct ClearStage {
+    n_elements: u64,
+    config_buf: Buffer,
+}
+
+/// The binding for clearing buffers.
+pub struct ClearBinding {
+    descriptor_set: DescriptorSet,
+}
+
+pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
+    let mut result = TestResult::new("clear buffers");
+    // This will be configurable.
+    let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
+    let out_buf = runner.buf_down(n_elements * 4);
+    let code = ClearCode::new(runner);
+    let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
+    let binding = stage.bind(runner, &code, &out_buf.dev_buf);
+    // Also will be configurable of course.
+    let n_iter = 1000;
+    let mut total_elapsed = 0.0;
+    for i in 0..n_iter {
+        let mut commands = runner.commands();
+        commands.write_timestamp(0);
+        stage.record(&mut commands, &code, &binding);
+        commands.write_timestamp(1);
+        if i == 0 {
+            commands.cmd_buf.memory_barrier();
+            commands.download(&out_buf);
+        }
+        total_elapsed += runner.submit(commands);
+        if i == 0 {
+            let mut dst: Vec<u32> = Default::default();
+            out_buf.read(&mut dst);
+            if let Some(failure) = verify(&dst) {
+                result.fail(format!("failure at {}", failure));
+            }
+        }
+    }
+    result.timing(total_elapsed, n_elements * n_iter);
+    result
+}
+
+impl ClearCode {
+    pub unsafe fn new(runner: &mut Runner) -> ClearCode {
+        let code = include_shader!(&runner.session, "../shader/gen/clear");
+        let pipeline = runner
+            .session
+            .create_compute_pipeline(
+                code,
+                &[BindType::BufReadOnly, BindType::Buffer],
+            )
+            .unwrap();
+        ClearCode { pipeline }
+    }
+}
+
+impl ClearStage {
+    pub unsafe fn new(runner: &mut Runner, n_elements: u64) -> ClearStage {
+        Self::new_with_value(runner, n_elements, 0)
+    }
+
+    pub unsafe fn new_with_value(runner: &mut Runner, n_elements: u64, value: u32) -> ClearStage {
+        let config = [n_elements as u32, value];
+        let config_buf = runner
+            .session
+            .create_buffer_init(&config, BufferUsage::STORAGE)
+            .unwrap();
+        ClearStage {
+            n_elements,
+            config_buf,
+        }
+    }
+
+    pub unsafe fn bind(
+        &self,
+        runner: &mut Runner,
+        code: &ClearCode,
+        out_buf: &Buffer,
+    ) -> ClearBinding {
+        let descriptor_set = runner
+            .session
+            .create_simple_descriptor_set(&code.pipeline, &[&self.config_buf, out_buf])
+            .unwrap();
+        ClearBinding { descriptor_set }
+    }
+
+    pub unsafe fn record(
+        &self,
+        commands: &mut Commands,
+        code: &ClearCode,
+        bindings: &ClearBinding,
+    ) {
+        let n_workgroups = (self.n_elements + WG_SIZE - 1) / WG_SIZE;
+        // An issue: for clearing large buffers (>16M), we need to check the
+        // number of workgroups against the (dynamically detected) limit, and
+        // potentially issue multiple dispatches.
+        commands.cmd_buf.dispatch(
+            &code.pipeline,
+            &bindings.descriptor_set,
+            (n_workgroups as u32, 1, 1),
+            (WG_SIZE as u32, 1, 1),
+        );
+        // One thing that's missing here is registering the buffers so
+        // they can be safely dropped by Rust code before the execution
+        // of the command buffer completes.
+    }
+}
+
+// Verify that the data is cleared.
+fn verify(data: &[u32]) -> Option<usize> {
+    data.iter().position(|val| *val != 0x42)
+}
diff --git a/tests/src/main.rs b/tests/src/main.rs
index 40329b0..647e8db 100644
--- a/tests/src/main.rs
+++ b/tests/src/main.rs
@@ -16,6 +16,7 @@
 
 //! Tests for piet-gpu shaders and GPU capabilities.
 
+mod clear;
 mod config;
 mod prefix;
 mod prefix_tree;
@@ -79,6 +80,7 @@ fn main() {
         flags |= InstanceFlags::DX12;
     }
     let mut runner = Runner::new(flags);
+    report(&clear::run_clear_test(&mut runner, &config));
     if config.groups.matches("prefix") {
         report(&prefix::run_prefix_test(&mut runner, &config));
         report(&prefix_tree::run_prefix_test(&mut runner, &config));
diff --git a/tests/src/prefix.rs b/tests/src/prefix.rs
index 0c55e77..d431480 100644
--- a/tests/src/prefix.rs
+++ b/tests/src/prefix.rs
@@ -17,6 +17,7 @@
 use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
 use piet_gpu_hal::{Buffer, Pipeline};
 
+use crate::clear::{ClearBinding, ClearCode, ClearStage};
 use crate::config::Config;
 use crate::runner::{Commands, Runner};
 use crate::test_result::TestResult;
@@ -30,6 +31,7 @@ const ELEMENTS_PER_WG: u64 = WG_SIZE * N_ROWS;
 /// A code struct can be created once and reused any number of times.
 struct PrefixCode {
     pipeline: Pipeline,
+    clear_code: Option<ClearCode>,
 }
 
 /// The stage resources for the prefix sum example.
@@ -41,6 +43,7 @@ struct PrefixStage {
     // treat it as a capacity.
     n_elements: u64,
     state_buf: Buffer,
+    clear_stage: Option<(ClearStage, ClearBinding)>,
 }
 
 /// The binding for the prefix sum example.
@@ -63,7 +66,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
         .unwrap();
     let out_buf = runner.buf_down(data_buf.size());
     let code = PrefixCode::new(runner);
-    let stage = PrefixStage::new(runner, n_elements);
+    let stage = PrefixStage::new(runner, &code, n_elements);
     let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
     // Also will be configurable of course.
     let n_iter = 1000;
@@ -100,21 +103,39 @@ impl PrefixCode {
                 &[BindType::BufReadOnly, BindType::Buffer, BindType::Buffer],
             )
             .unwrap();
-        PrefixCode { pipeline }
+        // Currently, DX12 and Metal backends don't support buffer clearing, so use a
+        // compute shader as a workaround.
+        let clear_code = if runner.backend_type() != BackendType::Vulkan {
+            Some(ClearCode::new(runner))
+        } else {
+            None
+        };
+        PrefixCode {
+            pipeline,
+            clear_code,
+        }
     }
 }
 
 impl PrefixStage {
-    unsafe fn new(runner: &mut Runner, n_elements: u64) -> PrefixStage {
+    unsafe fn new(runner: &mut Runner, code: &PrefixCode, n_elements: u64) -> PrefixStage {
         let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
         let state_buf_size = 4 + 12 * n_workgroups;
         let state_buf = runner
             .session
             .create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
             .unwrap();
+        let clear_stage = if let Some(clear_code) = &code.clear_code {
+            let stage = ClearStage::new(runner, state_buf_size / 4);
+            let binding = stage.bind(runner, clear_code, &state_buf);
+            Some((stage, binding))
+        } else {
+            None
+        };
         PrefixStage {
             n_elements,
             state_buf,
+            clear_stage,
         }
     }
 
@@ -134,7 +155,11 @@ impl PrefixStage {
 
     unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
         let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
-        commands.cmd_buf.clear_buffer(&self.state_buf, None);
+        if let Some((stage, binding)) = &self.clear_stage {
+            stage.record(commands, code.clear_code.as_ref().unwrap(), binding);
+        } else {
+            commands.cmd_buf.clear_buffer(&self.state_buf, None);
+        }
         commands.cmd_buf.memory_barrier();
         commands.cmd_buf.dispatch(
             &code.pipeline,
diff --git a/tests/src/runner.rs b/tests/src/runner.rs
index ef2b93c..ed57c29 100644
--- a/tests/src/runner.rs
+++ b/tests/src/runner.rs
@@ -85,7 +85,7 @@ impl Runner {
         let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
         self.cmd_buf_pool.extend(submitted.wait().unwrap());
         let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
-        timestamps[0]
+        timestamps.get(0).copied().unwrap_or_default()
     }
 
     #[allow(unused)]