mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Add workaround for buffer clearing
Add a clear stage and associated tests, and also use it on non-Vulkan backends to clear the state buffer. While that's a workaround and will go away when we implement the actual clear command, it's also a nice demo of how the new "stage" structure composes.
This commit is contained in:
parent
94949a6906
commit
fbfd4ee81b
|
@ -14,6 +14,10 @@ rule hlsl
|
|||
rule msl
|
||||
command = $spirv_cross --msl $in --output $out
|
||||
|
||||
build gen/clear.spv: glsl clear.comp
|
||||
build gen/clear.hlsl: hlsl gen/clear.spv
|
||||
build gen/clear.msl: msl gen/clear.spv
|
||||
|
||||
build gen/prefix.spv: glsl prefix.comp
|
||||
build gen/prefix.hlsl: hlsl gen/prefix.spv
|
||||
build gen/prefix.msl: msl gen/prefix.spv
|
||||
|
|
26
tests/shader/clear.comp
Normal file
26
tests/shader/clear.comp
Normal file
|
@ -0,0 +1,26 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Clear a buffer.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(local_size_x = 256) in;
|
||||
|
||||
// This should probably be uniform rather than readonly,
|
||||
// but we haven't done the binding work yet.
|
||||
layout(binding = 0) readonly buffer ConfigBuf {
|
||||
// size is in uint (4 byte) units
|
||||
uint size;
|
||||
uint value;
|
||||
};
|
||||
|
||||
layout(binding = 1) buffer TargetBuf {
|
||||
uint[] data;
|
||||
};
|
||||
|
||||
void main() {
|
||||
uint ix = gl_GlobalInvocationID.x;
|
||||
if (ix < size) {
|
||||
data[ix] = value;
|
||||
}
|
||||
}
|
26
tests/shader/gen/clear.hlsl
Normal file
26
tests/shader/gen/clear.hlsl
Normal file
|
@ -0,0 +1,26 @@
|
|||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
ByteAddressBuffer _19 : register(t0);
|
||||
RWByteAddressBuffer _32 : register(u1);
|
||||
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x;
|
||||
if (ix < _19.Load(0))
|
||||
{
|
||||
_32.Store(ix * 4 + 0, _19.Load(4));
|
||||
}
|
||||
}
|
||||
|
||||
[numthreads(256, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
27
tests/shader/gen/clear.msl
Normal file
27
tests/shader/gen/clear.msl
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct ConfigBuf
|
||||
{
|
||||
uint size;
|
||||
uint value;
|
||||
};
|
||||
|
||||
struct TargetBuf
|
||||
{
|
||||
uint data[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x;
|
||||
if (ix < _19.size)
|
||||
{
|
||||
_32.data[ix] = _19.value;
|
||||
}
|
||||
}
|
||||
|
BIN
tests/shader/gen/clear.spv
Normal file
BIN
tests/shader/gen/clear.spv
Normal file
Binary file not shown.
146
tests/src/clear.rs
Normal file
146
tests/src/clear.rs
Normal file
|
@ -0,0 +1,146 @@
|
|||
// Copyright 2021 The piet-gpu authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
//! Utilities (and a benchmark) for clearing buffers with compute shaders.
|
||||
|
||||
use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
|
||||
use piet_gpu_hal::{Buffer, Pipeline};
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::runner::{Commands, Runner};
|
||||
use crate::test_result::TestResult;
|
||||
|
||||
const WG_SIZE: u64 = 256;
|
||||
|
||||
/// The shader code for clearing buffers.
|
||||
pub struct ClearCode {
|
||||
pipeline: Pipeline,
|
||||
}
|
||||
|
||||
/// The stage resources for clearing buffers.
|
||||
pub struct ClearStage {
|
||||
n_elements: u64,
|
||||
config_buf: Buffer,
|
||||
}
|
||||
|
||||
/// The binding for clearing buffers.
|
||||
pub struct ClearBinding {
|
||||
descriptor_set: DescriptorSet,
|
||||
}
|
||||
|
||||
pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||
let mut result = TestResult::new("clear buffers");
|
||||
// This will be configurable.
|
||||
let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
|
||||
let out_buf = runner.buf_down(n_elements * 4);
|
||||
let code = ClearCode::new(runner);
|
||||
let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
|
||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||
// Also will be configurable of course.
|
||||
let n_iter = 1000;
|
||||
let mut total_elapsed = 0.0;
|
||||
for i in 0..n_iter {
|
||||
let mut commands = runner.commands();
|
||||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands, &code, &binding);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&out_buf);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 {
|
||||
let mut dst: Vec<u32> = Default::default();
|
||||
out_buf.read(&mut dst);
|
||||
if let Some(failure) = verify(&dst) {
|
||||
result.fail(format!("failure at {}", failure));
|
||||
}
|
||||
}
|
||||
}
|
||||
result.timing(total_elapsed, n_elements * n_iter);
|
||||
result
|
||||
}
|
||||
|
||||
impl ClearCode {
|
||||
pub unsafe fn new(runner: &mut Runner) -> ClearCode {
|
||||
let code = include_shader!(&runner.session, "../shader/gen/Clear");
|
||||
let pipeline = runner
|
||||
.session
|
||||
.create_compute_pipeline(
|
||||
code,
|
||||
&[BindType::BufReadOnly, BindType::Buffer],
|
||||
)
|
||||
.unwrap();
|
||||
ClearCode { pipeline }
|
||||
}
|
||||
}
|
||||
|
||||
impl ClearStage {
|
||||
pub unsafe fn new(runner: &mut Runner, n_elements: u64) -> ClearStage {
|
||||
Self::new_with_value(runner, n_elements, 0)
|
||||
}
|
||||
|
||||
pub unsafe fn new_with_value(runner: &mut Runner, n_elements: u64, value: u32) -> ClearStage {
|
||||
let config = [n_elements as u32, value];
|
||||
let config_buf = runner
|
||||
.session
|
||||
.create_buffer_init(&config, BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
ClearStage {
|
||||
n_elements,
|
||||
config_buf,
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn bind(
|
||||
&self,
|
||||
runner: &mut Runner,
|
||||
code: &ClearCode,
|
||||
out_buf: &Buffer,
|
||||
) -> ClearBinding {
|
||||
let descriptor_set = runner
|
||||
.session
|
||||
.create_simple_descriptor_set(&code.pipeline, &[&self.config_buf, out_buf])
|
||||
.unwrap();
|
||||
ClearBinding { descriptor_set }
|
||||
}
|
||||
|
||||
pub unsafe fn record(
|
||||
&self,
|
||||
commands: &mut Commands,
|
||||
code: &ClearCode,
|
||||
bindings: &ClearBinding,
|
||||
) {
|
||||
let n_workgroups = (self.n_elements + WG_SIZE - 1) / WG_SIZE;
|
||||
// An issue: for clearing large buffers (>16M), we need to check the
|
||||
// number of workgroups against the (dynamically detected) limit, and
|
||||
// potentially issue multiple dispatches.
|
||||
commands.cmd_buf.dispatch(
|
||||
&code.pipeline,
|
||||
&bindings.descriptor_set,
|
||||
(n_workgroups as u32, 1, 1),
|
||||
(WG_SIZE as u32, 1, 1),
|
||||
);
|
||||
// One thing that's missing here is registering the buffers so
|
||||
// they can be safely dropped by Rust code before the execution
|
||||
// of the command buffer completes.
|
||||
}
|
||||
}
|
||||
|
||||
// Verify that the data is cleared.
|
||||
fn verify(data: &[u32]) -> Option<usize> {
|
||||
data.iter().position(|val| *val != 0x42)
|
||||
}
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
//! Tests for piet-gpu shaders and GPU capabilities.
|
||||
|
||||
mod clear;
|
||||
mod config;
|
||||
mod prefix;
|
||||
mod prefix_tree;
|
||||
|
@ -79,6 +80,7 @@ fn main() {
|
|||
flags |= InstanceFlags::DX12;
|
||||
}
|
||||
let mut runner = Runner::new(flags);
|
||||
report(&clear::run_clear_test(&mut runner, &config));
|
||||
if config.groups.matches("prefix") {
|
||||
report(&prefix::run_prefix_test(&mut runner, &config));
|
||||
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
|
||||
use piet_gpu_hal::{Buffer, Pipeline};
|
||||
|
||||
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||
use crate::config::Config;
|
||||
use crate::runner::{Commands, Runner};
|
||||
use crate::test_result::TestResult;
|
||||
|
@ -30,6 +31,7 @@ const ELEMENTS_PER_WG: u64 = WG_SIZE * N_ROWS;
|
|||
/// A code struct can be created once and reused any number of times.
|
||||
struct PrefixCode {
|
||||
pipeline: Pipeline,
|
||||
clear_code: Option<ClearCode>,
|
||||
}
|
||||
|
||||
/// The stage resources for the prefix sum example.
|
||||
|
@ -41,6 +43,7 @@ struct PrefixStage {
|
|||
// treat it as a capacity.
|
||||
n_elements: u64,
|
||||
state_buf: Buffer,
|
||||
clear_stage: Option<(ClearStage, ClearBinding)>,
|
||||
}
|
||||
|
||||
/// The binding for the prefix sum example.
|
||||
|
@ -63,7 +66,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
|||
.unwrap();
|
||||
let out_buf = runner.buf_down(data_buf.size());
|
||||
let code = PrefixCode::new(runner);
|
||||
let stage = PrefixStage::new(runner, n_elements);
|
||||
let stage = PrefixStage::new(runner, &code, n_elements);
|
||||
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
||||
// Also will be configurable of course.
|
||||
let n_iter = 1000;
|
||||
|
@ -100,21 +103,39 @@ impl PrefixCode {
|
|||
&[BindType::BufReadOnly, BindType::Buffer, BindType::Buffer],
|
||||
)
|
||||
.unwrap();
|
||||
PrefixCode { pipeline }
|
||||
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
||||
// compute shader as a workaround.
|
||||
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||
Some(ClearCode::new(runner))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
PrefixCode {
|
||||
pipeline,
|
||||
clear_code,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PrefixStage {
|
||||
unsafe fn new(runner: &mut Runner, n_elements: u64) -> PrefixStage {
|
||||
unsafe fn new(runner: &mut Runner, code: &PrefixCode, n_elements: u64) -> PrefixStage {
|
||||
let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||
let state_buf_size = 4 + 12 * n_workgroups;
|
||||
let state_buf = runner
|
||||
.session
|
||||
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||
.unwrap();
|
||||
let clear_stage = if let Some(clear_code) = &code.clear_code {
|
||||
let stage = ClearStage::new(runner, state_buf_size / 4);
|
||||
let binding = stage.bind(runner, clear_code, &state_buf);
|
||||
Some((stage, binding))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
PrefixStage {
|
||||
n_elements,
|
||||
state_buf,
|
||||
clear_stage,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -134,7 +155,11 @@ impl PrefixStage {
|
|||
|
||||
unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
|
||||
let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
||||
if let Some((stage, binding)) = &self.clear_stage {
|
||||
stage.record(commands, code.clear_code.as_ref().unwrap(), binding);
|
||||
} else {
|
||||
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
||||
}
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.cmd_buf.dispatch(
|
||||
&code.pipeline,
|
||||
|
|
|
@ -85,7 +85,7 @@ impl Runner {
|
|||
let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
|
||||
self.cmd_buf_pool.extend(submitted.wait().unwrap());
|
||||
let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
|
||||
timestamps[0]
|
||||
timestamps.get(0).copied().unwrap_or_default()
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
|
|
Loading…
Reference in a new issue