mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Add workaround for buffer clearing
Add a clear stage and associated tests, and also use it on non-Vulkan backends to clear the state buffer. While that's a workaround and will go away when we implement the actual clear command, it's also a nice demo of how the new "stage" structure composes.
This commit is contained in:
parent
94949a6906
commit
fbfd4ee81b
9 changed files with 261 additions and 5 deletions
|
@ -14,6 +14,10 @@ rule hlsl
|
||||||
rule msl
|
rule msl
|
||||||
command = $spirv_cross --msl $in --output $out
|
command = $spirv_cross --msl $in --output $out
|
||||||
|
|
||||||
|
build gen/clear.spv: glsl clear.comp
|
||||||
|
build gen/clear.hlsl: hlsl gen/clear.spv
|
||||||
|
build gen/clear.msl: msl gen/clear.spv
|
||||||
|
|
||||||
build gen/prefix.spv: glsl prefix.comp
|
build gen/prefix.spv: glsl prefix.comp
|
||||||
build gen/prefix.hlsl: hlsl gen/prefix.spv
|
build gen/prefix.hlsl: hlsl gen/prefix.spv
|
||||||
build gen/prefix.msl: msl gen/prefix.spv
|
build gen/prefix.msl: msl gen/prefix.spv
|
||||||
|
|
26
tests/shader/clear.comp
Normal file
26
tests/shader/clear.comp
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// Clear a buffer.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
layout(local_size_x = 256) in;
|
||||||
|
|
||||||
|
// This should probably be uniform rather than readonly,
|
||||||
|
// but we haven't done the binding work yet.
|
||||||
|
layout(binding = 0) readonly buffer ConfigBuf {
|
||||||
|
// size is in uint (4 byte) units
|
||||||
|
uint size;
|
||||||
|
uint value;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(binding = 1) buffer TargetBuf {
|
||||||
|
uint[] data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < size) {
|
||||||
|
data[ix] = value;
|
||||||
|
}
|
||||||
|
}
|
26
tests/shader/gen/clear.hlsl
Normal file
26
tests/shader/gen/clear.hlsl
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
ByteAddressBuffer _19 : register(t0);
|
||||||
|
RWByteAddressBuffer _32 : register(u1);
|
||||||
|
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < _19.Load(0))
|
||||||
|
{
|
||||||
|
_32.Store(ix * 4 + 0, _19.Load(4));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
27
tests/shader/gen/clear.msl
Normal file
27
tests/shader/gen/clear.msl
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct ConfigBuf
|
||||||
|
{
|
||||||
|
uint size;
|
||||||
|
uint value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TargetBuf
|
||||||
|
{
|
||||||
|
uint data[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < _19.size)
|
||||||
|
{
|
||||||
|
_32.data[ix] = _19.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
BIN
tests/shader/gen/clear.spv
Normal file
BIN
tests/shader/gen/clear.spv
Normal file
Binary file not shown.
146
tests/src/clear.rs
Normal file
146
tests/src/clear.rs
Normal file
|
@ -0,0 +1,146 @@
|
||||||
|
// Copyright 2021 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
//! Utilities (and a benchmark) for clearing buffers with compute shaders.
|
||||||
|
|
||||||
|
use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
|
||||||
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::runner::{Commands, Runner};
|
||||||
|
use crate::test_result::TestResult;
|
||||||
|
|
||||||
|
const WG_SIZE: u64 = 256;
|
||||||
|
|
||||||
|
/// The shader code for clearing buffers.
|
||||||
|
pub struct ClearCode {
|
||||||
|
pipeline: Pipeline,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The stage resources for clearing buffers.
|
||||||
|
pub struct ClearStage {
|
||||||
|
n_elements: u64,
|
||||||
|
config_buf: Buffer,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The binding for clearing buffers.
|
||||||
|
pub struct ClearBinding {
|
||||||
|
descriptor_set: DescriptorSet,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
|
let mut result = TestResult::new("clear buffers");
|
||||||
|
// This will be configurable.
|
||||||
|
let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
|
||||||
|
let out_buf = runner.buf_down(n_elements * 4);
|
||||||
|
let code = ClearCode::new(runner);
|
||||||
|
let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
|
||||||
|
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||||
|
// Also will be configurable of course.
|
||||||
|
let n_iter = 1000;
|
||||||
|
let mut total_elapsed = 0.0;
|
||||||
|
for i in 0..n_iter {
|
||||||
|
let mut commands = runner.commands();
|
||||||
|
commands.write_timestamp(0);
|
||||||
|
stage.record(&mut commands, &code, &binding);
|
||||||
|
commands.write_timestamp(1);
|
||||||
|
if i == 0 {
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
commands.download(&out_buf);
|
||||||
|
}
|
||||||
|
total_elapsed += runner.submit(commands);
|
||||||
|
if i == 0 {
|
||||||
|
let mut dst: Vec<u32> = Default::default();
|
||||||
|
out_buf.read(&mut dst);
|
||||||
|
if let Some(failure) = verify(&dst) {
|
||||||
|
result.fail(format!("failure at {}", failure));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.timing(total_elapsed, n_elements * n_iter);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClearCode {
|
||||||
|
pub unsafe fn new(runner: &mut Runner) -> ClearCode {
|
||||||
|
let code = include_shader!(&runner.session, "../shader/gen/Clear");
|
||||||
|
let pipeline = runner
|
||||||
|
.session
|
||||||
|
.create_compute_pipeline(
|
||||||
|
code,
|
||||||
|
&[BindType::BufReadOnly, BindType::Buffer],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
ClearCode { pipeline }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClearStage {
|
||||||
|
pub unsafe fn new(runner: &mut Runner, n_elements: u64) -> ClearStage {
|
||||||
|
Self::new_with_value(runner, n_elements, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn new_with_value(runner: &mut Runner, n_elements: u64, value: u32) -> ClearStage {
|
||||||
|
let config = [n_elements as u32, value];
|
||||||
|
let config_buf = runner
|
||||||
|
.session
|
||||||
|
.create_buffer_init(&config, BufferUsage::STORAGE)
|
||||||
|
.unwrap();
|
||||||
|
ClearStage {
|
||||||
|
n_elements,
|
||||||
|
config_buf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn bind(
|
||||||
|
&self,
|
||||||
|
runner: &mut Runner,
|
||||||
|
code: &ClearCode,
|
||||||
|
out_buf: &Buffer,
|
||||||
|
) -> ClearBinding {
|
||||||
|
let descriptor_set = runner
|
||||||
|
.session
|
||||||
|
.create_simple_descriptor_set(&code.pipeline, &[&self.config_buf, out_buf])
|
||||||
|
.unwrap();
|
||||||
|
ClearBinding { descriptor_set }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn record(
|
||||||
|
&self,
|
||||||
|
commands: &mut Commands,
|
||||||
|
code: &ClearCode,
|
||||||
|
bindings: &ClearBinding,
|
||||||
|
) {
|
||||||
|
let n_workgroups = (self.n_elements + WG_SIZE - 1) / WG_SIZE;
|
||||||
|
// An issue: for clearing large buffers (>16M), we need to check the
|
||||||
|
// number of workgroups against the (dynamically detected) limit, and
|
||||||
|
// potentially issue multiple dispatches.
|
||||||
|
commands.cmd_buf.dispatch(
|
||||||
|
&code.pipeline,
|
||||||
|
&bindings.descriptor_set,
|
||||||
|
(n_workgroups as u32, 1, 1),
|
||||||
|
(WG_SIZE as u32, 1, 1),
|
||||||
|
);
|
||||||
|
// One thing that's missing here is registering the buffers so
|
||||||
|
// they can be safely dropped by Rust code before the execution
|
||||||
|
// of the command buffer completes.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that the data is cleared.
|
||||||
|
fn verify(data: &[u32]) -> Option<usize> {
|
||||||
|
data.iter().position(|val| *val != 0x42)
|
||||||
|
}
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
//! Tests for piet-gpu shaders and GPU capabilities.
|
//! Tests for piet-gpu shaders and GPU capabilities.
|
||||||
|
|
||||||
|
mod clear;
|
||||||
mod config;
|
mod config;
|
||||||
mod prefix;
|
mod prefix;
|
||||||
mod prefix_tree;
|
mod prefix_tree;
|
||||||
|
@ -79,6 +80,7 @@ fn main() {
|
||||||
flags |= InstanceFlags::DX12;
|
flags |= InstanceFlags::DX12;
|
||||||
}
|
}
|
||||||
let mut runner = Runner::new(flags);
|
let mut runner = Runner::new(flags);
|
||||||
|
report(&clear::run_clear_test(&mut runner, &config));
|
||||||
if config.groups.matches("prefix") {
|
if config.groups.matches("prefix") {
|
||||||
report(&prefix::run_prefix_test(&mut runner, &config));
|
report(&prefix::run_prefix_test(&mut runner, &config));
|
||||||
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
|
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
|
||||||
use piet_gpu_hal::{Buffer, Pipeline};
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::runner::{Commands, Runner};
|
use crate::runner::{Commands, Runner};
|
||||||
use crate::test_result::TestResult;
|
use crate::test_result::TestResult;
|
||||||
|
@ -30,6 +31,7 @@ const ELEMENTS_PER_WG: u64 = WG_SIZE * N_ROWS;
|
||||||
/// A code struct can be created once and reused any number of times.
|
/// A code struct can be created once and reused any number of times.
|
||||||
struct PrefixCode {
|
struct PrefixCode {
|
||||||
pipeline: Pipeline,
|
pipeline: Pipeline,
|
||||||
|
clear_code: Option<ClearCode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The stage resources for the prefix sum example.
|
/// The stage resources for the prefix sum example.
|
||||||
|
@ -41,6 +43,7 @@ struct PrefixStage {
|
||||||
// treat it as a capacity.
|
// treat it as a capacity.
|
||||||
n_elements: u64,
|
n_elements: u64,
|
||||||
state_buf: Buffer,
|
state_buf: Buffer,
|
||||||
|
clear_stage: Option<(ClearStage, ClearBinding)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The binding for the prefix sum example.
|
/// The binding for the prefix sum example.
|
||||||
|
@ -63,7 +66,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let out_buf = runner.buf_down(data_buf.size());
|
let out_buf = runner.buf_down(data_buf.size());
|
||||||
let code = PrefixCode::new(runner);
|
let code = PrefixCode::new(runner);
|
||||||
let stage = PrefixStage::new(runner, n_elements);
|
let stage = PrefixStage::new(runner, &code, n_elements);
|
||||||
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
||||||
// Also will be configurable of course.
|
// Also will be configurable of course.
|
||||||
let n_iter = 1000;
|
let n_iter = 1000;
|
||||||
|
@ -100,21 +103,39 @@ impl PrefixCode {
|
||||||
&[BindType::BufReadOnly, BindType::Buffer, BindType::Buffer],
|
&[BindType::BufReadOnly, BindType::Buffer, BindType::Buffer],
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
PrefixCode { pipeline }
|
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
||||||
|
// compute shader as a workaround.
|
||||||
|
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||||
|
Some(ClearCode::new(runner))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
PrefixCode {
|
||||||
|
pipeline,
|
||||||
|
clear_code,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PrefixStage {
|
impl PrefixStage {
|
||||||
unsafe fn new(runner: &mut Runner, n_elements: u64) -> PrefixStage {
|
unsafe fn new(runner: &mut Runner, code: &PrefixCode, n_elements: u64) -> PrefixStage {
|
||||||
let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||||
let state_buf_size = 4 + 12 * n_workgroups;
|
let state_buf_size = 4 + 12 * n_workgroups;
|
||||||
let state_buf = runner
|
let state_buf = runner
|
||||||
.session
|
.session
|
||||||
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let clear_stage = if let Some(clear_code) = &code.clear_code {
|
||||||
|
let stage = ClearStage::new(runner, state_buf_size / 4);
|
||||||
|
let binding = stage.bind(runner, clear_code, &state_buf);
|
||||||
|
Some((stage, binding))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
PrefixStage {
|
PrefixStage {
|
||||||
n_elements,
|
n_elements,
|
||||||
state_buf,
|
state_buf,
|
||||||
|
clear_stage,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,7 +155,11 @@ impl PrefixStage {
|
||||||
|
|
||||||
unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
|
unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
|
||||||
let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||||
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
if let Some((stage, binding)) = &self.clear_stage {
|
||||||
|
stage.record(commands, code.clear_code.as_ref().unwrap(), binding);
|
||||||
|
} else {
|
||||||
|
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
||||||
|
}
|
||||||
commands.cmd_buf.memory_barrier();
|
commands.cmd_buf.memory_barrier();
|
||||||
commands.cmd_buf.dispatch(
|
commands.cmd_buf.dispatch(
|
||||||
&code.pipeline,
|
&code.pipeline,
|
||||||
|
|
|
@ -85,7 +85,7 @@ impl Runner {
|
||||||
let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
|
let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
|
||||||
self.cmd_buf_pool.extend(submitted.wait().unwrap());
|
self.cmd_buf_pool.extend(submitted.wait().unwrap());
|
||||||
let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
|
let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
|
||||||
timestamps[0]
|
timestamps.get(0).copied().unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
|
|
Loading…
Add table
Reference in a new issue