API changes and cleanup

Add workgroup size to dispatch call (needed by metal). Change all fence
references to mutable for consistency.

Move backend traits to a separate file (move them out of the toplevel
namespace in preparation for the hub types going there, to make the
public API nicer).

Add a method and macro for automatically choosing shader code, and
change collatz example to generate all 3 kinds on build.
This commit is contained in:
Raph Levien 2021-05-28 15:17:36 -07:00
parent 641891b01f
commit 7d7c86c44b
17 changed files with 524 additions and 348 deletions

View file

@ -1,6 +1,7 @@
use piet_gpu_hal::hub;
use piet_gpu_hal::mux::{Instance, ShaderCode};
use piet_gpu_hal::mux::Instance;
use piet_gpu_hal::BufferUsage;
use piet_gpu_hal::include_shader;
fn main() {
let (instance, _) = Instance::new(None).unwrap();
@ -10,7 +11,7 @@ fn main() {
let usage = BufferUsage::MAP_READ | BufferUsage::STORAGE;
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
let buffer = session.create_buffer_init(&src, usage).unwrap();
let code = ShaderCode::Msl(include_str!("./shader/collatz.msl"));
let code = include_shader!(&session, "./shader/gen/collatz");
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
let descriptor_set = session
.create_simple_descriptor_set(&pipeline, &[&buffer])
@ -20,7 +21,7 @@ fn main() {
cmd_buf.begin();
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1));
cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1), (1, 1, 1));
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.host_barrier();
cmd_buf.finish();

View file

@ -2,7 +2,8 @@
//! This will probably go away when it's fully implemented and we can
//! just use the hub.
use piet_gpu_hal::{dx12, BufferUsage, CmdBuf, Device, Error};
use piet_gpu_hal::{dx12, BufferUsage, Error};
use piet_gpu_hal::backend::{CmdBuf, Device};
const SHADER_CODE: &str = r#"RWByteAddressBuffer _53 : register(u0, space0);
@ -78,7 +79,7 @@ fn toy() -> Result<(), Error> {
cmd_buf.copy_buffer(&buf, &dev_buf);
cmd_buf.memory_barrier();
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1), (256, 1, 1));
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&dev_buf, &buf);

View file

@ -1,25 +0,0 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! An example to exercise the Metal backend. Once that becomes
//! functional, this file will go away.
use piet_gpu_hal::metal;
fn main() {
let instance = metal::MetalInstance;
println!("hello metal");
}

View file

@ -1,10 +1,19 @@
# Build file for shaders.
# You must have glslangValidator in your path, or patch here.
# You must have Vulkan tools in your path, or patch here.
glslang_validator = glslangValidator
spirv_cross = spirv-cross
rule glsl
command = $glslang_validator -V -o $out $in
build collatz.spv: glsl collatz.comp
rule hlsl
command = $spirv_cross --hlsl $in --output $out
rule msl
command = $spirv_cross --msl $in --output $out
build gen/collatz.spv: glsl collatz.comp
build gen/collatz.hlsl: hlsl gen/collatz.spv
build gen/collatz.msl: msl gen/collatz.spv

View file

@ -0,0 +1,62 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
RWByteAddressBuffer _57 : register(u0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
float mod(float x, float y)
{
return x - y * floor(x / y);
}
float2 mod(float2 x, float2 y)
{
return x - y * floor(x / y);
}
float3 mod(float3 x, float3 y)
{
return x - y * floor(x / y);
}
float4 mod(float4 x, float4 y)
{
return x - y * floor(x / y);
}
uint collatz_iterations(inout uint n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0f) == 0.0f)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
void comp_main()
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.Load(index * 4 + 0);
uint _65 = collatz_iterations(param);
_57.Store(index * 4 + 0, _65);
}
[numthreads(1, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -0,0 +1,48 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct PrimeIndices
{
uint indices[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
template<typename Tx, typename Ty>
inline Tx mod(Tx x, Ty y)
{
return x - y * floor(x / y);
}
static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0) == 0.0)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.indices[index];
uint _65 = collatz_iterations(param);
_57.indices[index] = _65;
}

265
piet-gpu-hal/src/backend.rs Normal file
View file

@ -0,0 +1,265 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
//! The generic trait for backends to implement.
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, mux::ShaderCode};
pub trait Device: Sized {
type Buffer: 'static;
type Image;
type Pipeline;
type DescriptorSet;
type QueryPool;
type CmdBuf: CmdBuf<Self>;
type Fence;
type Semaphore;
type PipelineBuilder: PipelineBuilder<Self>;
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
type Sampler;
type ShaderSource: ?Sized;
/// Query the GPU info.
///
/// This method may be expensive, so the hub should call it once and retain
/// the info.
fn query_gpu_info(&self) -> GpuInfo;
fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error>;
/// Destroy a buffer.
///
/// The same safety requirements hold as in Vulkan: the buffer cannot be used
/// after this call, and all commands referencing this buffer must have completed.
///
/// Maybe doesn't need result return?
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
/// Destroy an image.
///
/// The same safety requirements hold as in Vulkan: the image cannot be used
/// after this call, and all commands referencing this image must have completed.
///
/// Use this only with images we created, not for swapchain images.
///
/// Maybe doesn't need result return?
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
/// Start building a pipeline.
///
/// A pipeline is a bit of shader IR plus a signature for what kinds of resources
/// it expects.
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;
/// Start building a descriptor set.
///
/// A descriptor set is a binding of resources for a given pipeline.
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;
/// Create a simple compute pipeline that operates on buffers and storage images.
///
/// This is provided as a convenience but will probably go away, as the functionality
/// is subsumed by the builder.
unsafe fn create_simple_compute_pipeline(
&self,
code: &Self::ShaderSource,
n_buffers: u32,
n_images: u32,
) -> Result<Self::Pipeline, Error> {
let mut builder = self.pipeline_builder();
builder.add_buffers(n_buffers);
builder.add_images(n_images);
builder.create_compute_pipeline(self, code)
}
/// Create a descriptor set for a given pipeline, binding buffers and images.
///
/// This is provided as a convenience but will probably go away, as the functionality
/// is subsumed by the builder.
unsafe fn create_descriptor_set(
&self,
pipeline: &Self::Pipeline,
bufs: &[&Self::Buffer],
images: &[&Self::Image],
) -> Result<Self::DescriptorSet, Error> {
let mut builder = self.descriptor_set_builder();
builder.add_buffers(bufs);
builder.add_images(images);
builder.build(self, pipeline)
}
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error>;
/// Get results from query pool, destroying it in the process.
///
/// The returned vector is one less than the number of queries; the first is used as
/// a baseline.
///
/// # Safety
/// All submitted commands that refer to this query pool must have completed.
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn run_cmd_bufs(
&self,
cmd_buf: &[&Self::CmdBuf],
wait_semaphores: &[&Self::Semaphore],
signal_semaphores: &[&Self::Semaphore],
fence: Option<&mut Self::Fence>,
) -> Result<(), Error>;
/// Copy data from the buffer to memory.
///
/// Discussion question: add offset?
///
/// # Safety
///
/// The buffer must be valid to access. The destination memory must be valid to
/// write to. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the destination.
unsafe fn read_buffer(
&self,
buffer: &Self::Buffer,
dst: *mut u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
/// Copy data from memory to the buffer.
///
/// # Safety
///
/// The buffer must be valid to access. The source memory must be valid to
/// read from. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the source.
unsafe fn write_buffer(
&self,
buffer: &Self::Buffer,
contents: *const u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error>;
unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error>;
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
}
pub trait CmdBuf<D: Device> {
unsafe fn begin(&mut self);
unsafe fn finish(&mut self);
unsafe fn dispatch(
&mut self,
pipeline: &D::Pipeline,
descriptor_set: &D::DescriptorSet,
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
);
/// Insert an execution and memory barrier.
///
/// Compute kernels (and other actions) after this barrier may read from buffers
/// that were written before this barrier.
unsafe fn memory_barrier(&mut self);
/// Insert a barrier for host access to buffers.
///
/// The host may read buffers written before this barrier, after the fence for
/// the command buffer is signaled.
///
/// See http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/
/// ("Host memory reads") for an explanation of this barrier.
unsafe fn host_barrier(&mut self);
unsafe fn image_barrier(
&mut self,
image: &D::Image,
src_layout: ImageLayout,
dst_layout: ImageLayout,
);
/// Clear the buffer.
///
/// This is readily supported in Vulkan, but for portability it is remarkably
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
/// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);
unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image);
// low portability, dx12 doesn't support it natively
unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);
/// Reset the query pool.
///
/// The query pool must be reset before each use, to avoid validation errors.
/// This is annoying, and we could tweak the API to make it implicit, doing
/// the reset before the first timestamp write.
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
/// Prepare the timestamps for reading. This isn't required on Vulkan but
/// is required on (at least) DX12.
unsafe fn finish_timestamps(&mut self, _pool: &D::QueryPool) {}
}
/// A builder for pipelines with more complex layouts.
pub trait PipelineBuilder<D: Device> {
/// Add buffers to the pipeline. Each has its own binding.
fn add_buffers(&mut self, n_buffers: u32);
/// Add storage images to the pipeline. Each has its own binding.
fn add_images(&mut self, n_images: u32);
/// Add a binding with a variable-size array of textures.
fn add_textures(&mut self, max_textures: u32);
unsafe fn create_compute_pipeline(
self,
device: &D,
code: &D::ShaderSource,
) -> Result<D::Pipeline, Error>;
}
/// A builder for descriptor sets with more complex layouts.
///
/// Note: the order needs to match the pipeline building, and it also needs to
/// be buffers, then images, then textures.
pub trait DescriptorSetBuilder<D: Device> {
fn add_buffers(&mut self, buffers: &[&D::Buffer]);
/// Add an array of storage images.
///
/// The images need to be in `ImageLayout::General` layout.
fn add_images(&mut self, images: &[&D::Image]);
/// Add an array of textures.
///
/// The images need to be in `ImageLayout::ShaderRead` layout.
///
/// The same sampler is used for all textures, which is not very sophisticated;
/// we should have a way to vary the sampler.
fn add_textures(&mut self, images: &[&D::Image]);
unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
}

View file

@ -224,7 +224,7 @@ impl Dx12Instance {
}
}
impl crate::Device for Dx12Device {
impl crate::backend::Device for Dx12Device {
type Buffer = Buffer;
type Image = Image;
@ -413,7 +413,7 @@ impl crate::Device for Dx12Device {
Ok(())
}
unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
let fence_val = fence.fence.get_value();
Ok(fence_val == fence.val.get())
}
@ -451,7 +451,7 @@ impl Dx12Device {
}
}
impl crate::CmdBuf<Dx12Device> for CmdBuf {
impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
unsafe fn begin(&mut self) {}
unsafe fn finish(&mut self) {
@ -468,7 +468,8 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
&mut self,
pipeline: &Pipeline,
descriptor_set: &DescriptorSet,
size: (u32, u32, u32),
workgroup_count: (u32, u32, u32),
_workgroup_size: (u32, u32, u32),
) {
self.c.set_pipeline_state(&pipeline.pipeline_state);
self.c
@ -478,7 +479,8 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
0,
descriptor_set.0.get_gpu_descriptor_handle_at_offset(0),
);
self.c.dispatch(size.0, size.1, size.2);
self.c
.dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2);
}
unsafe fn memory_barrier(&mut self) {
@ -554,7 +556,7 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
}
}
impl crate::PipelineBuilder<Dx12Device> for PipelineBuilder {
impl crate::backend::PipelineBuilder<Dx12Device> for PipelineBuilder {
fn add_buffers(&mut self, n_buffers: u32) {
if n_buffers != 0 {
self.ranges.push(d3d12::D3D12_DESCRIPTOR_RANGE {
@ -630,7 +632,7 @@ impl crate::PipelineBuilder<Dx12Device> for PipelineBuilder {
}
}
impl crate::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
fn add_buffers(&mut self, buffers: &[&Buffer]) {
// Note: we could get rid of the clone here (which is an AddRef)
// and store a raw pointer, as it's a safety precondition that

View file

@ -123,7 +123,7 @@ impl Session {
unsafe {
let mut i = 0;
while i < pending.len() {
if let Ok(true) = self.0.device.get_fence_status(&pending[i].fence) {
if let Ok(true) = self.0.device.get_fence_status(&mut pending[i].fence) {
let mut item = pending.swap_remove(i);
// TODO: wait is superfluous, can just reset
let _ = self.0.device.wait_and_reset(vec![&mut item.fence]);
@ -295,6 +295,11 @@ impl Session {
pub fn gpu_info(&self) -> &GpuInfo {
&self.0.gpu_info
}
/// Choose shader code from the available choices.
pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, msl: &'a str) -> ShaderCode<'a> {
self.0.device.choose_shader(spv, hlsl, msl)
}
}
impl CmdBuf {

View file

@ -4,6 +4,7 @@
/// In time, it may go away and be replaced by either gfx-hal or wgpu.
use bitflags::bitflags;
pub mod backend;
pub mod hub;
#[macro_use]
@ -26,9 +27,13 @@ mux_cfg! {
#[cfg(target_os = "macos")]
pub mod metal;
/// This isn't great but is expedient.
/// The common error type for the crate.
///
/// This keeps things imple and can be expanded later.
pub type Error = Box<dyn std::error::Error>;
pub use crate::backend::CmdBuf;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ImageLayout {
Undefined,
@ -92,248 +97,3 @@ pub struct SubgroupSize {
min: u32,
max: u32,
}
pub trait Device: Sized {
type Buffer: 'static;
type Image;
type Pipeline;
type DescriptorSet;
type QueryPool;
type CmdBuf: CmdBuf<Self>;
type Fence;
type Semaphore;
type PipelineBuilder: PipelineBuilder<Self>;
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
type Sampler;
type ShaderSource: ?Sized;
/// Query the GPU info.
///
/// This method may be expensive, so the hub should call it once and retain
/// the info.
fn query_gpu_info(&self) -> GpuInfo;
fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error>;
/// Destroy a buffer.
///
/// The same safety requirements hold as in Vulkan: the buffer cannot be used
/// after this call, and all commands referencing this buffer must have completed.
///
/// Maybe doesn't need result return?
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
/// Destroy an image.
///
/// The same safety requirements hold as in Vulkan: the image cannot be used
/// after this call, and all commands referencing this image must have completed.
///
/// Use this only with images we created, not for swapchain images.
///
/// Maybe doesn't need result return?
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
/// Start building a pipeline.
///
/// A pipeline is a bit of shader IR plus a signature for what kinds of resources
/// it expects.
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;
/// Start building a descriptor set.
///
/// A descriptor set is a binding of resources for a given pipeline.
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;
/// Create a simple compute pipeline that operates on buffers and storage images.
///
/// This is provided as a convenience but will probably go away, as the functionality
/// is subsumed by the builder.
unsafe fn create_simple_compute_pipeline(
&self,
code: &Self::ShaderSource,
n_buffers: u32,
n_images: u32,
) -> Result<Self::Pipeline, Error> {
let mut builder = self.pipeline_builder();
builder.add_buffers(n_buffers);
builder.add_images(n_images);
builder.create_compute_pipeline(self, code)
}
/// Create a descriptor set for a given pipeline, binding buffers and images.
///
/// This is provided as a convenience but will probably go away, as the functionality
/// is subsumed by the builder.
unsafe fn create_descriptor_set(
&self,
pipeline: &Self::Pipeline,
bufs: &[&Self::Buffer],
images: &[&Self::Image],
) -> Result<Self::DescriptorSet, Error> {
let mut builder = self.descriptor_set_builder();
builder.add_buffers(bufs);
builder.add_images(images);
builder.build(self, pipeline)
}
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error>;
/// Get results from query pool, destroying it in the process.
///
/// The returned vector is one less than the number of queries; the first is used as
/// a baseline.
///
/// # Safety
/// All submitted commands that refer to this query pool must have completed.
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn run_cmd_bufs(
&self,
cmd_buf: &[&Self::CmdBuf],
wait_semaphores: &[&Self::Semaphore],
signal_semaphores: &[&Self::Semaphore],
fence: Option<&mut Self::Fence>,
) -> Result<(), Error>;
/// Copy data from the buffer to memory.
///
/// Discussion question: add offset?
///
/// # Safety
///
/// The buffer must be valid to access. The destination memory must be valid to
/// write to. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the destination.
unsafe fn read_buffer(
&self,
buffer: &Self::Buffer,
dst: *mut u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
/// Copy data from memory to the buffer.
///
/// # Safety
///
/// The buffer must be valid to access. The source memory must be valid to
/// read from. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the source.
unsafe fn write_buffer(
&self,
buffer: &Self::Buffer,
contents: *const u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error>;
unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error>;
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
}
pub trait CmdBuf<D: Device> {
unsafe fn begin(&mut self);
unsafe fn finish(&mut self);
unsafe fn dispatch(
&mut self,
pipeline: &D::Pipeline,
descriptor_set: &D::DescriptorSet,
size: (u32, u32, u32),
);
/// Insert an execution and memory barrier.
///
/// Compute kernels (and other actions) after this barrier may read from buffers
/// that were written before this barrier.
unsafe fn memory_barrier(&mut self);
/// Insert a barrier for host access to buffers.
///
/// The host may read buffers written before this barrier, after the fence for
/// the command buffer is signaled.
///
/// See http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/
/// ("Host memory reads") for an explanation of this barrier.
unsafe fn host_barrier(&mut self);
unsafe fn image_barrier(
&mut self,
image: &D::Image,
src_layout: ImageLayout,
dst_layout: ImageLayout,
);
/// Clear the buffer.
///
/// This is readily supported in Vulkan, but for portability it is remarkably
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
/// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);
unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image);
// low portability, dx12 doesn't support it natively
unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);
/// Reset the query pool.
///
/// The query pool must be reset before each use, to avoid validation errors.
/// This is annoying, and we could tweak the API to make it implicit, doing
/// the reset before the first timestamp write.
unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
/// Prepare the timestamps for reading. This isn't required on Vulkan but
/// is required on (at least) DX12.
unsafe fn finish_timestamps(&mut self, pool: &D::QueryPool) {}
}
/// A builder for pipelines with more complex layouts.
pub trait PipelineBuilder<D: Device> {
/// Add buffers to the pipeline. Each has its own binding.
fn add_buffers(&mut self, n_buffers: u32);
/// Add storage images to the pipeline. Each has its own binding.
fn add_images(&mut self, n_images: u32);
/// Add a binding with a variable-size array of textures.
fn add_textures(&mut self, max_textures: u32);
unsafe fn create_compute_pipeline(
self,
device: &D,
code: &D::ShaderSource,
) -> Result<D::Pipeline, Error>;
}
/// A builder for descriptor sets with more complex layouts.
///
/// Note: the order needs to match the pipeline building, and it also needs to
/// be buffers, then images, then textures.
pub trait DescriptorSetBuilder<D: Device> {
fn add_buffers(&mut self, buffers: &[&D::Buffer]);
/// Add an array of storage images.
///
/// The images need to be in `ImageLayout::General` layout.
fn add_images(&mut self, images: &[&D::Image]);
/// Add an array of textures.
///
/// The images need to be in `ImageLayout::ShaderRead` layout.
///
/// The same sampler is used for all textures, which is not very sophisticated;
/// we should have a way to vary the sampler.
fn add_textures(&mut self, images: &[&D::Image]);
unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
}

View file

@ -117,9 +117,9 @@ macro_rules! mux_device_enum {
$crate::mux_enum! {
$(#[$outer])*
pub enum $assoc_type {
Vk(<$crate::vulkan::VkDevice as $crate::Device>::$assoc_type),
Dx12(<$crate::dx12::Dx12Device as $crate::Device>::$assoc_type),
Mtl(<$crate::metal::MtlDevice as $crate::Device>::$assoc_type),
Vk(<$crate::vulkan::VkDevice as $crate::backend::Device>::$assoc_type),
Dx12(<$crate::dx12::Dx12Device as $crate::backend::Device>::$assoc_type),
Mtl(<$crate::metal::MtlDevice as $crate::backend::Device>::$assoc_type),
}
}
}
@ -154,3 +154,15 @@ macro_rules! mux_match {
}
};
}
/// A convenience macro for selecting a shader from included files.
#[macro_export]
macro_rules! include_shader {
( $device:expr, $path_base:expr) => {
$device.choose_shader(
include_bytes!(concat!($path_base, ".spv")),
include_str!(concat!($path_base, ".hlsl")),
include_str!(concat!($path_base, ".msl")),
)
};
}

View file

@ -102,7 +102,11 @@ impl MtlInstance {
has_memory_model: false,
use_staging_buffers: use_staging_buffers,
};
Ok(MtlDevice { device, cmd_queue, gpu_info })
Ok(MtlDevice {
device,
cmd_queue,
gpu_info,
})
} else {
Err("can't create system default Metal device".into())
}
@ -119,7 +123,7 @@ impl MtlInstance {
}
}
impl crate::Device for MtlDevice {
impl crate::backend::Device for MtlDevice {
type Buffer = Buffer;
type Image = Image;
@ -282,9 +286,13 @@ impl crate::Device for MtlDevice {
Ok(())
}
unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
// fence need to be mutable here :/
todo!()
unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
match fence {
Fence::Idle => Ok(true),
Fence::CmdBufPending(cmd_buf) => {
Ok(cmd_buf.status() == metal::MTLCommandBufferStatus::Completed)
}
}
}
unsafe fn create_sampler(&self, params: crate::SamplerParams) -> Result<Self::Sampler, Error> {
@ -292,18 +300,17 @@ impl crate::Device for MtlDevice {
}
}
impl crate::CmdBuf<MtlDevice> for CmdBuf {
unsafe fn begin(&mut self) {
}
impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
unsafe fn begin(&mut self) {}
unsafe fn finish(&mut self) {
}
unsafe fn finish(&mut self) {}
unsafe fn dispatch(
&mut self,
pipeline: &Pipeline,
descriptor_set: &DescriptorSet,
size: (u32, u32, u32),
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
) {
let encoder = self.cmd_buf.new_compute_command_encoder();
encoder.set_compute_pipeline_state(&pipeline.0);
@ -313,19 +320,17 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
ix += 1;
}
// TODO: set images
let work_group_count = metal::MTLSize {
width: size.0 as u64,
height: size.1 as u64,
depth: size.2 as u64,
let workgroup_count = metal::MTLSize {
width: workgroup_count.0 as u64,
height: workgroup_count.1 as u64,
depth: workgroup_count.2 as u64,
};
// TODO: we need to pass this in explicitly. In gfx-hal, this is parsed from
// the spv before translation.
let work_group_size = metal::MTLSize {
width: 1,
height: 1,
depth: 1,
let workgroup_size = metal::MTLSize {
width: workgroup_size.0 as u64,
height: workgroup_size.1 as u64,
depth: workgroup_size.2 as u64,
};
encoder.dispatch_thread_groups(work_group_count, work_group_size);
encoder.dispatch_thread_groups(workgroup_count, workgroup_size);
encoder.end_encoding();
}
@ -334,8 +339,7 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
// Metal's own tracking.
}
unsafe fn host_barrier(&mut self) {
}
unsafe fn host_barrier(&mut self) {}
unsafe fn image_barrier(
&mut self,
@ -366,9 +370,7 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
todo!()
}
unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {
}
unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {}
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
// TODO
@ -377,17 +379,15 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
}
}
impl crate::PipelineBuilder<MtlDevice> for PipelineBuilder {
impl crate::backend::PipelineBuilder<MtlDevice> for PipelineBuilder {
fn add_buffers(&mut self, _n_buffers: u32) {
// My understanding is that Metal infers the pipeline layout from
// the source.
}
fn add_images(&mut self, _n_images: u32) {
}
fn add_images(&mut self, _n_images: u32) {}
fn add_textures(&mut self, _max_textures: u32) {
}
fn add_textures(&mut self, _max_textures: u32) {}
unsafe fn create_compute_pipeline(
self,
@ -399,12 +399,14 @@ impl crate::PipelineBuilder<MtlDevice> for PipelineBuilder {
let library = device.device.new_library_with_source(code, &options)?;
// This seems to be the default name from spirv-cross, but we may need to tweak.
let function = library.get_function("main0", None)?;
let pipeline = device.device.new_compute_pipeline_state_with_function(&function)?;
let pipeline = device
.device
.new_compute_pipeline_state_with_function(&function)?;
Ok(Pipeline(pipeline))
}
}
impl crate::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
fn add_buffers(&mut self, buffers: &[&Buffer]) {
self.0.buffers.extend(buffers.iter().copied().cloned());
}

View file

@ -30,10 +30,10 @@ mux_cfg! {
#[cfg(mtl)]
use crate::metal;
}
use crate::CmdBuf as CmdBufTrait;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::Device as DeviceTrait;
use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::backend::CmdBuf as CmdBufTrait;
use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::backend::Device as DeviceTrait;
use crate::backend::PipelineBuilder as PipelineBuilderTrait;
use crate::{BufferUsage, Error, GpuInfo, ImageLayout};
mux_enum! {
@ -255,11 +255,11 @@ impl Device {
}
}
pub unsafe fn get_fence_status(&self, fence: &Fence) -> Result<bool, Error> {
pub unsafe fn get_fence_status(&self, fence: &mut Fence) -> Result<bool, Error> {
mux_match! { self;
Device::Vk(d) => d.get_fence_status(fence.vk()),
Device::Dx12(d) => d.get_fence_status(fence.dx12()),
Device::Mtl(d) => d.get_fence_status(fence.mtl()),
Device::Vk(d) => d.get_fence_status(fence.vk_mut()),
Device::Dx12(d) => d.get_fence_status(fence.dx12_mut()),
Device::Mtl(d) => d.get_fence_status(fence.mtl_mut()),
}
}
@ -400,6 +400,15 @@ impl Device {
Device::Mtl(d) => d.write_buffer(buffer.mtl(), contents, offset, size),
}
}
/// Choose shader code from the available choices.
pub fn choose_shader<'a>(&self, _spv: &'a [u8], _hlsl: &'a str, _msl: &'a str) -> ShaderCode<'a> {
mux_match! { self;
Device::Vk(_d) => ShaderCode::Spv(_spv),
Device::Dx12(_d) => ShaderCode::Hlsl(_hlsl),
Device::Mtl(_d) => ShaderCode::Msl(_msl),
}
}
}
impl PipelineBuilder {
@ -578,16 +587,25 @@ impl CmdBuf {
}
}
/// Dispatch a compute shader.
///
/// Note that both the number of workgroups (`workgroup_count`) and the number of
/// threads in a workgroup (`workgroup_size`) are given. The latter is needed on
/// Metal, while it's baked into the shader on Vulkan and DX12.
///
/// Perhaps we'll have a mechanism to plumb the latter value to configure the size
/// of a workgroup using specialization constants in the future.
pub unsafe fn dispatch(
&mut self,
pipeline: &Pipeline,
descriptor_set: &DescriptorSet,
size: (u32, u32, u32),
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
) {
mux_match! { self;
CmdBuf::Vk(c) => c.dispatch(pipeline.vk(), descriptor_set.vk(), size),
CmdBuf::Dx12(c) => c.dispatch(pipeline.dx12(), descriptor_set.dx12(), size),
CmdBuf::Mtl(c) => c.dispatch(pipeline.mtl(), descriptor_set.mtl(), size),
CmdBuf::Vk(c) => c.dispatch(pipeline.vk(), descriptor_set.vk(), workgroup_count, workgroup_size),
CmdBuf::Dx12(c) => c.dispatch(pipeline.dx12(), descriptor_set.dx12(), workgroup_count, workgroup_size),
CmdBuf::Mtl(c) => c.dispatch(pipeline.mtl(), descriptor_set.mtl(), workgroup_count, workgroup_size),
}
}

View file

@ -13,8 +13,10 @@ use ash::{vk, Device, Entry, Instance};
use smallvec::SmallVec;
use crate::{
BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
};
use crate::backend::Device as DeviceTrait;
pub struct VkInstance {
/// Retain the dynamic lib.
@ -455,7 +457,7 @@ impl VkInstance {
}
}
impl crate::Device for VkDevice {
impl crate::backend::Device for VkDevice {
type Buffer = Buffer;
type Image = Image;
type CmdBuf = CmdBuf;
@ -621,16 +623,13 @@ impl crate::Device for VkDevice {
unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error> {
let device = &self.device.device;
let fences = fences
.iter()
.map(|f| **f)
.collect::<SmallVec<[_; 4]>>();
let fences = fences.iter().map(|f| **f).collect::<SmallVec<[_; 4]>>();
device.wait_for_fences(&fences, true, !0)?;
device.reset_fences(&fences)?;
Ok(())
}
unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
let device = &self.device.device;
Ok(device.get_fence_status(*fence)?)
}
@ -843,7 +842,8 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
&mut self,
pipeline: &Pipeline,
descriptor_set: &DescriptorSet,
size: (u32, u32, u32),
workgroup_count: (u32, u32, u32),
_workgroup_size: (u32, u32, u32),
) {
let device = &self.device.device;
device.cmd_bind_pipeline(
@ -859,7 +859,12 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
&[descriptor_set.descriptor_set],
&[],
);
device.cmd_dispatch(self.cmd_buf, size.0, size.1, size.2);
device.cmd_dispatch(
self.cmd_buf,
workgroup_count.0,
workgroup_count.1,
workgroup_count.2,
);
}
/// Insert a pipeline barrier for all memory accesses.
@ -1047,7 +1052,7 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
}
}
impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
impl crate::backend::PipelineBuilder<VkDevice> for PipelineBuilder {
fn add_buffers(&mut self, n_buffers: u32) {
let start = self.bindings.len() as u32;
for i in 0..n_buffers {
@ -1153,7 +1158,7 @@ impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
}
}
impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
fn add_buffers(&mut self, buffers: &[&Buffer]) {
self.buffers.extend(buffers.iter().map(|b| b.buffer));
}
@ -1307,7 +1312,11 @@ impl VkSwapchain {
image_idx: usize,
semaphores: &[&vk::Semaphore],
) -> Result<bool, Error> {
let semaphores = semaphores.iter().copied().copied().collect::<SmallVec<[_; 4]>>();
let semaphores = semaphores
.iter()
.copied()
.copied()
.collect::<SmallVec<[_; 4]>>();
Ok(self.swapchain_fn.queue_present(
self.present_queue,
&vk::PresentInfoKHR::builder()

View file

@ -12,7 +12,7 @@ use ndk::native_window::NativeWindow;
use ndk_glue::Event;
use piet_gpu_hal::hub;
use piet_gpu_hal::mux::{QueryPool, Instance, Surface, Swapchain};
use piet_gpu_hal::mux::{Instance, QueryPool, Surface, Swapchain};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout};
use piet_gpu::{render_scene, PietGpuRenderContext, Renderer};

View file

@ -403,6 +403,7 @@ impl Renderer {
&self.el_pipeline,
&self.el_ds,
(((self.n_elements + 127) / 128) as u32, 1, 1),
(128, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
@ -410,6 +411,7 @@ impl Renderer {
&self.tile_pipeline,
&self.tile_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
@ -417,6 +419,7 @@ impl Renderer {
&self.path_pipeline,
&self.path_ds,
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
(32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier();
@ -424,6 +427,7 @@ impl Renderer {
&self.backdrop_pipeline,
&self.backdrop_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between
@ -434,6 +438,7 @@ impl Renderer {
&self.bin_pipeline,
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier();
@ -441,6 +446,7 @@ impl Renderer {
&self.coarse_pipeline,
&self.coarse_ds,
((WIDTH as u32 + 255) / 256, (HEIGHT as u32 + 255) / 256, 1),
(256, 256, 1),
);
cmd_buf.write_timestamp(&query_pool, 6);
cmd_buf.memory_barrier();
@ -448,6 +454,7 @@ impl Renderer {
&self.k4_pipeline,
&self.k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
(8, 4, 1),
);
cmd_buf.write_timestamp(&query_pool, 7);
cmd_buf.memory_barrier();