API changes and cleanup
Add workgroup size to the dispatch call (needed by Metal). Change all fence references to mutable for consistency. Move the backend traits to a separate file (out of the top-level namespace, in preparation for the hub types going there, to make the public API nicer). Add a method and macro for automatically choosing shader code, and change the collatz example to generate all three kinds of shader code on build.
This commit is contained in:
parent
641891b01f
commit
7d7c86c44b
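
A minimal sketch of the two call sites that change, excerpted (not a complete program) from the collatz example below; `session`, `cmd_buf`, `descriptor_set`, and the shader path are all taken from that example:

    // Select SPIR-V, HLSL, or MSL at compile time for the active backend. The macro
    // expects the collatz.spv/.hlsl/.msl files that the ninja build writes to shader/gen.
    let code = include_shader!(&session, "./shader/gen/collatz");
    let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();

    // dispatch now takes both the workgroup count and the workgroup size; the size
    // is needed by Metal, while Vulkan and DX12 bake it into the shader and ignore it.
    cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1), (1, 1, 1));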
@@ -1,6 +1,7 @@
use piet_gpu_hal::hub;
use piet_gpu_hal::mux::{Instance, ShaderCode};
use piet_gpu_hal::mux::Instance;
use piet_gpu_hal::BufferUsage;
use piet_gpu_hal::include_shader;

fn main() {
    let (instance, _) = Instance::new(None).unwrap();
@@ -10,7 +11,7 @@ fn main() {
    let usage = BufferUsage::MAP_READ | BufferUsage::STORAGE;
    let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
    let buffer = session.create_buffer_init(&src, usage).unwrap();
    let code = ShaderCode::Msl(include_str!("./shader/collatz.msl"));
    let code = include_shader!(&session, "./shader/gen/collatz");
    let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
    let descriptor_set = session
        .create_simple_descriptor_set(&pipeline, &[&buffer])
@@ -20,7 +21,7 @@ fn main() {
    cmd_buf.begin();
    cmd_buf.reset_query_pool(&query_pool);
    cmd_buf.write_timestamp(&query_pool, 0);
    cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1));
    cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1), (1, 1, 1));
    cmd_buf.write_timestamp(&query_pool, 1);
    cmd_buf.host_barrier();
    cmd_buf.finish();
@@ -2,7 +2,8 @@
//! This will probably go away when it's fully implemented and we can
//! just use the hub.

use piet_gpu_hal::{dx12, BufferUsage, CmdBuf, Device, Error};
use piet_gpu_hal::{dx12, BufferUsage, Error};
use piet_gpu_hal::backend::{CmdBuf, Device};

const SHADER_CODE: &str = r#"RWByteAddressBuffer _53 : register(u0, space0);
@@ -78,7 +79,7 @@ fn toy() -> Result<(), Error> {
    cmd_buf.copy_buffer(&buf, &dev_buf);
    cmd_buf.memory_barrier();
    cmd_buf.write_timestamp(&query_pool, 0);
    cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1));
    cmd_buf.dispatch(&pipeline, &ds, (1, 1, 1), (256, 1, 1));
    cmd_buf.write_timestamp(&query_pool, 1);
    cmd_buf.memory_barrier();
    cmd_buf.copy_buffer(&dev_buf, &buf);
@@ -1,25 +0,0 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.

//! An example to exercise the Metal backend. Once that becomes
//! functional, this file will go away.

use piet_gpu_hal::metal;

fn main() {
    let instance = metal::MetalInstance;
    println!("hello metal");
}
@@ -1,10 +1,19 @@
# Build file for shaders.

# You must have glslangValidator in your path, or patch here.
# You must have Vulkan tools in your path, or patch here.

glslang_validator = glslangValidator
spirv_cross = spirv-cross

rule glsl
  command = $glslang_validator -V -o $out $in

build collatz.spv: glsl collatz.comp
rule hlsl
  command = $spirv_cross --hlsl $in --output $out

rule msl
  command = $spirv_cross --msl $in --output $out

build gen/collatz.spv: glsl collatz.comp
build gen/collatz.hlsl: hlsl gen/collatz.spv
build gen/collatz.msl: msl gen/collatz.spv
piet-gpu-hal/examples/shader/gen/collatz.hlsl (new file, 62 lines)
@@ -0,0 +1,62 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);

RWByteAddressBuffer _57 : register(u0);

static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};

float mod(float x, float y)
{
    return x - y * floor(x / y);
}

float2 mod(float2 x, float2 y)
{
    return x - y * floor(x / y);
}

float3 mod(float3 x, float3 y)
{
    return x - y * floor(x / y);
}

float4 mod(float4 x, float4 y)
{
    return x - y * floor(x / y);
}

uint collatz_iterations(inout uint n)
{
    uint i = 0u;
    while (n != 1u)
    {
        if (mod(float(n), 2.0f) == 0.0f)
        {
            n /= 2u;
        }
        else
        {
            n = (3u * n) + 1u;
        }
        i++;
    }
    return i;
}

void comp_main()
{
    uint index = gl_GlobalInvocationID.x;
    uint param = _57.Load(index * 4 + 0);
    uint _65 = collatz_iterations(param);
    _57.Store(index * 4 + 0, _65);
}

[numthreads(1, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
    comp_main();
}
piet-gpu-hal/examples/shader/gen/collatz.msl (new file, 48 lines)
@@ -0,0 +1,48 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct PrimeIndices
{
    uint indices[1];
};

constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);

// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
template<typename Tx, typename Ty>
inline Tx mod(Tx x, Ty y)
{
    return x - y * floor(x / y);
}

static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
    uint i = 0u;
    while (n != 1u)
    {
        if (mod(float(n), 2.0) == 0.0)
        {
            n /= 2u;
        }
        else
        {
            n = (3u * n) + 1u;
        }
        i++;
    }
    return i;
}

kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    uint index = gl_GlobalInvocationID.x;
    uint param = _57.indices[index];
    uint _65 = collatz_iterations(param);
    _57.indices[index] = _65;
}
Binary file not shown.
piet-gpu-hal/src/backend.rs (new file, 265 lines)
@@ -0,0 +1,265 @@
// Copyright 2021 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.

//! The generic trait for backends to implement.

use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, mux::ShaderCode};

pub trait Device: Sized {
    type Buffer: 'static;
    type Image;
    type Pipeline;
    type DescriptorSet;
    type QueryPool;
    type CmdBuf: CmdBuf<Self>;
    type Fence;
    type Semaphore;
    type PipelineBuilder: PipelineBuilder<Self>;
    type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
    type Sampler;
    type ShaderSource: ?Sized;

    /// Query the GPU info.
    ///
    /// This method may be expensive, so the hub should call it once and retain
    /// the info.
    fn query_gpu_info(&self) -> GpuInfo;

    fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error>;

    /// Destroy a buffer.
    ///
    /// The same safety requirements hold as in Vulkan: the buffer cannot be used
    /// after this call, and all commands referencing this buffer must have completed.
    ///
    /// Maybe doesn't need result return?
    unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;

    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;

    /// Destroy an image.
    ///
    /// The same safety requirements hold as in Vulkan: the image cannot be used
    /// after this call, and all commands referencing this image must have completed.
    ///
    /// Use this only with images we created, not for swapchain images.
    ///
    /// Maybe doesn't need result return?
    unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;

    /// Start building a pipeline.
    ///
    /// A pipeline is a bit of shader IR plus a signature for what kinds of resources
    /// it expects.
    unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;

    /// Start building a descriptor set.
    ///
    /// A descriptor set is a binding of resources for a given pipeline.
    unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;

    /// Create a simple compute pipeline that operates on buffers and storage images.
    ///
    /// This is provided as a convenience but will probably go away, as the functionality
    /// is subsumed by the builder.
    unsafe fn create_simple_compute_pipeline(
        &self,
        code: &Self::ShaderSource,
        n_buffers: u32,
        n_images: u32,
    ) -> Result<Self::Pipeline, Error> {
        let mut builder = self.pipeline_builder();
        builder.add_buffers(n_buffers);
        builder.add_images(n_images);
        builder.create_compute_pipeline(self, code)
    }

    /// Create a descriptor set for a given pipeline, binding buffers and images.
    ///
    /// This is provided as a convenience but will probably go away, as the functionality
    /// is subsumed by the builder.
    unsafe fn create_descriptor_set(
        &self,
        pipeline: &Self::Pipeline,
        bufs: &[&Self::Buffer],
        images: &[&Self::Image],
    ) -> Result<Self::DescriptorSet, Error> {
        let mut builder = self.descriptor_set_builder();
        builder.add_buffers(bufs);
        builder.add_images(images);
        builder.build(self, pipeline)
    }

    fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;

    fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error>;

    /// Get results from query pool, destroying it in the process.
    ///
    /// The returned vector is one less than the number of queries; the first is used as
    /// a baseline.
    ///
    /// # Safety
    /// All submitted commands that refer to this query pool must have completed.
    unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;

    unsafe fn run_cmd_bufs(
        &self,
        cmd_buf: &[&Self::CmdBuf],
        wait_semaphores: &[&Self::Semaphore],
        signal_semaphores: &[&Self::Semaphore],
        fence: Option<&mut Self::Fence>,
    ) -> Result<(), Error>;

    /// Copy data from the buffer to memory.
    ///
    /// Discussion question: add offset?
    ///
    /// # Safety
    ///
    /// The buffer must be valid to access. The destination memory must be valid to
    /// write to. The ranges must not overlap. The offset + size must be within
    /// the buffer's allocation, and size within the destination.
    unsafe fn read_buffer(
        &self,
        buffer: &Self::Buffer,
        dst: *mut u8,
        offset: u64,
        size: u64,
    ) -> Result<(), Error>;

    /// Copy data from memory to the buffer.
    ///
    /// # Safety
    ///
    /// The buffer must be valid to access. The source memory must be valid to
    /// read from. The ranges must not overlap. The offset + size must be within
    /// the buffer's allocation, and size within the source.
    unsafe fn write_buffer(
        &self,
        buffer: &Self::Buffer,
        contents: *const u8,
        offset: u64,
        size: u64,
    ) -> Result<(), Error>;

    unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
    unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
    unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error>;
    unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error>;

    unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
}

pub trait CmdBuf<D: Device> {
    unsafe fn begin(&mut self);

    unsafe fn finish(&mut self);

    unsafe fn dispatch(
        &mut self,
        pipeline: &D::Pipeline,
        descriptor_set: &D::DescriptorSet,
        workgroup_count: (u32, u32, u32),
        workgroup_size: (u32, u32, u32),
    );

    /// Insert an execution and memory barrier.
    ///
    /// Compute kernels (and other actions) after this barrier may read from buffers
    /// that were written before this barrier.
    unsafe fn memory_barrier(&mut self);

    /// Insert a barrier for host access to buffers.
    ///
    /// The host may read buffers written before this barrier, after the fence for
    /// the command buffer is signaled.
    ///
    /// See http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/
    /// ("Host memory reads") for an explanation of this barrier.
    unsafe fn host_barrier(&mut self);

    unsafe fn image_barrier(
        &mut self,
        image: &D::Image,
        src_layout: ImageLayout,
        dst_layout: ImageLayout,
    );

    /// Clear the buffer.
    ///
    /// This is readily supported in Vulkan, but for portability it is remarkably
    /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
    /// kernel, or organize the code not to need it.
    unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);

    unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);

    unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);

    unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image);

    // low portability, dx12 doesn't support it natively
    unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);

    /// Reset the query pool.
    ///
    /// The query pool must be reset before each use, to avoid validation errors.
    /// This is annoying, and we could tweak the API to make it implicit, doing
    /// the reset before the first timestamp write.
    unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);

    unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);

    /// Prepare the timestamps for reading. This isn't required on Vulkan but
    /// is required on (at least) DX12.
    unsafe fn finish_timestamps(&mut self, _pool: &D::QueryPool) {}
}

/// A builder for pipelines with more complex layouts.
pub trait PipelineBuilder<D: Device> {
    /// Add buffers to the pipeline. Each has its own binding.
    fn add_buffers(&mut self, n_buffers: u32);
    /// Add storage images to the pipeline. Each has its own binding.
    fn add_images(&mut self, n_images: u32);
    /// Add a binding with a variable-size array of textures.
    fn add_textures(&mut self, max_textures: u32);
    unsafe fn create_compute_pipeline(
        self,
        device: &D,
        code: &D::ShaderSource,
    ) -> Result<D::Pipeline, Error>;
}

/// A builder for descriptor sets with more complex layouts.
///
/// Note: the order needs to match the pipeline building, and it also needs to
/// be buffers, then images, then textures.
pub trait DescriptorSetBuilder<D: Device> {
    fn add_buffers(&mut self, buffers: &[&D::Buffer]);
    /// Add an array of storage images.
    ///
    /// The images need to be in `ImageLayout::General` layout.
    fn add_images(&mut self, images: &[&D::Image]);
    /// Add an array of textures.
    ///
    /// The images need to be in `ImageLayout::ShaderRead` layout.
    ///
    /// The same sampler is used for all textures, which is not very sophisticated;
    /// we should have a way to vary the sampler.
    fn add_textures(&mut self, images: &[&D::Image]);
    unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
}
@@ -224,7 +224,7 @@ impl Dx12Instance {
    }
}

impl crate::Device for Dx12Device {
impl crate::backend::Device for Dx12Device {
    type Buffer = Buffer;

    type Image = Image;
@@ -413,7 +413,7 @@ impl crate::Device for Dx12Device {
        Ok(())
    }

    unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
    unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
        let fence_val = fence.fence.get_value();
        Ok(fence_val == fence.val.get())
    }
@@ -451,7 +451,7 @@ impl Dx12Device {
    }
}

impl crate::CmdBuf<Dx12Device> for CmdBuf {
impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
    unsafe fn begin(&mut self) {}

    unsafe fn finish(&mut self) {
@@ -468,7 +468,8 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
        &mut self,
        pipeline: &Pipeline,
        descriptor_set: &DescriptorSet,
        size: (u32, u32, u32),
        workgroup_count: (u32, u32, u32),
        _workgroup_size: (u32, u32, u32),
    ) {
        self.c.set_pipeline_state(&pipeline.pipeline_state);
        self.c
@@ -478,7 +479,8 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
            0,
            descriptor_set.0.get_gpu_descriptor_handle_at_offset(0),
        );
        self.c.dispatch(size.0, size.1, size.2);
        self.c
            .dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2);
    }

    unsafe fn memory_barrier(&mut self) {
@@ -554,7 +556,7 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
    }
}

impl crate::PipelineBuilder<Dx12Device> for PipelineBuilder {
impl crate::backend::PipelineBuilder<Dx12Device> for PipelineBuilder {
    fn add_buffers(&mut self, n_buffers: u32) {
        if n_buffers != 0 {
            self.ranges.push(d3d12::D3D12_DESCRIPTOR_RANGE {
@@ -630,7 +632,7 @@ impl crate::PipelineBuilder<Dx12Device> for PipelineBuilder {
    }
}

impl crate::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
    fn add_buffers(&mut self, buffers: &[&Buffer]) {
        // Note: we could get rid of the clone here (which is an AddRef)
        // and store a raw pointer, as it's a safety precondition that
@@ -123,7 +123,7 @@ impl Session {
        unsafe {
            let mut i = 0;
            while i < pending.len() {
                if let Ok(true) = self.0.device.get_fence_status(&pending[i].fence) {
                if let Ok(true) = self.0.device.get_fence_status(&mut pending[i].fence) {
                    let mut item = pending.swap_remove(i);
                    // TODO: wait is superfluous, can just reset
                    let _ = self.0.device.wait_and_reset(vec![&mut item.fence]);
@@ -295,6 +295,11 @@ impl Session {
    pub fn gpu_info(&self) -> &GpuInfo {
        &self.0.gpu_info
    }

    /// Choose shader code from the available choices.
    pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, msl: &'a str) -> ShaderCode<'a> {
        self.0.device.choose_shader(spv, hlsl, msl)
    }
}

impl CmdBuf {
@@ -4,6 +4,7 @@
/// In time, it may go away and be replaced by either gfx-hal or wgpu.
use bitflags::bitflags;

pub mod backend;
pub mod hub;

#[macro_use]
@@ -26,9 +27,13 @@ mux_cfg! {
#[cfg(target_os = "macos")]
pub mod metal;

/// This isn't great but is expedient.
/// The common error type for the crate.
///
/// This keeps things simple and can be expanded later.
pub type Error = Box<dyn std::error::Error>;

pub use crate::backend::CmdBuf;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ImageLayout {
    Undefined,
@@ -92,248 +97,3 @@ pub struct SubgroupSize {
    min: u32,
    max: u32,
}

pub trait Device: Sized {
    type Buffer: 'static;
    type Image;
    type Pipeline;
    type DescriptorSet;
    type QueryPool;
    type CmdBuf: CmdBuf<Self>;
    type Fence;
    type Semaphore;
    type PipelineBuilder: PipelineBuilder<Self>;
    type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
    type Sampler;
    type ShaderSource: ?Sized;

    /// Query the GPU info.
    ///
    /// This method may be expensive, so the hub should call it once and retain
    /// the info.
    fn query_gpu_info(&self) -> GpuInfo;

    fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error>;

    /// Destroy a buffer.
    ///
    /// The same safety requirements hold as in Vulkan: the buffer cannot be used
    /// after this call, and all commands referencing this buffer must have completed.
    ///
    /// Maybe doesn't need result return?
    unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;

    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;

    /// Destroy an image.
    ///
    /// The same safety requirements hold as in Vulkan: the image cannot be used
    /// after this call, and all commands referencing this image must have completed.
    ///
    /// Use this only with images we created, not for swapchain images.
    ///
    /// Maybe doesn't need result return?
    unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;

    /// Start building a pipeline.
    ///
    /// A pipeline is a bit of shader IR plus a signature for what kinds of resources
    /// it expects.
    unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;

    /// Start building a descriptor set.
    ///
    /// A descriptor set is a binding of resources for a given pipeline.
    unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;

    /// Create a simple compute pipeline that operates on buffers and storage images.
    ///
    /// This is provided as a convenience but will probably go away, as the functionality
    /// is subsumed by the builder.
    unsafe fn create_simple_compute_pipeline(
        &self,
        code: &Self::ShaderSource,
        n_buffers: u32,
        n_images: u32,
    ) -> Result<Self::Pipeline, Error> {
        let mut builder = self.pipeline_builder();
        builder.add_buffers(n_buffers);
        builder.add_images(n_images);
        builder.create_compute_pipeline(self, code)
    }

    /// Create a descriptor set for a given pipeline, binding buffers and images.
    ///
    /// This is provided as a convenience but will probably go away, as the functionality
    /// is subsumed by the builder.
    unsafe fn create_descriptor_set(
        &self,
        pipeline: &Self::Pipeline,
        bufs: &[&Self::Buffer],
        images: &[&Self::Image],
    ) -> Result<Self::DescriptorSet, Error> {
        let mut builder = self.descriptor_set_builder();
        builder.add_buffers(bufs);
        builder.add_images(images);
        builder.build(self, pipeline)
    }

    fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;

    fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error>;

    /// Get results from query pool, destroying it in the process.
    ///
    /// The returned vector is one less than the number of queries; the first is used as
    /// a baseline.
    ///
    /// # Safety
    /// All submitted commands that refer to this query pool must have completed.
    unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;

    unsafe fn run_cmd_bufs(
        &self,
        cmd_buf: &[&Self::CmdBuf],
        wait_semaphores: &[&Self::Semaphore],
        signal_semaphores: &[&Self::Semaphore],
        fence: Option<&mut Self::Fence>,
    ) -> Result<(), Error>;

    /// Copy data from the buffer to memory.
    ///
    /// Discussion question: add offset?
    ///
    /// # Safety
    ///
    /// The buffer must be valid to access. The destination memory must be valid to
    /// write to. The ranges must not overlap. The offset + size must be within
    /// the buffer's allocation, and size within the destination.
    unsafe fn read_buffer(
        &self,
        buffer: &Self::Buffer,
        dst: *mut u8,
        offset: u64,
        size: u64,
    ) -> Result<(), Error>;

    /// Copy data from memory to the buffer.
    ///
    /// # Safety
    ///
    /// The buffer must be valid to access. The source memory must be valid to
    /// read from. The ranges must not overlap. The offset + size must be within
    /// the buffer's allocation, and size within the source.
    unsafe fn write_buffer(
        &self,
        buffer: &Self::Buffer,
        contents: *const u8,
        offset: u64,
        size: u64,
    ) -> Result<(), Error>;

    unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
    unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
    unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error>;
    unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error>;

    unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
}

pub trait CmdBuf<D: Device> {
    unsafe fn begin(&mut self);

    unsafe fn finish(&mut self);

    unsafe fn dispatch(
        &mut self,
        pipeline: &D::Pipeline,
        descriptor_set: &D::DescriptorSet,
        size: (u32, u32, u32),
    );

    /// Insert an execution and memory barrier.
    ///
    /// Compute kernels (and other actions) after this barrier may read from buffers
    /// that were written before this barrier.
    unsafe fn memory_barrier(&mut self);

    /// Insert a barrier for host access to buffers.
    ///
    /// The host may read buffers written before this barrier, after the fence for
    /// the command buffer is signaled.
    ///
    /// See http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/
    /// ("Host memory reads") for an explanation of this barrier.
    unsafe fn host_barrier(&mut self);

    unsafe fn image_barrier(
        &mut self,
        image: &D::Image,
        src_layout: ImageLayout,
        dst_layout: ImageLayout,
    );

    /// Clear the buffer.
    ///
    /// This is readily supported in Vulkan, but for portability it is remarkably
    /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
    /// kernel, or organize the code not to need it.
    unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);

    unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);

    unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);

    unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image);

    // low portability, dx12 doesn't support it natively
    unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);

    /// Reset the query pool.
    ///
    /// The query pool must be reset before each use, to avoid validation errors.
    /// This is annoying, and we could tweak the API to make it implicit, doing
    /// the reset before the first timestamp write.
    unsafe fn reset_query_pool(&mut self, pool: &D::QueryPool);

    unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);

    /// Prepare the timestamps for reading. This isn't required on Vulkan but
    /// is required on (at least) DX12.
    unsafe fn finish_timestamps(&mut self, pool: &D::QueryPool) {}
}

/// A builder for pipelines with more complex layouts.
pub trait PipelineBuilder<D: Device> {
    /// Add buffers to the pipeline. Each has its own binding.
    fn add_buffers(&mut self, n_buffers: u32);
    /// Add storage images to the pipeline. Each has its own binding.
    fn add_images(&mut self, n_images: u32);
    /// Add a binding with a variable-size array of textures.
    fn add_textures(&mut self, max_textures: u32);
    unsafe fn create_compute_pipeline(
        self,
        device: &D,
        code: &D::ShaderSource,
    ) -> Result<D::Pipeline, Error>;
}

/// A builder for descriptor sets with more complex layouts.
///
/// Note: the order needs to match the pipeline building, and it also needs to
/// be buffers, then images, then textures.
pub trait DescriptorSetBuilder<D: Device> {
    fn add_buffers(&mut self, buffers: &[&D::Buffer]);
    /// Add an array of storage images.
    ///
    /// The images need to be in `ImageLayout::General` layout.
    fn add_images(&mut self, images: &[&D::Image]);
    /// Add an array of textures.
    ///
    /// The images need to be in `ImageLayout::ShaderRead` layout.
    ///
    /// The same sampler is used for all textures, which is not very sophisticated;
    /// we should have a way to vary the sampler.
    fn add_textures(&mut self, images: &[&D::Image]);
    unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
}
@@ -117,9 +117,9 @@ macro_rules! mux_device_enum {
    $crate::mux_enum! {
        $(#[$outer])*
        pub enum $assoc_type {
            Vk(<$crate::vulkan::VkDevice as $crate::Device>::$assoc_type),
            Dx12(<$crate::dx12::Dx12Device as $crate::Device>::$assoc_type),
            Mtl(<$crate::metal::MtlDevice as $crate::Device>::$assoc_type),
            Vk(<$crate::vulkan::VkDevice as $crate::backend::Device>::$assoc_type),
            Dx12(<$crate::dx12::Dx12Device as $crate::backend::Device>::$assoc_type),
            Mtl(<$crate::metal::MtlDevice as $crate::backend::Device>::$assoc_type),
        }
    }
}
@@ -154,3 +154,15 @@ macro_rules! mux_match {
        }
    };
}

/// A convenience macro for selecting a shader from included files.
#[macro_export]
macro_rules! include_shader {
    ( $device:expr, $path_base:expr) => {
        $device.choose_shader(
            include_bytes!(concat!($path_base, ".spv")),
            include_str!(concat!($path_base, ".hlsl")),
            include_str!(concat!($path_base, ".msl")),
        )
    };
}
@@ -102,7 +102,11 @@ impl MtlInstance {
                has_memory_model: false,
                use_staging_buffers: use_staging_buffers,
            };
            Ok(MtlDevice { device, cmd_queue, gpu_info })
            Ok(MtlDevice {
                device,
                cmd_queue,
                gpu_info,
            })
        } else {
            Err("can't create system default Metal device".into())
        }
@@ -119,7 +123,7 @@ impl MtlInstance {
    }
}

impl crate::Device for MtlDevice {
impl crate::backend::Device for MtlDevice {
    type Buffer = Buffer;

    type Image = Image;
@@ -282,9 +286,13 @@ impl crate::Device for MtlDevice {
        Ok(())
    }

    unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
        // fence need to be mutable here :/
        todo!()
    unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
        match fence {
            Fence::Idle => Ok(true),
            Fence::CmdBufPending(cmd_buf) => {
                Ok(cmd_buf.status() == metal::MTLCommandBufferStatus::Completed)
            }
        }
    }

    unsafe fn create_sampler(&self, params: crate::SamplerParams) -> Result<Self::Sampler, Error> {
@@ -292,18 +300,17 @@ impl crate::Device for MtlDevice {
    }
}

impl crate::CmdBuf<MtlDevice> for CmdBuf {
    unsafe fn begin(&mut self) {
    }
impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
    unsafe fn begin(&mut self) {}

    unsafe fn finish(&mut self) {
    }
    unsafe fn finish(&mut self) {}

    unsafe fn dispatch(
        &mut self,
        pipeline: &Pipeline,
        descriptor_set: &DescriptorSet,
        size: (u32, u32, u32),
        workgroup_count: (u32, u32, u32),
        workgroup_size: (u32, u32, u32),
    ) {
        let encoder = self.cmd_buf.new_compute_command_encoder();
        encoder.set_compute_pipeline_state(&pipeline.0);
@@ -313,19 +320,17 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
            ix += 1;
        }
        // TODO: set images
        let work_group_count = metal::MTLSize {
            width: size.0 as u64,
            height: size.1 as u64,
            depth: size.2 as u64,
        let workgroup_count = metal::MTLSize {
            width: workgroup_count.0 as u64,
            height: workgroup_count.1 as u64,
            depth: workgroup_count.2 as u64,
        };
        // TODO: we need to pass this in explicitly. In gfx-hal, this is parsed from
        // the spv before translation.
        let work_group_size = metal::MTLSize {
            width: 1,
            height: 1,
            depth: 1,
        let workgroup_size = metal::MTLSize {
            width: workgroup_size.0 as u64,
            height: workgroup_size.1 as u64,
            depth: workgroup_size.2 as u64,
        };
        encoder.dispatch_thread_groups(work_group_count, work_group_size);
        encoder.dispatch_thread_groups(workgroup_count, workgroup_size);
        encoder.end_encoding();
    }

@@ -334,8 +339,7 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
        // Metal's own tracking.
    }

    unsafe fn host_barrier(&mut self) {
    }
    unsafe fn host_barrier(&mut self) {}

    unsafe fn image_barrier(
        &mut self,
@@ -366,9 +370,7 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
        todo!()
    }

    unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {

    }
    unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {}

    unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
        // TODO
@@ -377,17 +379,15 @@ impl crate::CmdBuf<MtlDevice> for CmdBuf {
    }
}

impl crate::PipelineBuilder<MtlDevice> for PipelineBuilder {
impl crate::backend::PipelineBuilder<MtlDevice> for PipelineBuilder {
    fn add_buffers(&mut self, _n_buffers: u32) {
        // My understanding is that Metal infers the pipeline layout from
        // the source.
    }

    fn add_images(&mut self, _n_images: u32) {
    }
    fn add_images(&mut self, _n_images: u32) {}

    fn add_textures(&mut self, _max_textures: u32) {
    }
    fn add_textures(&mut self, _max_textures: u32) {}

    unsafe fn create_compute_pipeline(
        self,
@@ -399,12 +399,14 @@ impl crate::PipelineBuilder<MtlDevice> for PipelineBuilder {
        let library = device.device.new_library_with_source(code, &options)?;
        // This seems to be the default name from spirv-cross, but we may need to tweak.
        let function = library.get_function("main0", None)?;
        let pipeline = device.device.new_compute_pipeline_state_with_function(&function)?;
        let pipeline = device
            .device
            .new_compute_pipeline_state_with_function(&function)?;
        Ok(Pipeline(pipeline))
    }
}

impl crate::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
    fn add_buffers(&mut self, buffers: &[&Buffer]) {
        self.0.buffers.extend(buffers.iter().copied().cloned());
    }
@@ -30,10 +30,10 @@ mux_cfg! {
    #[cfg(mtl)]
    use crate::metal;
}
use crate::CmdBuf as CmdBufTrait;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::Device as DeviceTrait;
use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::backend::CmdBuf as CmdBufTrait;
use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::backend::Device as DeviceTrait;
use crate::backend::PipelineBuilder as PipelineBuilderTrait;
use crate::{BufferUsage, Error, GpuInfo, ImageLayout};

mux_enum! {
@@ -255,11 +255,11 @@ impl Device {
        }
    }

    pub unsafe fn get_fence_status(&self, fence: &Fence) -> Result<bool, Error> {
    pub unsafe fn get_fence_status(&self, fence: &mut Fence) -> Result<bool, Error> {
        mux_match! { self;
            Device::Vk(d) => d.get_fence_status(fence.vk()),
            Device::Dx12(d) => d.get_fence_status(fence.dx12()),
            Device::Mtl(d) => d.get_fence_status(fence.mtl()),
            Device::Vk(d) => d.get_fence_status(fence.vk_mut()),
            Device::Dx12(d) => d.get_fence_status(fence.dx12_mut()),
            Device::Mtl(d) => d.get_fence_status(fence.mtl_mut()),
        }
    }

@@ -400,6 +400,15 @@ impl Device {
            Device::Mtl(d) => d.write_buffer(buffer.mtl(), contents, offset, size),
        }
    }

    /// Choose shader code from the available choices.
    pub fn choose_shader<'a>(&self, _spv: &'a [u8], _hlsl: &'a str, _msl: &'a str) -> ShaderCode<'a> {
        mux_match! { self;
            Device::Vk(_d) => ShaderCode::Spv(_spv),
            Device::Dx12(_d) => ShaderCode::Hlsl(_hlsl),
            Device::Mtl(_d) => ShaderCode::Msl(_msl),
        }
    }
}

impl PipelineBuilder {
@@ -578,16 +587,25 @@ impl CmdBuf {
        }
    }

    /// Dispatch a compute shader.
    ///
    /// Note that both the number of workgroups (`workgroup_count`) and the number of
    /// threads in a workgroup (`workgroup_size`) are given. The latter is needed on
    /// Metal, while it's baked into the shader on Vulkan and DX12.
    ///
    /// Perhaps we'll have a mechanism to plumb the latter value to configure the size
    /// of a workgroup using specialization constants in the future.
    pub unsafe fn dispatch(
        &mut self,
        pipeline: &Pipeline,
        descriptor_set: &DescriptorSet,
        size: (u32, u32, u32),
        workgroup_count: (u32, u32, u32),
        workgroup_size: (u32, u32, u32),
    ) {
        mux_match! { self;
            CmdBuf::Vk(c) => c.dispatch(pipeline.vk(), descriptor_set.vk(), size),
            CmdBuf::Dx12(c) => c.dispatch(pipeline.dx12(), descriptor_set.dx12(), size),
            CmdBuf::Mtl(c) => c.dispatch(pipeline.mtl(), descriptor_set.mtl(), size),
            CmdBuf::Vk(c) => c.dispatch(pipeline.vk(), descriptor_set.vk(), workgroup_count, workgroup_size),
            CmdBuf::Dx12(c) => c.dispatch(pipeline.dx12(), descriptor_set.dx12(), workgroup_count, workgroup_size),
            CmdBuf::Mtl(c) => c.dispatch(pipeline.mtl(), descriptor_set.mtl(), workgroup_count, workgroup_size),
        }
    }

@@ -13,8 +13,10 @@ use ash::{vk, Device, Entry, Instance};
use smallvec::SmallVec;

use crate::{
    BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
    BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
};
use crate::backend::Device as DeviceTrait;

pub struct VkInstance {
    /// Retain the dynamic lib.
@@ -455,7 +457,7 @@ impl VkInstance {
    }
}

impl crate::Device for VkDevice {
impl crate::backend::Device for VkDevice {
    type Buffer = Buffer;
    type Image = Image;
    type CmdBuf = CmdBuf;
@@ -621,16 +623,13 @@ impl crate::Device for VkDevice {

    unsafe fn wait_and_reset(&self, fences: Vec<&mut Self::Fence>) -> Result<(), Error> {
        let device = &self.device.device;
        let fences = fences
            .iter()
            .map(|f| **f)
            .collect::<SmallVec<[_; 4]>>();
        let fences = fences.iter().map(|f| **f).collect::<SmallVec<[_; 4]>>();
        device.wait_for_fences(&fences, true, !0)?;
        device.reset_fences(&fences)?;
        Ok(())
    }

    unsafe fn get_fence_status(&self, fence: &Self::Fence) -> Result<bool, Error> {
    unsafe fn get_fence_status(&self, fence: &mut Self::Fence) -> Result<bool, Error> {
        let device = &self.device.device;
        Ok(device.get_fence_status(*fence)?)
    }
@@ -843,7 +842,8 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
        &mut self,
        pipeline: &Pipeline,
        descriptor_set: &DescriptorSet,
        size: (u32, u32, u32),
        workgroup_count: (u32, u32, u32),
        _workgroup_size: (u32, u32, u32),
    ) {
        let device = &self.device.device;
        device.cmd_bind_pipeline(
@@ -859,7 +859,12 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
            &[descriptor_set.descriptor_set],
            &[],
        );
        device.cmd_dispatch(self.cmd_buf, size.0, size.1, size.2);
        device.cmd_dispatch(
            self.cmd_buf,
            workgroup_count.0,
            workgroup_count.1,
            workgroup_count.2,
        );
    }

    /// Insert a pipeline barrier for all memory accesses.
@@ -1047,7 +1052,7 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
    }
}

impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
impl crate::backend::PipelineBuilder<VkDevice> for PipelineBuilder {
    fn add_buffers(&mut self, n_buffers: u32) {
        let start = self.bindings.len() as u32;
        for i in 0..n_buffers {
@@ -1153,7 +1158,7 @@ impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
    }
}

impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
impl crate::backend::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
    fn add_buffers(&mut self, buffers: &[&Buffer]) {
        self.buffers.extend(buffers.iter().map(|b| b.buffer));
    }
@@ -1307,7 +1312,11 @@ impl VkSwapchain {
        image_idx: usize,
        semaphores: &[&vk::Semaphore],
    ) -> Result<bool, Error> {
        let semaphores = semaphores.iter().copied().copied().collect::<SmallVec<[_; 4]>>();
        let semaphores = semaphores
            .iter()
            .copied()
            .copied()
            .collect::<SmallVec<[_; 4]>>();
        Ok(self.swapchain_fn.queue_present(
            self.present_queue,
            &vk::PresentInfoKHR::builder()
@@ -12,7 +12,7 @@ use ndk::native_window::NativeWindow;
use ndk_glue::Event;

use piet_gpu_hal::hub;
use piet_gpu_hal::mux::{QueryPool, Instance, Surface, Swapchain};
use piet_gpu_hal::mux::{Instance, QueryPool, Surface, Swapchain};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout};

use piet_gpu::{render_scene, PietGpuRenderContext, Renderer};
@@ -403,6 +403,7 @@ impl Renderer {
            &self.el_pipeline,
            &self.el_ds,
            (((self.n_elements + 127) / 128) as u32, 1, 1),
            (128, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 1);
        cmd_buf.memory_barrier();
@@ -410,6 +411,7 @@ impl Renderer {
            &self.tile_pipeline,
            &self.tile_ds,
            (((self.n_paths + 255) / 256) as u32, 1, 1),
            (256, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 2);
        cmd_buf.memory_barrier();
@@ -417,6 +419,7 @@ impl Renderer {
            &self.path_pipeline,
            &self.path_ds,
            (((self.n_pathseg + 31) / 32) as u32, 1, 1),
            (32, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 3);
        cmd_buf.memory_barrier();
@@ -424,6 +427,7 @@ impl Renderer {
            &self.backdrop_pipeline,
            &self.backdrop_ds,
            (((self.n_paths + 255) / 256) as u32, 1, 1),
            (256, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 4);
        // Note: this barrier is not needed as an actual dependency between
@@ -434,6 +438,7 @@ impl Renderer {
            &self.bin_pipeline,
            &self.bin_ds,
            (((self.n_paths + 255) / 256) as u32, 1, 1),
            (256, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 5);
        cmd_buf.memory_barrier();
@@ -441,6 +446,7 @@ impl Renderer {
            &self.coarse_pipeline,
            &self.coarse_ds,
            ((WIDTH as u32 + 255) / 256, (HEIGHT as u32 + 255) / 256, 1),
            (256, 256, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 6);
        cmd_buf.memory_barrier();
@@ -448,6 +454,7 @@ impl Renderer {
            &self.k4_pipeline,
            &self.k4_ds,
            ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
            (8, 4, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 7);
        cmd_buf.memory_barrier();