mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
commit
4c42da2d46
685
Cargo.lock
generated
685
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -1,8 +1,8 @@
|
||||||
use piet_gpu_hal::include_shader;
|
use piet_gpu_hal::{include_shader, BindType};
|
||||||
use piet_gpu_hal::{BufferUsage, Instance, Session};
|
use piet_gpu_hal::{BufferUsage, Instance, InstanceFlags, Session};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let (instance, _) = Instance::new(None).unwrap();
|
let (instance, _) = Instance::new(None, InstanceFlags::empty()).unwrap();
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = instance.device(None).unwrap();
|
let device = instance.device(None).unwrap();
|
||||||
let session = Session::new(device);
|
let session = Session::new(device);
|
||||||
|
@ -10,7 +10,9 @@ fn main() {
|
||||||
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
|
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
|
||||||
let buffer = session.create_buffer_init(&src, usage).unwrap();
|
let buffer = session.create_buffer_init(&src, usage).unwrap();
|
||||||
let code = include_shader!(&session, "./shader/gen/collatz");
|
let code = include_shader!(&session, "./shader/gen/collatz");
|
||||||
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
|
let pipeline = session
|
||||||
|
.create_compute_pipeline(code, &[BindType::Buffer])
|
||||||
|
.unwrap();
|
||||||
let descriptor_set = session
|
let descriptor_set = session
|
||||||
.create_simple_descriptor_set(&pipeline, &[&buffer])
|
.create_simple_descriptor_set(&pipeline, &[&buffer])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
glslang_validator = glslangValidator
|
glslang_validator = glslangValidator
|
||||||
spirv_cross = spirv-cross
|
spirv_cross = spirv-cross
|
||||||
|
dxc = dxc
|
||||||
|
|
||||||
rule glsl
|
rule glsl
|
||||||
command = $glslang_validator -V -o $out $in
|
command = $glslang_validator -V -o $out $in
|
||||||
|
@ -11,9 +12,13 @@ rule glsl
|
||||||
rule hlsl
|
rule hlsl
|
||||||
command = $spirv_cross --hlsl $in --output $out
|
command = $spirv_cross --hlsl $in --output $out
|
||||||
|
|
||||||
|
rule dxil
|
||||||
|
command = $dxc -T cs_6_0 $in -Fo $out
|
||||||
|
|
||||||
rule msl
|
rule msl
|
||||||
command = $spirv_cross --msl $in --output $out
|
command = $spirv_cross --msl $in --output $out
|
||||||
|
|
||||||
build gen/collatz.spv: glsl collatz.comp
|
build gen/collatz.spv: glsl collatz.comp
|
||||||
build gen/collatz.hlsl: hlsl gen/collatz.spv
|
build gen/collatz.hlsl: hlsl gen/collatz.spv
|
||||||
|
build gen/collatz.dxil: dxil gen/collatz.hlsl
|
||||||
build gen/collatz.msl: msl gen/collatz.spv
|
build gen/collatz.msl: msl gen/collatz.spv
|
||||||
|
|
BIN
piet-gpu-hal/examples/shader/gen/collatz.dxil
Normal file
BIN
piet-gpu-hal/examples/shader/gen/collatz.dxil
Normal file
Binary file not shown.
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
//! The generic trait for backends to implement.
|
//! The generic trait for backends to implement.
|
||||||
|
|
||||||
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams};
|
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams};
|
||||||
|
|
||||||
pub trait Device: Sized {
|
pub trait Device: Sized {
|
||||||
type Buffer: 'static;
|
type Buffer: 'static;
|
||||||
|
@ -27,7 +27,6 @@ pub trait Device: Sized {
|
||||||
type CmdBuf: CmdBuf<Self>;
|
type CmdBuf: CmdBuf<Self>;
|
||||||
type Fence;
|
type Fence;
|
||||||
type Semaphore;
|
type Semaphore;
|
||||||
type PipelineBuilder: PipelineBuilder<Self>;
|
|
||||||
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
|
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
|
||||||
type Sampler;
|
type Sampler;
|
||||||
type ShaderSource: ?Sized;
|
type ShaderSource: ?Sized;
|
||||||
|
@ -60,33 +59,21 @@ pub trait Device: Sized {
|
||||||
/// Maybe doesn't need result return?
|
/// Maybe doesn't need result return?
|
||||||
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
|
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
|
||||||
|
|
||||||
/// Start building a pipeline.
|
/// Build a compute pipeline.
|
||||||
///
|
///
|
||||||
/// A pipeline is a bit of shader IR plus a signature for what kinds of resources
|
/// A pipeline is a bit of shader IR plus a signature for what kinds of resources
|
||||||
/// it expects.
|
/// it expects.
|
||||||
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder;
|
unsafe fn create_compute_pipeline(
|
||||||
|
&self,
|
||||||
|
code: &Self::ShaderSource,
|
||||||
|
bind_types: &[BindType],
|
||||||
|
) -> Result<Self::Pipeline, Error>;
|
||||||
|
|
||||||
/// Start building a descriptor set.
|
/// Start building a descriptor set.
|
||||||
///
|
///
|
||||||
/// A descriptor set is a binding of resources for a given pipeline.
|
/// A descriptor set is a binding of resources for a given pipeline.
|
||||||
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;
|
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder;
|
||||||
|
|
||||||
/// Create a simple compute pipeline that operates on buffers and storage images.
|
|
||||||
///
|
|
||||||
/// This is provided as a convenience but will probably go away, as the functionality
|
|
||||||
/// is subsumed by the builder.
|
|
||||||
unsafe fn create_simple_compute_pipeline(
|
|
||||||
&self,
|
|
||||||
code: &Self::ShaderSource,
|
|
||||||
n_buffers: u32,
|
|
||||||
n_images: u32,
|
|
||||||
) -> Result<Self::Pipeline, Error> {
|
|
||||||
let mut builder = self.pipeline_builder();
|
|
||||||
builder.add_buffers(n_buffers);
|
|
||||||
builder.add_images(n_images);
|
|
||||||
builder.create_compute_pipeline(self, code)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a descriptor set for a given pipeline, binding buffers and images.
|
/// Create a descriptor set for a given pipeline, binding buffers and images.
|
||||||
///
|
///
|
||||||
/// This is provided as a convenience but will probably go away, as the functionality
|
/// This is provided as a convenience but will probably go away, as the functionality
|
||||||
|
@ -236,21 +223,6 @@ pub trait CmdBuf<D: Device> {
|
||||||
unsafe fn finish_timestamps(&mut self, _pool: &D::QueryPool) {}
|
unsafe fn finish_timestamps(&mut self, _pool: &D::QueryPool) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A builder for pipelines with more complex layouts.
|
|
||||||
pub trait PipelineBuilder<D: Device> {
|
|
||||||
/// Add buffers to the pipeline. Each has its own binding.
|
|
||||||
fn add_buffers(&mut self, n_buffers: u32);
|
|
||||||
/// Add storage images to the pipeline. Each has its own binding.
|
|
||||||
fn add_images(&mut self, n_images: u32);
|
|
||||||
/// Add a binding with a variable-size array of textures.
|
|
||||||
fn add_textures(&mut self, max_textures: u32);
|
|
||||||
unsafe fn create_compute_pipeline(
|
|
||||||
self,
|
|
||||||
device: &D,
|
|
||||||
code: &D::ShaderSource,
|
|
||||||
) -> Result<D::Pipeline, Error>;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A builder for descriptor sets with more complex layouts.
|
/// A builder for descriptor sets with more complex layouts.
|
||||||
///
|
///
|
||||||
/// Note: the order needs to match the pipeline building, and it also needs to
|
/// Note: the order needs to match the pipeline building, and it also needs to
|
||||||
|
|
|
@ -3,17 +3,19 @@
|
||||||
mod error;
|
mod error;
|
||||||
mod wrappers;
|
mod wrappers;
|
||||||
|
|
||||||
use std::{cell::Cell, convert::TryInto, mem, ptr};
|
use std::{cell::Cell, convert::{TryFrom, TryInto}, mem, ptr};
|
||||||
|
|
||||||
use winapi::shared::minwindef::TRUE;
|
use winapi::shared::minwindef::TRUE;
|
||||||
use winapi::shared::{dxgi, dxgi1_2, dxgi1_3, dxgitype};
|
use winapi::shared::{dxgi, dxgi1_2, dxgitype};
|
||||||
|
#[allow(unused)]
|
||||||
|
use winapi::shared::dxgi1_3; // for error reporting in debug mode
|
||||||
use winapi::um::d3d12;
|
use winapi::um::d3d12;
|
||||||
|
|
||||||
use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
|
use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
|
||||||
|
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, WorkgroupLimits};
|
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, WorkgroupLimits};
|
||||||
|
|
||||||
use self::wrappers::{CommandAllocator, CommandQueue, Device, Factory4, Resource, ShaderByteCode};
|
use self::wrappers::{CommandAllocator, CommandQueue, Device, Factory4, Resource, ShaderByteCode};
|
||||||
|
|
||||||
|
@ -83,13 +85,6 @@ pub struct Fence {
|
||||||
/// semaphore is needed for presentation on DX12.
|
/// semaphore is needed for presentation on DX12.
|
||||||
pub struct Semaphore;
|
pub struct Semaphore;
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct PipelineBuilder {
|
|
||||||
ranges: Vec<d3d12::D3D12_DESCRIPTOR_RANGE>,
|
|
||||||
n_uav: u32,
|
|
||||||
// TODO: add counters for other resource types
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct DescriptorSetBuilder {
|
pub struct DescriptorSetBuilder {
|
||||||
|
@ -239,14 +234,13 @@ impl crate::backend::Device for Dx12Device {
|
||||||
|
|
||||||
type Semaphore = Semaphore;
|
type Semaphore = Semaphore;
|
||||||
|
|
||||||
type PipelineBuilder = PipelineBuilder;
|
|
||||||
|
|
||||||
type DescriptorSetBuilder = DescriptorSetBuilder;
|
type DescriptorSetBuilder = DescriptorSetBuilder;
|
||||||
|
|
||||||
type Sampler = ();
|
type Sampler = ();
|
||||||
|
|
||||||
// Currently this is HLSL source, but we'll probably change it to IR.
|
// Currently due to type inflexibility this is hardcoded to either HLSL or
|
||||||
type ShaderSource = str;
|
// DXIL, but it would be nice to be able to handle both at runtime.
|
||||||
|
type ShaderSource = [u8];
|
||||||
|
|
||||||
fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error> {
|
fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error> {
|
||||||
// TODO: consider supporting BufferUsage::QUERY_RESOLVE here rather than
|
// TODO: consider supporting BufferUsage::QUERY_RESOLVE here rather than
|
||||||
|
@ -289,9 +283,7 @@ impl crate::backend::Device for Dx12Device {
|
||||||
|
|
||||||
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error> {
|
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error> {
|
||||||
let list_type = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT;
|
let list_type = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||||||
let allocator =
|
let allocator = unsafe { self.device.create_command_allocator(list_type)? };
|
||||||
unsafe { self.device.create_command_allocator(list_type)? }
|
|
||||||
;
|
|
||||||
let node_mask = 0;
|
let node_mask = 0;
|
||||||
unsafe {
|
unsafe {
|
||||||
let c = self
|
let c = self
|
||||||
|
@ -420,8 +412,94 @@ impl crate::backend::Device for Dx12Device {
|
||||||
self.gpu_info.clone()
|
self.gpu_info.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder {
|
unsafe fn create_compute_pipeline(
|
||||||
PipelineBuilder::default()
|
&self,
|
||||||
|
code: &Self::ShaderSource,
|
||||||
|
bind_types: &[BindType],
|
||||||
|
) -> Result<Pipeline, Error> {
|
||||||
|
if u32::try_from(bind_types.len()).is_err() {
|
||||||
|
panic!("bind type length overflow");
|
||||||
|
}
|
||||||
|
let mut ranges = Vec::new();
|
||||||
|
let mut i = 0;
|
||||||
|
fn map_range_type(bind_type: BindType) -> d3d12::D3D12_DESCRIPTOR_RANGE_TYPE {
|
||||||
|
match bind_type {
|
||||||
|
BindType::Buffer | BindType::Image | BindType::ImageRead => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
|
||||||
|
BindType::BufReadOnly => d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while i < bind_types.len() {
|
||||||
|
let range_type = map_range_type(bind_types[i]);
|
||||||
|
let mut end = i + 1;
|
||||||
|
while end < bind_types.len() && map_range_type(bind_types[end]) == range_type {
|
||||||
|
end += 1;
|
||||||
|
}
|
||||||
|
let n_descriptors = (end - i) as u32;
|
||||||
|
ranges.push(d3d12::D3D12_DESCRIPTOR_RANGE {
|
||||||
|
RangeType: range_type,
|
||||||
|
NumDescriptors: n_descriptors,
|
||||||
|
BaseShaderRegister: i as u32,
|
||||||
|
RegisterSpace: 0,
|
||||||
|
OffsetInDescriptorsFromTableStart: d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
||||||
|
});
|
||||||
|
i = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could always have ShaderSource as [u8] even when it's HLSL, and use the
|
||||||
|
// magic number to distinguish. In any case, for now it's hardcoded as one or
|
||||||
|
// the other.
|
||||||
|
/*
|
||||||
|
// HLSL code path
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
let flags = winapi::um::d3dcompiler::D3DCOMPILE_DEBUG
|
||||||
|
| winapi::um::d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION;
|
||||||
|
#[cfg(not(debug_assertions))]
|
||||||
|
let flags = 0;
|
||||||
|
let shader_blob = ShaderByteCode::compile(code, "cs_5_1", "main", flags)?;
|
||||||
|
let shader = ShaderByteCode::from_blob(shader_blob);
|
||||||
|
*/
|
||||||
|
|
||||||
|
// DXIL code path
|
||||||
|
let shader = ShaderByteCode::from_slice(code);
|
||||||
|
|
||||||
|
let mut root_parameter = d3d12::D3D12_ROOT_PARAMETER {
|
||||||
|
ParameterType: d3d12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
||||||
|
ShaderVisibility: d3d12::D3D12_SHADER_VISIBILITY_ALL,
|
||||||
|
..mem::zeroed()
|
||||||
|
};
|
||||||
|
*root_parameter.u.DescriptorTable_mut() = d3d12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
||||||
|
NumDescriptorRanges: ranges.len() as u32,
|
||||||
|
pDescriptorRanges: ranges.as_ptr(),
|
||||||
|
};
|
||||||
|
let root_signature_desc = d3d12::D3D12_ROOT_SIGNATURE_DESC {
|
||||||
|
NumParameters: 1,
|
||||||
|
pParameters: &root_parameter,
|
||||||
|
NumStaticSamplers: 0,
|
||||||
|
pStaticSamplers: ptr::null(),
|
||||||
|
Flags: d3d12::D3D12_ROOT_SIGNATURE_FLAG_NONE,
|
||||||
|
};
|
||||||
|
let root_signature_blob = wrappers::RootSignature::serialize_description(
|
||||||
|
&root_signature_desc,
|
||||||
|
d3d12::D3D_ROOT_SIGNATURE_VERSION_1,
|
||||||
|
)?;
|
||||||
|
let root_signature = self
|
||||||
|
.device
|
||||||
|
.create_root_signature(0, root_signature_blob)?;
|
||||||
|
let desc = d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC {
|
||||||
|
pRootSignature: root_signature.0.as_raw(),
|
||||||
|
CS: shader.bytecode,
|
||||||
|
NodeMask: 0,
|
||||||
|
CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE {
|
||||||
|
pCachedBlob: ptr::null(),
|
||||||
|
CachedBlobSizeInBytes: 0,
|
||||||
|
},
|
||||||
|
Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE,
|
||||||
|
};
|
||||||
|
let pipeline_state = self.device.create_compute_pipeline_state(&desc)?;
|
||||||
|
Ok(Pipeline {
|
||||||
|
pipeline_state,
|
||||||
|
root_signature,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder {
|
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder {
|
||||||
|
@ -451,8 +529,7 @@ impl Dx12Device {
|
||||||
|
|
||||||
impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
||||||
unsafe fn begin(&mut self) {
|
unsafe fn begin(&mut self) {
|
||||||
if self.needs_reset {
|
if self.needs_reset {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn finish(&mut self) {
|
unsafe fn finish(&mut self) {
|
||||||
|
@ -559,86 +636,6 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl crate::backend::PipelineBuilder<Dx12Device> for PipelineBuilder {
|
|
||||||
fn add_buffers(&mut self, n_buffers: u32) {
|
|
||||||
// Note: if the buffer is readonly, then it needs to be bound
|
|
||||||
// as an SRV, not a UAV. I think that requires distinguishing
|
|
||||||
// readonly and read-write cases in pipeline and descriptor set
|
|
||||||
// creation. For now we punt.
|
|
||||||
if n_buffers != 0 {
|
|
||||||
self.ranges.push(d3d12::D3D12_DESCRIPTOR_RANGE {
|
|
||||||
RangeType: d3d12::D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
|
|
||||||
NumDescriptors: n_buffers,
|
|
||||||
BaseShaderRegister: self.n_uav,
|
|
||||||
RegisterSpace: 0,
|
|
||||||
OffsetInDescriptorsFromTableStart: d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND,
|
|
||||||
});
|
|
||||||
self.n_uav += n_buffers;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_images(&mut self, n_images: u32) {
|
|
||||||
// These are UAV images, so the descriptor type is the same as buffers.
|
|
||||||
self.add_buffers(n_images);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_textures(&mut self, _max_textures: u32) {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn create_compute_pipeline(
|
|
||||||
self,
|
|
||||||
device: &Dx12Device,
|
|
||||||
code: &str,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
let flags = winapi::um::d3dcompiler::D3DCOMPILE_DEBUG
|
|
||||||
| winapi::um::d3dcompiler::D3DCOMPILE_SKIP_OPTIMIZATION;
|
|
||||||
#[cfg(not(debug_assertions))]
|
|
||||||
let flags = 0;
|
|
||||||
let shader_blob = ShaderByteCode::compile(code, "cs_5_1", "main", flags)?;
|
|
||||||
let shader = ShaderByteCode::from_blob(shader_blob);
|
|
||||||
let mut root_parameter = d3d12::D3D12_ROOT_PARAMETER {
|
|
||||||
ParameterType: d3d12::D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
|
|
||||||
ShaderVisibility: d3d12::D3D12_SHADER_VISIBILITY_ALL,
|
|
||||||
..mem::zeroed()
|
|
||||||
};
|
|
||||||
*root_parameter.u.DescriptorTable_mut() = d3d12::D3D12_ROOT_DESCRIPTOR_TABLE {
|
|
||||||
NumDescriptorRanges: self.ranges.len().try_into()?,
|
|
||||||
pDescriptorRanges: self.ranges.as_ptr(),
|
|
||||||
};
|
|
||||||
let root_signature_desc = d3d12::D3D12_ROOT_SIGNATURE_DESC {
|
|
||||||
NumParameters: 1,
|
|
||||||
pParameters: &root_parameter,
|
|
||||||
NumStaticSamplers: 0,
|
|
||||||
pStaticSamplers: ptr::null(),
|
|
||||||
Flags: d3d12::D3D12_ROOT_SIGNATURE_FLAG_NONE,
|
|
||||||
};
|
|
||||||
let root_signature_blob = wrappers::RootSignature::serialize_description(
|
|
||||||
&root_signature_desc,
|
|
||||||
d3d12::D3D_ROOT_SIGNATURE_VERSION_1,
|
|
||||||
)?;
|
|
||||||
let root_signature = device
|
|
||||||
.device
|
|
||||||
.create_root_signature(0, root_signature_blob)?;
|
|
||||||
let desc = d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC {
|
|
||||||
pRootSignature: root_signature.0.as_raw(),
|
|
||||||
CS: shader.bytecode,
|
|
||||||
NodeMask: 0,
|
|
||||||
CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE {
|
|
||||||
pCachedBlob: ptr::null(),
|
|
||||||
CachedBlobSizeInBytes: 0,
|
|
||||||
},
|
|
||||||
Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE,
|
|
||||||
};
|
|
||||||
let pipeline_state = device.device.create_compute_pipeline_state(&desc)?;
|
|
||||||
Ok(Pipeline {
|
|
||||||
pipeline_state,
|
|
||||||
root_signature,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
|
impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
|
||||||
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||||
// Note: we could get rid of the clone here (which is an AddRef)
|
// Note: we could get rid of the clone here (which is an AddRef)
|
||||||
|
|
|
@ -10,9 +10,7 @@ use crate::dx12::error::{self, error_if_failed_else_unit, explain_error, Error};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
use std::sync::atomic::{AtomicPtr, Ordering};
|
use std::sync::atomic::{AtomicPtr, Ordering};
|
||||||
use std::{ffi, mem, ptr};
|
use std::{ffi, mem, ptr};
|
||||||
use winapi::shared::{
|
use winapi::shared::{dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgiformat, dxgitype, minwindef, windef};
|
||||||
dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgiformat, dxgitype, minwindef, windef,
|
|
||||||
};
|
|
||||||
use winapi::um::d3dcommon::ID3DBlob;
|
use winapi::um::d3dcommon::ID3DBlob;
|
||||||
use winapi::um::{
|
use winapi::um::{
|
||||||
d3d12, d3d12sdklayers, d3dcommon, d3dcompiler, dxgidebug, handleapi, synchapi, winnt,
|
d3d12, d3d12sdklayers, d3dcommon, d3dcompiler, dxgidebug, handleapi, synchapi, winnt,
|
||||||
|
@ -198,7 +196,7 @@ impl Factory4 {
|
||||||
error_if_failed_else_unit(self.0.EnumAdapters1(id, &mut adapter))?;
|
error_if_failed_else_unit(self.0.EnumAdapters1(id, &mut adapter))?;
|
||||||
let mut desc = mem::zeroed();
|
let mut desc = mem::zeroed();
|
||||||
(*adapter).GetDesc(&mut desc);
|
(*adapter).GetDesc(&mut desc);
|
||||||
println!("desc: {:?}", desc.Description);
|
//println!("desc: {:?}", desc.Description);
|
||||||
Ok(Adapter1(ComPtr::from_raw(adapter)))
|
Ok(Adapter1(ComPtr::from_raw(adapter)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -278,6 +276,7 @@ impl SwapChain3 {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Blob {
|
impl Blob {
|
||||||
|
#[allow(unused)]
|
||||||
pub unsafe fn print_to_console(blob: &Blob) {
|
pub unsafe fn print_to_console(blob: &Blob) {
|
||||||
println!("==SHADER COMPILE MESSAGES==");
|
println!("==SHADER COMPILE MESSAGES==");
|
||||||
let message = {
|
let message = {
|
||||||
|
@ -563,7 +562,6 @@ impl Device {
|
||||||
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
|
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pub unsafe fn create_buffer(
|
pub unsafe fn create_buffer(
|
||||||
&self,
|
&self,
|
||||||
buffer_size_in_bytes: u32,
|
buffer_size_in_bytes: u32,
|
||||||
|
@ -717,13 +715,13 @@ impl RootSignature {
|
||||||
let hresult =
|
let hresult =
|
||||||
d3d12::D3D12SerializeRootSignature(desc, version, &mut blob, &mut error_blob_ptr);
|
d3d12::D3D12SerializeRootSignature(desc, version, &mut blob, &mut error_blob_ptr);
|
||||||
|
|
||||||
let error_blob = if error_blob_ptr.is_null() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(Blob(ComPtr::from_raw(error_blob_ptr)))
|
|
||||||
};
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
{
|
{
|
||||||
|
let error_blob = if error_blob_ptr.is_null() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(Blob(ComPtr::from_raw(error_blob_ptr)))
|
||||||
|
};
|
||||||
if let Some(error_blob) = &error_blob {
|
if let Some(error_blob) = &error_blob {
|
||||||
Blob::print_to_console(error_blob);
|
Blob::print_to_console(error_blob);
|
||||||
}
|
}
|
||||||
|
@ -739,6 +737,7 @@ impl ShaderByteCode {
|
||||||
// `blob` may not be null.
|
// `blob` may not be null.
|
||||||
// TODO: this is not super elegant, maybe want to move the get
|
// TODO: this is not super elegant, maybe want to move the get
|
||||||
// operations closer to where they're used.
|
// operations closer to where they're used.
|
||||||
|
#[allow(unused)]
|
||||||
pub unsafe fn from_blob(blob: Blob) -> ShaderByteCode {
|
pub unsafe fn from_blob(blob: Blob) -> ShaderByteCode {
|
||||||
ShaderByteCode {
|
ShaderByteCode {
|
||||||
bytecode: d3d12::D3D12_SHADER_BYTECODE {
|
bytecode: d3d12::D3D12_SHADER_BYTECODE {
|
||||||
|
@ -752,6 +751,7 @@ impl ShaderByteCode {
|
||||||
/// Compile a shader from raw HLSL.
|
/// Compile a shader from raw HLSL.
|
||||||
///
|
///
|
||||||
/// * `target`: example format: `ps_5_1`.
|
/// * `target`: example format: `ps_5_1`.
|
||||||
|
#[allow(unused)]
|
||||||
pub unsafe fn compile(
|
pub unsafe fn compile(
|
||||||
source: &str,
|
source: &str,
|
||||||
target: &str,
|
target: &str,
|
||||||
|
@ -798,6 +798,24 @@ impl ShaderByteCode {
|
||||||
|
|
||||||
Ok(Blob(ComPtr::from_raw(shader_blob_ptr)))
|
Ok(Blob(ComPtr::from_raw(shader_blob_ptr)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create bytecode from a slice.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// This call elides the lifetime from the slice. The caller is responsible
|
||||||
|
/// for making sure the reference remains valid for the lifetime of this
|
||||||
|
/// object.
|
||||||
|
#[allow(unused)]
|
||||||
|
pub unsafe fn from_slice(bytecode: &[u8]) -> ShaderByteCode {
|
||||||
|
ShaderByteCode {
|
||||||
|
bytecode: d3d12::D3D12_SHADER_BYTECODE {
|
||||||
|
BytecodeLength: bytecode.len(),
|
||||||
|
pShaderBytecode: bytecode.as_ptr() as *const _,
|
||||||
|
},
|
||||||
|
blob: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Fence {
|
impl Fence {
|
||||||
|
@ -864,7 +882,11 @@ impl GraphicsCommandList {
|
||||||
explain_error(self.0.Close(), "error closing command list")
|
explain_error(self.0.Close(), "error closing command list")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn reset(&self, allocator: &CommandAllocator, initial_pso: Option<&PipelineState>) -> Result<(), Error> {
|
pub unsafe fn reset(
|
||||||
|
&self,
|
||||||
|
allocator: &CommandAllocator,
|
||||||
|
initial_pso: Option<&PipelineState>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
let p_initial_state = initial_pso.map(|p| p.0.as_raw()).unwrap_or(ptr::null_mut());
|
let p_initial_state = initial_pso.map(|p| p.0.as_raw()).unwrap_or(ptr::null_mut());
|
||||||
error::error_if_failed_else_unit(self.0.Reset(allocator.0.as_raw(), p_initial_state))
|
error::error_if_failed_else_unit(self.0.Reset(allocator.0.as_raw(), p_initial_state))
|
||||||
}
|
}
|
||||||
|
@ -1072,9 +1094,8 @@ pub unsafe fn create_transition_resource_barrier(
|
||||||
resource_barrier
|
resource_barrier
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
pub unsafe fn enable_debug_layer() -> Result<(), Error> {
|
pub unsafe fn enable_debug_layer() -> Result<(), Error> {
|
||||||
println!("enabling debug layer.");
|
|
||||||
|
|
||||||
let mut debug_controller: *mut d3d12sdklayers::ID3D12Debug1 = ptr::null_mut();
|
let mut debug_controller: *mut d3d12sdklayers::ID3D12Debug1 = ptr::null_mut();
|
||||||
explain_error(
|
explain_error(
|
||||||
d3d12::D3D12GetDebugInterface(
|
d3d12::D3D12GetDebugInterface(
|
||||||
|
|
|
@ -11,9 +11,9 @@ use std::sync::{Arc, Mutex, Weak};
|
||||||
|
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
use crate::mux;
|
use crate::{mux, BackendType};
|
||||||
|
|
||||||
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams};
|
use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams};
|
||||||
|
|
||||||
pub use crate::mux::{DescriptorSet, Fence, Pipeline, QueryPool, Sampler, Semaphore, ShaderCode};
|
pub use crate::mux::{DescriptorSet, Fence, Pipeline, QueryPool, Sampler, Semaphore, ShaderCode};
|
||||||
|
|
||||||
|
@ -100,12 +100,6 @@ struct BufferInner {
|
||||||
session: Weak<SessionInner>,
|
session: Weak<SessionInner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A builder for creating pipelines.
|
|
||||||
///
|
|
||||||
/// Configure the signature (buffers and images accessed) for a pipeline,
|
|
||||||
/// which is essentially compiled shader code, ready to be dispatched.
|
|
||||||
pub struct PipelineBuilder(mux::PipelineBuilder);
|
|
||||||
|
|
||||||
/// A builder for creating descriptor sets.
|
/// A builder for creating descriptor sets.
|
||||||
///
|
///
|
||||||
/// Add bindings to the descriptor set before dispatching a shader.
|
/// Add bindings to the descriptor set before dispatching a shader.
|
||||||
|
@ -316,26 +310,16 @@ impl Session {
|
||||||
self.0.device.create_semaphore()
|
self.0.device.create_semaphore()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This creates a pipeline that operates on some buffers and images.
|
/// Create a compute shader pipeline.
|
||||||
///
|
|
||||||
/// The descriptor set layout is just some number of storage buffers
|
|
||||||
/// and storage images (this might change).
|
|
||||||
pub unsafe fn create_simple_compute_pipeline<'a>(
|
|
||||||
&self,
|
|
||||||
code: ShaderCode<'a>,
|
|
||||||
n_buffers: u32,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
self.pipeline_builder()
|
|
||||||
.add_buffers(n_buffers)
|
|
||||||
.create_compute_pipeline(self, code)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Start building a pipeline.
|
|
||||||
///
|
///
|
||||||
/// A pipeline is essentially a compiled shader, with more specific
|
/// A pipeline is essentially a compiled shader, with more specific
|
||||||
/// details about what resources may be bound to it.
|
/// details about what resources may be bound to it.
|
||||||
pub unsafe fn pipeline_builder(&self) -> PipelineBuilder {
|
pub unsafe fn create_compute_pipeline<'a>(
|
||||||
PipelineBuilder(self.0.device.pipeline_builder())
|
&self,
|
||||||
|
code: ShaderCode<'a>,
|
||||||
|
bind_types: &[BindType],
|
||||||
|
) -> Result<Pipeline, Error> {
|
||||||
|
self.0.device.create_compute_pipeline(code, bind_types)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a descriptor set for a simple pipeline that just references buffers.
|
/// Create a descriptor set for a simple pipeline that just references buffers.
|
||||||
|
@ -385,8 +369,13 @@ impl Session {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Choose shader code from the available choices.
|
/// Choose shader code from the available choices.
|
||||||
pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, msl: &'a str) -> ShaderCode<'a> {
|
pub fn choose_shader<'a>(&self, spv: &'a [u8], hlsl: &'a str, dxil: &'a [u8], msl: &'a str) -> ShaderCode<'a> {
|
||||||
self.0.device.choose_shader(spv, hlsl, msl)
|
self.0.device.choose_shader(spv, hlsl, dxil, msl)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report the backend type that was chosen.
|
||||||
|
pub fn backend_type(&self) -> BackendType {
|
||||||
|
self.0.device.backend_type()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -729,38 +718,6 @@ impl Buffer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PipelineBuilder {
|
|
||||||
/// Add buffers to the pipeline. Each has its own binding.
|
|
||||||
pub fn add_buffers(mut self, n_buffers: u32) -> Self {
|
|
||||||
self.0.add_buffers(n_buffers);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add storage images to the pipeline. Each has its own binding.
|
|
||||||
pub fn add_images(mut self, n_images: u32) -> Self {
|
|
||||||
self.0.add_images(n_images);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add a binding with a variable-size array of textures.
|
|
||||||
pub fn add_textures(mut self, max_textures: u32) -> Self {
|
|
||||||
self.0.add_textures(max_textures);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create the compute pipeline.
|
|
||||||
///
|
|
||||||
/// The shader code must be given in an appropriate format for
|
|
||||||
/// the back-end. See [`Session::choose_shader`] for a helper.
|
|
||||||
pub unsafe fn create_compute_pipeline<'a>(
|
|
||||||
self,
|
|
||||||
session: &Session,
|
|
||||||
code: ShaderCode<'a>,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
self.0.create_compute_pipeline(&session.0.device, code)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DescriptorSetBuilder {
|
impl DescriptorSetBuilder {
|
||||||
pub fn add_buffers<'a>(mut self, buffers: impl IntoRefs<'a, Buffer>) -> Self {
|
pub fn add_buffers<'a>(mut self, buffers: impl IntoRefs<'a, Buffer>) -> Self {
|
||||||
let mux_buffers = buffers
|
let mux_buffers = buffers
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
/// The cross-platform abstraction for a GPU device.
|
//! The cross-platform abstraction for a GPU device.
|
||||||
///
|
//!
|
||||||
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
|
//! This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
|
||||||
/// In time, it may go away and be replaced by either gfx-hal or wgpu.
|
//! In time, it may go away and be replaced by either gfx-hal or wgpu.
|
||||||
|
|
||||||
use bitflags::bitflags;
|
use bitflags::bitflags;
|
||||||
|
|
||||||
mod backend;
|
mod backend;
|
||||||
|
@ -17,8 +18,8 @@ pub use crate::mux::{
|
||||||
Swapchain,
|
Swapchain,
|
||||||
};
|
};
|
||||||
pub use hub::{
|
pub use hub::{
|
||||||
Buffer, CmdBuf, DescriptorSetBuilder, Image, PipelineBuilder, PlainData, RetainResource,
|
Buffer, CmdBuf, DescriptorSetBuilder, Image, PlainData, RetainResource, Session,
|
||||||
Session, SubmittedCmdBuf,
|
SubmittedCmdBuf,
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: because these are conditionally included, "cargo fmt" does not
|
// TODO: because these are conditionally included, "cargo fmt" does not
|
||||||
|
@ -36,9 +37,27 @@ mod metal;
|
||||||
|
|
||||||
/// The common error type for the crate.
|
/// The common error type for the crate.
|
||||||
///
|
///
|
||||||
/// This keeps things imple and can be expanded later.
|
/// This keeps things simple and can be expanded later.
|
||||||
pub type Error = Box<dyn std::error::Error>;
|
pub type Error = Box<dyn std::error::Error>;
|
||||||
|
|
||||||
|
bitflags! {
|
||||||
|
/// Options when creating an instance.
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct InstanceFlags: u32 {
|
||||||
|
/// Prefer DX12 over Vulkan.
|
||||||
|
const DX12 = 0x1;
|
||||||
|
// TODO: discrete vs integrated selection
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The GPU backend that was selected.
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||||
|
pub enum BackendType {
|
||||||
|
Vulkan,
|
||||||
|
Dx12,
|
||||||
|
Metal,
|
||||||
|
}
|
||||||
|
|
||||||
/// An image layout state.
|
/// An image layout state.
|
||||||
///
|
///
|
||||||
/// An image must be in a particular layout state to be used for
|
/// An image must be in a particular layout state to be used for
|
||||||
|
@ -84,10 +103,31 @@ bitflags! {
|
||||||
const STORAGE = 0x80;
|
const STORAGE = 0x80;
|
||||||
/// The buffer can be used to store the results of queries.
|
/// The buffer can be used to store the results of queries.
|
||||||
const QUERY_RESOLVE = 0x200;
|
const QUERY_RESOLVE = 0x200;
|
||||||
|
/// The buffer may be cleared.
|
||||||
|
const CLEAR = 0x8000;
|
||||||
// May add other types.
|
// May add other types.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The type of resource that will be bound to a slot in a shader.
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum BindType {
|
||||||
|
/// A storage buffer with read/write access.
|
||||||
|
Buffer,
|
||||||
|
/// A storage buffer with read only access.
|
||||||
|
BufReadOnly,
|
||||||
|
/// A storage image.
|
||||||
|
Image,
|
||||||
|
/// A storage image with read only access.
|
||||||
|
///
|
||||||
|
/// A note on this. None of the backends are currently making a
|
||||||
|
/// distinction between Image and ImageRead as far as bindings go,
|
||||||
|
/// but the `--hlsl-nonwritable-uav-texture-as-srv` option to
|
||||||
|
/// spirv-cross (marked as unstable) would do so.
|
||||||
|
ImageRead,
|
||||||
|
// TODO: Uniform, Sampler, maybe others
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
/// Information about the GPU.
|
/// Information about the GPU.
|
||||||
pub struct GpuInfo {
|
pub struct GpuInfo {
|
||||||
|
|
|
@ -198,6 +198,7 @@ macro_rules! include_shader {
|
||||||
$device.choose_shader(
|
$device.choose_shader(
|
||||||
include_bytes!(concat!($path_base, ".spv")),
|
include_bytes!(concat!($path_base, ".spv")),
|
||||||
include_str!(concat!($path_base, ".hlsl")),
|
include_str!(concat!($path_base, ".hlsl")),
|
||||||
|
include_bytes!(concat!($path_base, ".dxil")),
|
||||||
include_str!(concat!($path_base, ".msl")),
|
include_str!(concat!($path_base, ".msl")),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
|
@ -82,8 +82,6 @@ pub struct CmdBuf {
|
||||||
|
|
||||||
pub struct QueryPool;
|
pub struct QueryPool;
|
||||||
|
|
||||||
pub struct PipelineBuilder;
|
|
||||||
|
|
||||||
pub struct Pipeline(metal::ComputePipelineState);
|
pub struct Pipeline(metal::ComputePipelineState);
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -220,8 +218,6 @@ impl crate::backend::Device for MtlDevice {
|
||||||
|
|
||||||
type Semaphore = Semaphore;
|
type Semaphore = Semaphore;
|
||||||
|
|
||||||
type PipelineBuilder = PipelineBuilder;
|
|
||||||
|
|
||||||
type DescriptorSetBuilder = DescriptorSetBuilder;
|
type DescriptorSetBuilder = DescriptorSetBuilder;
|
||||||
|
|
||||||
type Sampler = ();
|
type Sampler = ();
|
||||||
|
@ -273,8 +269,18 @@ impl crate::backend::Device for MtlDevice {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder {
|
unsafe fn create_compute_pipeline(
|
||||||
PipelineBuilder
|
&self,
|
||||||
|
code: &Self::ShaderSource,
|
||||||
|
_bind_types: &[crate::BindType],
|
||||||
|
) -> Result<Self::Pipeline, Error> {
|
||||||
|
let options = metal::CompileOptions::new();
|
||||||
|
let library = self.device.new_library_with_source(code, &options)?;
|
||||||
|
let function = library.get_function("main0", None)?;
|
||||||
|
let pipeline = self
|
||||||
|
.device
|
||||||
|
.new_compute_pipeline_state_with_function(&function)?;
|
||||||
|
Ok(Pipeline(pipeline))
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder {
|
unsafe fn descriptor_set_builder(&self) -> Self::DescriptorSetBuilder {
|
||||||
|
@ -552,33 +558,6 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl crate::backend::PipelineBuilder<MtlDevice> for PipelineBuilder {
|
|
||||||
fn add_buffers(&mut self, _n_buffers: u32) {
|
|
||||||
// My understanding is that Metal infers the pipeline layout from
|
|
||||||
// the source.
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_images(&mut self, _n_images: u32) {}
|
|
||||||
|
|
||||||
fn add_textures(&mut self, _max_textures: u32) {}
|
|
||||||
|
|
||||||
unsafe fn create_compute_pipeline(
|
|
||||||
self,
|
|
||||||
device: &MtlDevice,
|
|
||||||
code: &str,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
let options = metal::CompileOptions::new();
|
|
||||||
// Probably want to set MSL version here.
|
|
||||||
let library = device.device.new_library_with_source(code, &options)?;
|
|
||||||
// This seems to be the default name from spirv-cross, but we may need to tweak.
|
|
||||||
let function = library.get_function("main0", None)?;
|
|
||||||
let pipeline = device
|
|
||||||
.device
|
|
||||||
.new_compute_pipeline_state_with_function(&function)?;
|
|
||||||
Ok(Pipeline(pipeline))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
|
impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
|
||||||
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||||
self.0.buffers.extend(buffers.iter().copied().cloned());
|
self.0.buffers.extend(buffers.iter().copied().cloned());
|
||||||
|
|
|
@ -33,8 +33,9 @@ mux_cfg! {
|
||||||
use crate::backend::CmdBuf as CmdBufTrait;
|
use crate::backend::CmdBuf as CmdBufTrait;
|
||||||
use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
||||||
use crate::backend::Device as DeviceTrait;
|
use crate::backend::Device as DeviceTrait;
|
||||||
use crate::backend::PipelineBuilder as PipelineBuilderTrait;
|
use crate::BackendType;
|
||||||
use crate::{BufferUsage, Error, GpuInfo, ImageLayout};
|
use crate::BindType;
|
||||||
|
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, InstanceFlags};
|
||||||
|
|
||||||
mux_enum! {
|
mux_enum! {
|
||||||
/// An instance, selected from multiple backends.
|
/// An instance, selected from multiple backends.
|
||||||
|
@ -84,7 +85,6 @@ mux_device_enum! {
|
||||||
/// presentation by the back-end, this may or may not be a "real"
|
/// presentation by the back-end, this may or may not be a "real"
|
||||||
/// semaphore.
|
/// semaphore.
|
||||||
Semaphore }
|
Semaphore }
|
||||||
mux_device_enum! { PipelineBuilder }
|
|
||||||
mux_device_enum! {
|
mux_device_enum! {
|
||||||
/// A pipeline object; basically a compiled shader.
|
/// A pipeline object; basically a compiled shader.
|
||||||
Pipeline }
|
Pipeline }
|
||||||
|
@ -104,6 +104,8 @@ pub enum ShaderCode<'a> {
|
||||||
Spv(&'a [u8]),
|
Spv(&'a [u8]),
|
||||||
/// HLSL (source)
|
/// HLSL (source)
|
||||||
Hlsl(&'a str),
|
Hlsl(&'a str),
|
||||||
|
/// DXIL (DX12 intermediate language)
|
||||||
|
Dxil(&'a [u8]),
|
||||||
/// Metal Shading Language (source)
|
/// Metal Shading Language (source)
|
||||||
Msl(&'a str),
|
Msl(&'a str),
|
||||||
}
|
}
|
||||||
|
@ -118,22 +120,33 @@ impl Instance {
|
||||||
/// work.
|
/// work.
|
||||||
pub fn new(
|
pub fn new(
|
||||||
window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>,
|
window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>,
|
||||||
|
flags: InstanceFlags,
|
||||||
) -> Result<(Instance, Option<Surface>), Error> {
|
) -> Result<(Instance, Option<Surface>), Error> {
|
||||||
mux_cfg! {
|
let mut backends = [BackendType::Vulkan, BackendType::Dx12];
|
||||||
#[cfg(vk)]
|
if flags.contains(InstanceFlags::DX12) {
|
||||||
{
|
backends.swap(0, 1);
|
||||||
let result = vulkan::VkInstance::new(window_handle);
|
}
|
||||||
if let Ok((instance, surface)) = result {
|
for backend in backends {
|
||||||
return Ok((Instance::Vk(instance), surface.map(Surface::Vk)));
|
if backend == BackendType::Vulkan {
|
||||||
|
mux_cfg! {
|
||||||
|
#[cfg(vk)]
|
||||||
|
{
|
||||||
|
let result = vulkan::VkInstance::new(window_handle);
|
||||||
|
if let Ok((instance, surface)) = result {
|
||||||
|
return Ok((Instance::Vk(instance), surface.map(Surface::Vk)));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if backend == BackendType::Dx12 {
|
||||||
mux_cfg! {
|
mux_cfg! {
|
||||||
#[cfg(dx12)]
|
#[cfg(dx12)]
|
||||||
{
|
{
|
||||||
let result = dx12::Dx12Instance::new(window_handle);
|
let result = dx12::Dx12Instance::new(window_handle);
|
||||||
if let Ok((instance, surface)) = result {
|
if let Ok((instance, surface)) = result {
|
||||||
return Ok((Instance::Dx12(instance), surface.map(Surface::Dx12)));
|
return Ok((Instance::Dx12(instance), surface.map(Surface::Dx12)));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -293,11 +306,40 @@ impl Device {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn pipeline_builder(&self) -> PipelineBuilder {
|
pub unsafe fn create_compute_pipeline<'a>(
|
||||||
|
&self,
|
||||||
|
code: ShaderCode<'a>,
|
||||||
|
bind_types: &[BindType],
|
||||||
|
) -> Result<Pipeline, Error> {
|
||||||
mux_match! { self;
|
mux_match! { self;
|
||||||
Device::Vk(d) => PipelineBuilder::Vk(d.pipeline_builder()),
|
Device::Vk(d) => {
|
||||||
Device::Dx12(d) => PipelineBuilder::Dx12(d.pipeline_builder()),
|
let shader_code = match code {
|
||||||
Device::Mtl(d) => PipelineBuilder::Mtl(d.pipeline_builder()),
|
ShaderCode::Spv(spv) => spv,
|
||||||
|
// Panic or return "incompatible shader" error here?
|
||||||
|
_ => panic!("Vulkan backend requires shader code in SPIR-V format"),
|
||||||
|
};
|
||||||
|
d.create_compute_pipeline(shader_code, bind_types)
|
||||||
|
.map(Pipeline::Vk)
|
||||||
|
}
|
||||||
|
Device::Dx12(d) => {
|
||||||
|
let shader_code = match code {
|
||||||
|
//ShaderCode::Hlsl(hlsl) => hlsl,
|
||||||
|
ShaderCode::Dxil(dxil) => dxil,
|
||||||
|
// Panic or return "incompatible shader" error here?
|
||||||
|
_ => panic!("DX12 backend requires shader code in DXIL format"),
|
||||||
|
};
|
||||||
|
d.create_compute_pipeline(shader_code, bind_types)
|
||||||
|
.map(Pipeline::Dx12)
|
||||||
|
}
|
||||||
|
Device::Mtl(d) => {
|
||||||
|
let shader_code = match code {
|
||||||
|
ShaderCode::Msl(msl) => msl,
|
||||||
|
// Panic or return "incompatible shader" error here?
|
||||||
|
_ => panic!("Metal backend requires shader code in MSL format"),
|
||||||
|
};
|
||||||
|
d.create_compute_pipeline(shader_code, bind_types)
|
||||||
|
.map(Pipeline::Mtl)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -436,74 +478,21 @@ impl Device {
|
||||||
&self,
|
&self,
|
||||||
_spv: &'a [u8],
|
_spv: &'a [u8],
|
||||||
_hlsl: &'a str,
|
_hlsl: &'a str,
|
||||||
|
_dxil: &'a [u8],
|
||||||
_msl: &'a str,
|
_msl: &'a str,
|
||||||
) -> ShaderCode<'a> {
|
) -> ShaderCode<'a> {
|
||||||
mux_match! { self;
|
mux_match! { self;
|
||||||
Device::Vk(_d) => ShaderCode::Spv(_spv),
|
Device::Vk(_d) => ShaderCode::Spv(_spv),
|
||||||
Device::Dx12(_d) => ShaderCode::Hlsl(_hlsl),
|
Device::Dx12(_d) => ShaderCode::Dxil(_dxil),
|
||||||
Device::Mtl(_d) => ShaderCode::Msl(_msl),
|
Device::Mtl(_d) => ShaderCode::Msl(_msl),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl PipelineBuilder {
|
pub fn backend_type(&self) -> BackendType {
|
||||||
pub fn add_buffers(&mut self, n_buffers: u32) {
|
|
||||||
mux_match! { self;
|
mux_match! { self;
|
||||||
PipelineBuilder::Vk(x) => x.add_buffers(n_buffers),
|
Device::Vk(_d) => BackendType::Vulkan,
|
||||||
PipelineBuilder::Dx12(x) => x.add_buffers(n_buffers),
|
Device::Dx12(_d) => BackendType::Dx12,
|
||||||
PipelineBuilder::Mtl(x) => x.add_buffers(n_buffers),
|
Device::Mtl(_d) => BackendType::Metal,
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_images(&mut self, n_buffers: u32) {
|
|
||||||
mux_match! { self;
|
|
||||||
PipelineBuilder::Vk(x) => x.add_images(n_buffers),
|
|
||||||
PipelineBuilder::Dx12(x) => x.add_images(n_buffers),
|
|
||||||
PipelineBuilder::Mtl(x) => x.add_images(n_buffers),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_textures(&mut self, n_buffers: u32) {
|
|
||||||
mux_match! { self;
|
|
||||||
PipelineBuilder::Vk(x) => x.add_textures(n_buffers),
|
|
||||||
PipelineBuilder::Dx12(x) => x.add_textures(n_buffers),
|
|
||||||
PipelineBuilder::Mtl(x) => x.add_textures(n_buffers),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub unsafe fn create_compute_pipeline<'a>(
|
|
||||||
self,
|
|
||||||
device: &Device,
|
|
||||||
code: ShaderCode<'a>,
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
mux_match! { self;
|
|
||||||
PipelineBuilder::Vk(x) => {
|
|
||||||
let shader_code = match code {
|
|
||||||
ShaderCode::Spv(spv) => spv,
|
|
||||||
// Panic or return "incompatible shader" error here?
|
|
||||||
_ => panic!("Vulkan backend requires shader code in SPIR-V format"),
|
|
||||||
};
|
|
||||||
x.create_compute_pipeline(device.vk(), shader_code)
|
|
||||||
.map(Pipeline::Vk)
|
|
||||||
}
|
|
||||||
PipelineBuilder::Dx12(x) => {
|
|
||||||
let shader_code = match code {
|
|
||||||
ShaderCode::Hlsl(hlsl) => hlsl,
|
|
||||||
// Panic or return "incompatible shader" error here?
|
|
||||||
_ => panic!("DX12 backend requires shader code in HLSL format"),
|
|
||||||
};
|
|
||||||
x.create_compute_pipeline(device.dx12(), shader_code)
|
|
||||||
.map(Pipeline::Dx12)
|
|
||||||
}
|
|
||||||
PipelineBuilder::Mtl(x) => {
|
|
||||||
let shader_code = match code {
|
|
||||||
ShaderCode::Msl(msl) => msl,
|
|
||||||
// Panic or return "incompatible shader" error here?
|
|
||||||
_ => panic!("Metal backend requires shader code in MSL format"),
|
|
||||||
};
|
|
||||||
x.create_compute_pipeline(device.mtl(), shader_code)
|
|
||||||
.map(Pipeline::Mtl)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,9 +11,11 @@ use ash::{vk, Device, Entry, Instance};
|
||||||
|
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
use crate::{BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize, WorkgroupLimits};
|
|
||||||
use crate::backend::Device as DeviceTrait;
|
use crate::backend::Device as DeviceTrait;
|
||||||
|
use crate::{
|
||||||
|
BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
|
||||||
|
WorkgroupLimits,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct VkInstance {
|
pub struct VkInstance {
|
||||||
/// Retain the dynamic lib.
|
/// Retain the dynamic lib.
|
||||||
|
@ -98,12 +100,6 @@ pub struct QueryPool {
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct MemFlags(vk::MemoryPropertyFlags);
|
pub struct MemFlags(vk::MemoryPropertyFlags);
|
||||||
|
|
||||||
pub struct PipelineBuilder {
|
|
||||||
bindings: Vec<vk::DescriptorSetLayoutBinding>,
|
|
||||||
binding_flags: Vec<vk::DescriptorBindingFlags>,
|
|
||||||
max_textures: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DescriptorSetBuilder {
|
pub struct DescriptorSetBuilder {
|
||||||
buffers: Vec<vk::Buffer>,
|
buffers: Vec<vk::Buffer>,
|
||||||
images: Vec<vk::ImageView>,
|
images: Vec<vk::ImageView>,
|
||||||
|
@ -262,9 +258,9 @@ impl VkInstance {
|
||||||
if vk1_1 {
|
if vk1_1 {
|
||||||
let mut descriptor_indexing_features =
|
let mut descriptor_indexing_features =
|
||||||
vk::PhysicalDeviceDescriptorIndexingFeatures::builder();
|
vk::PhysicalDeviceDescriptorIndexingFeatures::builder();
|
||||||
features2 = features2
|
features2 = features2.push_next(&mut descriptor_indexing_features);
|
||||||
.push_next(&mut descriptor_indexing_features);
|
self.instance
|
||||||
self.instance.get_physical_device_features2(pdevice, &mut features2);
|
.get_physical_device_features2(pdevice, &mut features2);
|
||||||
set_features2 = set_features2.features(features2.features);
|
set_features2 = set_features2.features(features2.features);
|
||||||
has_descriptor_indexing = descriptor_indexing_features
|
has_descriptor_indexing = descriptor_indexing_features
|
||||||
.shader_storage_image_array_non_uniform_indexing
|
.shader_storage_image_array_non_uniform_indexing
|
||||||
|
@ -296,14 +292,13 @@ impl VkInstance {
|
||||||
extensions.try_add(vk::KhrMaintenance3Fn::name());
|
extensions.try_add(vk::KhrMaintenance3Fn::name());
|
||||||
extensions.try_add(vk::ExtDescriptorIndexingFn::name());
|
extensions.try_add(vk::ExtDescriptorIndexingFn::name());
|
||||||
}
|
}
|
||||||
let has_subgroup_size = vk1_1
|
let has_subgroup_size = vk1_1 && extensions.try_add(vk::ExtSubgroupSizeControlFn::name());
|
||||||
&& extensions.try_add(vk::ExtSubgroupSizeControlFn::name());
|
let has_memory_model = vk1_1 && extensions.try_add(vk::KhrVulkanMemoryModelFn::name());
|
||||||
let has_memory_model = vk1_1
|
|
||||||
&& extensions.try_add(vk::KhrVulkanMemoryModelFn::name());
|
|
||||||
let mut create_info = vk::DeviceCreateInfo::builder()
|
let mut create_info = vk::DeviceCreateInfo::builder()
|
||||||
.queue_create_infos(&queue_create_infos)
|
.queue_create_infos(&queue_create_infos)
|
||||||
.enabled_extension_names(extensions.as_ptrs());
|
.enabled_extension_names(extensions.as_ptrs());
|
||||||
let mut set_memory_model_features = vk::PhysicalDeviceVulkanMemoryModelFeatures::builder(); if vk1_1 {
|
let mut set_memory_model_features = vk::PhysicalDeviceVulkanMemoryModelFeatures::builder();
|
||||||
|
if vk1_1 {
|
||||||
create_info = create_info.push_next(&mut set_features2);
|
create_info = create_info.push_next(&mut set_features2);
|
||||||
if has_memory_model {
|
if has_memory_model {
|
||||||
set_memory_model_features = set_memory_model_features
|
set_memory_model_features = set_memory_model_features
|
||||||
|
@ -422,7 +417,8 @@ impl VkInstance {
|
||||||
0 => u32::MAX,
|
0 => u32::MAX,
|
||||||
x => x,
|
x => x,
|
||||||
};
|
};
|
||||||
let image_count = PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, max_image_count);
|
let image_count =
|
||||||
|
PREFERRED_IMAGE_COUNT.clamp(capabilities.min_image_count, max_image_count);
|
||||||
let mut extent = capabilities.current_extent;
|
let mut extent = capabilities.current_extent;
|
||||||
if extent.width == u32::MAX || extent.height == u32::MAX {
|
if extent.width == u32::MAX || extent.height == u32::MAX {
|
||||||
// We're deciding the size.
|
// We're deciding the size.
|
||||||
|
@ -475,7 +471,6 @@ impl crate::backend::Device for VkDevice {
|
||||||
type QueryPool = QueryPool;
|
type QueryPool = QueryPool;
|
||||||
type Fence = vk::Fence;
|
type Fence = vk::Fence;
|
||||||
type Semaphore = vk::Semaphore;
|
type Semaphore = vk::Semaphore;
|
||||||
type PipelineBuilder = PipelineBuilder;
|
|
||||||
type DescriptorSetBuilder = DescriptorSetBuilder;
|
type DescriptorSetBuilder = DescriptorSetBuilder;
|
||||||
type Sampler = vk::Sampler;
|
type Sampler = vk::Sampler;
|
||||||
type ShaderSource = [u8];
|
type ShaderSource = [u8];
|
||||||
|
@ -649,12 +644,65 @@ impl crate::backend::Device for VkDevice {
|
||||||
Ok(device.get_fence_status(*fence)?)
|
Ok(device.get_fence_status(*fence)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn pipeline_builder(&self) -> PipelineBuilder {
|
unsafe fn create_compute_pipeline(
|
||||||
PipelineBuilder {
|
&self,
|
||||||
bindings: Vec::new(),
|
code: &[u8],
|
||||||
binding_flags: Vec::new(),
|
bind_types: &[BindType],
|
||||||
max_textures: 0,
|
) -> Result<Pipeline, Error> {
|
||||||
}
|
let device = &self.device.device;
|
||||||
|
let bindings = bind_types
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, bind_type)| {
|
||||||
|
let descriptor_type = match bind_type {
|
||||||
|
BindType::Buffer | BindType::BufReadOnly => vk::DescriptorType::STORAGE_BUFFER,
|
||||||
|
BindType::Image | BindType::ImageRead => vk::DescriptorType::STORAGE_IMAGE,
|
||||||
|
};
|
||||||
|
vk::DescriptorSetLayoutBinding::builder()
|
||||||
|
.binding(i.try_into().unwrap())
|
||||||
|
.descriptor_type(descriptor_type)
|
||||||
|
.descriptor_count(1)
|
||||||
|
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.build()
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let descriptor_set_layout = device.create_descriptor_set_layout(
|
||||||
|
&vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
let descriptor_set_layouts = [descriptor_set_layout];
|
||||||
|
|
||||||
|
// Create compute pipeline.
|
||||||
|
let code_u32 = convert_u32_vec(code);
|
||||||
|
let compute_shader_module = device
|
||||||
|
.create_shader_module(&vk::ShaderModuleCreateInfo::builder().code(&code_u32), None)?;
|
||||||
|
let entry_name = CString::new("main").unwrap();
|
||||||
|
let pipeline_layout = device.create_pipeline_layout(
|
||||||
|
&vk::PipelineLayoutCreateInfo::builder().set_layouts(&descriptor_set_layouts),
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let pipeline = device
|
||||||
|
.create_compute_pipelines(
|
||||||
|
vk::PipelineCache::null(),
|
||||||
|
&[vk::ComputePipelineCreateInfo::builder()
|
||||||
|
.stage(
|
||||||
|
vk::PipelineShaderStageCreateInfo::builder()
|
||||||
|
.stage(vk::ShaderStageFlags::COMPUTE)
|
||||||
|
.module(compute_shader_module)
|
||||||
|
.name(&entry_name)
|
||||||
|
.build(),
|
||||||
|
)
|
||||||
|
.layout(pipeline_layout)
|
||||||
|
.build()],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.map_err(|(_pipeline, err)| err)?[0];
|
||||||
|
Ok(Pipeline {
|
||||||
|
pipeline,
|
||||||
|
pipeline_layout,
|
||||||
|
descriptor_set_layout,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder {
|
unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder {
|
||||||
|
@ -715,13 +763,7 @@ impl crate::backend::Device for VkDevice {
|
||||||
// fence should make the query available, but otherwise we get sporadic NOT_READY
|
// fence should make the query available, but otherwise we get sporadic NOT_READY
|
||||||
// results (Windows 10, AMD 5700 XT).
|
// results (Windows 10, AMD 5700 XT).
|
||||||
let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT;
|
let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT;
|
||||||
device.get_query_pool_results(
|
device.get_query_pool_results(pool.pool, 0, pool.n_queries, &mut buf, flags)?;
|
||||||
pool.pool,
|
|
||||||
0,
|
|
||||||
pool.n_queries,
|
|
||||||
&mut buf,
|
|
||||||
flags,
|
|
||||||
)?;
|
|
||||||
let ts0 = buf[0];
|
let ts0 = buf[0];
|
||||||
let tsp = self.timestamp_period as f64 * 1e-9;
|
let tsp = self.timestamp_period as f64 * 1e-9;
|
||||||
let result = buf[1..]
|
let result = buf[1..]
|
||||||
|
@ -1080,109 +1122,6 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl crate::backend::PipelineBuilder<VkDevice> for PipelineBuilder {
|
|
||||||
fn add_buffers(&mut self, n_buffers: u32) {
|
|
||||||
let start = self.bindings.len() as u32;
|
|
||||||
for i in 0..n_buffers {
|
|
||||||
self.bindings.push(
|
|
||||||
vk::DescriptorSetLayoutBinding::builder()
|
|
||||||
.binding(start + i)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
|
|
||||||
.descriptor_count(1)
|
|
||||||
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
self.binding_flags
|
|
||||||
.push(vk::DescriptorBindingFlags::default());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_images(&mut self, n_images: u32) {
|
|
||||||
let start = self.bindings.len() as u32;
|
|
||||||
for i in 0..n_images {
|
|
||||||
self.bindings.push(
|
|
||||||
vk::DescriptorSetLayoutBinding::builder()
|
|
||||||
.binding(start + i)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
|
||||||
.descriptor_count(1)
|
|
||||||
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
self.binding_flags
|
|
||||||
.push(vk::DescriptorBindingFlags::default());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_textures(&mut self, n_images: u32) {
|
|
||||||
let start = self.bindings.len() as u32;
|
|
||||||
for i in 0..n_images {
|
|
||||||
self.bindings.push(
|
|
||||||
vk::DescriptorSetLayoutBinding::builder()
|
|
||||||
.binding(start + i)
|
|
||||||
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
|
|
||||||
.descriptor_count(1)
|
|
||||||
.stage_flags(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.build(),
|
|
||||||
);
|
|
||||||
self.binding_flags
|
|
||||||
.push(vk::DescriptorBindingFlags::default());
|
|
||||||
}
|
|
||||||
self.max_textures += n_images;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn create_compute_pipeline(
|
|
||||||
self,
|
|
||||||
device: &VkDevice,
|
|
||||||
code: &[u8],
|
|
||||||
) -> Result<Pipeline, Error> {
|
|
||||||
let device = &device.device.device;
|
|
||||||
let descriptor_set_layout = device.create_descriptor_set_layout(
|
|
||||||
&vk::DescriptorSetLayoutCreateInfo::builder()
|
|
||||||
.bindings(&self.bindings)
|
|
||||||
// It might be a slight optimization not to push this if max_textures = 0
|
|
||||||
.push_next(
|
|
||||||
&mut vk::DescriptorSetLayoutBindingFlagsCreateInfo::builder()
|
|
||||||
.binding_flags(&self.binding_flags)
|
|
||||||
.build(),
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)?;
|
|
||||||
let descriptor_set_layouts = [descriptor_set_layout];
|
|
||||||
|
|
||||||
// Create compute pipeline.
|
|
||||||
let code_u32 = convert_u32_vec(code);
|
|
||||||
let compute_shader_module = device
|
|
||||||
.create_shader_module(&vk::ShaderModuleCreateInfo::builder().code(&code_u32), None)?;
|
|
||||||
let entry_name = CString::new("main").unwrap();
|
|
||||||
let pipeline_layout = device.create_pipeline_layout(
|
|
||||||
&vk::PipelineLayoutCreateInfo::builder().set_layouts(&descriptor_set_layouts),
|
|
||||||
None,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let pipeline = device
|
|
||||||
.create_compute_pipelines(
|
|
||||||
vk::PipelineCache::null(),
|
|
||||||
&[vk::ComputePipelineCreateInfo::builder()
|
|
||||||
.stage(
|
|
||||||
vk::PipelineShaderStageCreateInfo::builder()
|
|
||||||
.stage(vk::ShaderStageFlags::COMPUTE)
|
|
||||||
.module(compute_shader_module)
|
|
||||||
.name(&entry_name)
|
|
||||||
.build(),
|
|
||||||
)
|
|
||||||
.layout(pipeline_layout)
|
|
||||||
.build()],
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.map_err(|(_pipeline, err)| err)?[0];
|
|
||||||
Ok(Pipeline {
|
|
||||||
pipeline,
|
|
||||||
pipeline_layout,
|
|
||||||
descriptor_set_layout,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl crate::backend::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
|
impl crate::backend::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
|
||||||
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
fn add_buffers(&mut self, buffers: &[&Buffer]) {
|
||||||
self.buffers.extend(buffers.iter().map(|b| b.buffer));
|
self.buffers.extend(buffers.iter().map(|b| b.buffer));
|
||||||
|
|
|
@ -30,7 +30,7 @@ piet = "0.2.0"
|
||||||
png = "0.16.2"
|
png = "0.16.2"
|
||||||
rand = "0.7.3"
|
rand = "0.7.3"
|
||||||
roxmltree = "0.13"
|
roxmltree = "0.13"
|
||||||
winit = "0.23"
|
winit = "0.25"
|
||||||
clap = "2.33"
|
clap = "2.33"
|
||||||
swash = "0.1.4"
|
swash = "0.1.4"
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ fn my_main() -> Result<(), Error> {
|
||||||
let width = window.width() as usize;
|
let width = window.width() as usize;
|
||||||
let height = window.height() as usize;
|
let height = window.height() as usize;
|
||||||
let handle = get_handle(window);
|
let handle = get_handle(window);
|
||||||
let (instance, surface) = Instance::new(Some(&handle))?;
|
let (instance, surface) = Instance::new(Some(&handle), Default::default())?;
|
||||||
gfx_state =
|
gfx_state =
|
||||||
Some(GfxState::new(&instance, surface.as_ref(), width, height)?);
|
Some(GfxState::new(&instance, surface.as_ref(), width, height)?);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -226,7 +226,7 @@ fn main() -> Result<(), Error> {
|
||||||
.takes_value(true),
|
.takes_value(true),
|
||||||
)
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
let (instance, _) = Instance::new(None)?;
|
let (instance, _) = Instance::new(None, Default::default())?;
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = instance.device(None)?;
|
let device = instance.device(None)?;
|
||||||
let session = Session::new(device);
|
let session = Session::new(device);
|
||||||
|
|
|
@ -38,7 +38,7 @@ fn main() -> Result<(), Error> {
|
||||||
.with_resizable(false) // currently not supported
|
.with_resizable(false) // currently not supported
|
||||||
.build(&event_loop)?;
|
.build(&event_loop)?;
|
||||||
|
|
||||||
let (instance, surface) = Instance::new(Some(&window))?;
|
let (instance, surface) = Instance::new(Some(&window), Default::default())?;
|
||||||
let mut info_string = "info".to_string();
|
let mut info_string = "info".to_string();
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = instance.device(surface.as_ref())?;
|
let device = instance.device(surface.as_ref())?;
|
||||||
|
|
|
@ -14,8 +14,8 @@ use piet::{ImageFormat, RenderContext};
|
||||||
use piet_gpu_types::encoder::Encode;
|
use piet_gpu_types::encoder::Encode;
|
||||||
|
|
||||||
use piet_gpu_hal::{
|
use piet_gpu_hal::{
|
||||||
Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, ImageLayout, Pipeline, QueryPool,
|
BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, ImageLayout, Pipeline,
|
||||||
Session, ShaderCode,
|
QueryPool, Session, ShaderCode,
|
||||||
};
|
};
|
||||||
|
|
||||||
use pico_svg::PicoSvg;
|
use pico_svg::PicoSvg;
|
||||||
|
@ -140,7 +140,15 @@ impl Renderer {
|
||||||
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
||||||
|
|
||||||
let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
|
let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
|
||||||
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
|
let el_pipeline = session.create_compute_pipeline(
|
||||||
|
el_code,
|
||||||
|
&[
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
let mut el_ds = Vec::with_capacity(n_bufs);
|
let mut el_ds = Vec::with_capacity(n_bufs);
|
||||||
for scene_buf in &scene_bufs {
|
for scene_buf in &scene_bufs {
|
||||||
el_ds.push(session.create_simple_descriptor_set(
|
el_ds.push(session.create_simple_descriptor_set(
|
||||||
|
@ -150,12 +158,14 @@ impl Renderer {
|
||||||
}
|
}
|
||||||
|
|
||||||
let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv"));
|
let tile_alloc_code = ShaderCode::Spv(include_bytes!("../shader/tile_alloc.spv"));
|
||||||
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
|
let tile_pipeline = session
|
||||||
|
.create_compute_pipeline(tile_alloc_code, &[BindType::Buffer, BindType::Buffer])?;
|
||||||
let tile_ds = session
|
let tile_ds = session
|
||||||
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
|
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let path_alloc_code = ShaderCode::Spv(include_bytes!("../shader/path_coarse.spv"));
|
let path_alloc_code = ShaderCode::Spv(include_bytes!("../shader/path_coarse.spv"));
|
||||||
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
|
let path_pipeline = session
|
||||||
|
.create_compute_pipeline(path_alloc_code, &[BindType::Buffer, BindType::Buffer])?;
|
||||||
let path_ds = session
|
let path_ds = session
|
||||||
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
|
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
|
@ -165,18 +175,21 @@ impl Renderer {
|
||||||
println!("using small workgroup backdrop kernel");
|
println!("using small workgroup backdrop kernel");
|
||||||
ShaderCode::Spv(include_bytes!("../shader/backdrop.spv"))
|
ShaderCode::Spv(include_bytes!("../shader/backdrop.spv"))
|
||||||
};
|
};
|
||||||
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_code, 2)?;
|
let backdrop_pipeline = session
|
||||||
|
.create_compute_pipeline(backdrop_code, &[BindType::Buffer, BindType::Buffer])?;
|
||||||
let backdrop_ds = session
|
let backdrop_ds = session
|
||||||
.create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
|
.create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
let bin_code = ShaderCode::Spv(include_bytes!("../shader/binning.spv"));
|
let bin_code = ShaderCode::Spv(include_bytes!("../shader/binning.spv"));
|
||||||
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
|
let bin_pipeline =
|
||||||
|
session.create_compute_pipeline(bin_code, &[BindType::Buffer, BindType::Buffer])?;
|
||||||
let bin_ds =
|
let bin_ds =
|
||||||
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
|
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let coarse_code = ShaderCode::Spv(include_bytes!("../shader/coarse.spv"));
|
let coarse_code = ShaderCode::Spv(include_bytes!("../shader/coarse.spv"));
|
||||||
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
|
let coarse_pipeline =
|
||||||
|
session.create_compute_pipeline(coarse_code, &[BindType::Buffer, BindType::Buffer])?;
|
||||||
let coarse_ds = session
|
let coarse_ds = session
|
||||||
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
|
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
|
@ -194,17 +207,16 @@ impl Renderer {
|
||||||
let gradients = Self::make_gradient_image(&session);
|
let gradients = Self::make_gradient_image(&session);
|
||||||
|
|
||||||
let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
|
let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
|
||||||
// This is a bit of a stand-in for future development. For now, we assume one
|
let k4_pipeline = session.create_compute_pipeline(
|
||||||
// atlas image for all images, and another image for the gradients. In the future,
|
k4_code,
|
||||||
// on GPUs that support it, we will probably want to go to descriptor indexing in
|
&[
|
||||||
// order to cut down on allocation and copying for the atlas image.
|
BindType::Buffer,
|
||||||
let max_textures = 2;
|
BindType::Buffer,
|
||||||
let k4_pipeline = session
|
BindType::Image,
|
||||||
.pipeline_builder()
|
BindType::ImageRead,
|
||||||
.add_buffers(2)
|
BindType::ImageRead,
|
||||||
.add_images(1)
|
],
|
||||||
.add_textures(max_textures)
|
)?;
|
||||||
.create_compute_pipeline(&session, k4_code)?;
|
|
||||||
let k4_ds = session
|
let k4_ds = session
|
||||||
.descriptor_set_builder()
|
.descriptor_set_builder()
|
||||||
.add_buffers(&[&memory_buf_dev, &config_buf])
|
.add_buffers(&[&memory_buf_dev, &config_buf])
|
||||||
|
|
|
@ -4,6 +4,11 @@
|
||||||
|
|
||||||
glslang_validator = glslangValidator
|
glslang_validator = glslangValidator
|
||||||
spirv_cross = spirv-cross
|
spirv_cross = spirv-cross
|
||||||
|
dxc = dxc
|
||||||
|
|
||||||
|
# See https://github.com/KhronosGroup/SPIRV-Cross/issues/1248 for
|
||||||
|
# why we set this.
|
||||||
|
msl_flags = --msl-decoration-binding
|
||||||
|
|
||||||
rule glsl
|
rule glsl
|
||||||
command = $glslang_validator $flags -V -o $out $in
|
command = $glslang_validator $flags -V -o $out $in
|
||||||
|
@ -11,22 +16,34 @@ rule glsl
|
||||||
rule hlsl
|
rule hlsl
|
||||||
command = $spirv_cross --hlsl $in --output $out
|
command = $spirv_cross --hlsl $in --output $out
|
||||||
|
|
||||||
|
rule dxil
|
||||||
|
command = $dxc -T cs_6_0 $in -Fo $out
|
||||||
|
|
||||||
rule msl
|
rule msl
|
||||||
command = $spirv_cross --msl $in --output $out
|
command = $spirv_cross --msl $in --output $out $msl_flags
|
||||||
|
|
||||||
|
build gen/clear.spv: glsl clear.comp
|
||||||
|
build gen/clear.hlsl: hlsl gen/clear.spv
|
||||||
|
build gen/clear.dxil: dxil gen/clear.hlsl
|
||||||
|
build gen/clear.msl: msl gen/clear.spv
|
||||||
|
|
||||||
build gen/prefix.spv: glsl prefix.comp
|
build gen/prefix.spv: glsl prefix.comp
|
||||||
build gen/prefix.hlsl: hlsl gen/prefix.spv
|
build gen/prefix.hlsl: hlsl gen/prefix.spv
|
||||||
|
build gen/prefix.dxil: dxil gen/prefix.hlsl
|
||||||
build gen/prefix.msl: msl gen/prefix.spv
|
build gen/prefix.msl: msl gen/prefix.spv
|
||||||
|
|
||||||
build gen/prefix_reduce.spv: glsl prefix_reduce.comp
|
build gen/prefix_reduce.spv: glsl prefix_reduce.comp
|
||||||
build gen/prefix_reduce.hlsl: hlsl gen/prefix_reduce.spv
|
build gen/prefix_reduce.hlsl: hlsl gen/prefix_reduce.spv
|
||||||
|
build gen/prefix_reduce.dxil: dxil gen/prefix_reduce.hlsl
|
||||||
build gen/prefix_reduce.msl: msl gen/prefix_reduce.spv
|
build gen/prefix_reduce.msl: msl gen/prefix_reduce.spv
|
||||||
|
|
||||||
build gen/prefix_root.spv: glsl prefix_scan.comp
|
build gen/prefix_root.spv: glsl prefix_scan.comp
|
||||||
flags = -DROOT
|
flags = -DROOT
|
||||||
build gen/prefix_root.hlsl: hlsl gen/prefix_root.spv
|
build gen/prefix_root.hlsl: hlsl gen/prefix_root.spv
|
||||||
|
build gen/prefix_root.dxil: dxil gen/prefix_root.hlsl
|
||||||
build gen/prefix_root.msl: msl gen/prefix_root.spv
|
build gen/prefix_root.msl: msl gen/prefix_root.spv
|
||||||
|
|
||||||
build gen/prefix_scan.spv: glsl prefix_scan.comp
|
build gen/prefix_scan.spv: glsl prefix_scan.comp
|
||||||
build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
|
build gen/prefix_scan.hlsl: hlsl gen/prefix_scan.spv
|
||||||
|
build gen/prefix_scan.dxil: dxil gen/prefix_scan.hlsl
|
||||||
build gen/prefix_scan.msl: msl gen/prefix_scan.spv
|
build gen/prefix_scan.msl: msl gen/prefix_scan.spv
|
||||||
|
|
26
tests/shader/clear.comp
Normal file
26
tests/shader/clear.comp
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// Clear a buffer.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
|
||||||
|
layout(local_size_x = 256) in;
|
||||||
|
|
||||||
|
// This should probably be uniform rather than readonly,
|
||||||
|
// but we haven't done the binding work yet.
|
||||||
|
layout(binding = 0) readonly buffer ConfigBuf {
|
||||||
|
// size is in uint (4 byte) units
|
||||||
|
uint size;
|
||||||
|
uint value;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(binding = 1) buffer TargetBuf {
|
||||||
|
uint[] data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < size) {
|
||||||
|
data[ix] = value;
|
||||||
|
}
|
||||||
|
}
|
BIN
tests/shader/gen/clear.dxil
Normal file
BIN
tests/shader/gen/clear.dxil
Normal file
Binary file not shown.
26
tests/shader/gen/clear.hlsl
Normal file
26
tests/shader/gen/clear.hlsl
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
ByteAddressBuffer _19 : register(t0);
|
||||||
|
RWByteAddressBuffer _32 : register(u1);
|
||||||
|
|
||||||
|
static uint3 gl_GlobalInvocationID;
|
||||||
|
struct SPIRV_Cross_Input
|
||||||
|
{
|
||||||
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
|
};
|
||||||
|
|
||||||
|
void comp_main()
|
||||||
|
{
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < _19.Load(0))
|
||||||
|
{
|
||||||
|
_32.Store(ix * 4 + 0, _19.Load(4));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[numthreads(256, 1, 1)]
|
||||||
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
|
{
|
||||||
|
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||||
|
comp_main();
|
||||||
|
}
|
27
tests/shader/gen/clear.msl
Normal file
27
tests/shader/gen/clear.msl
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#include <metal_stdlib>
|
||||||
|
#include <simd/simd.h>
|
||||||
|
|
||||||
|
using namespace metal;
|
||||||
|
|
||||||
|
struct ConfigBuf
|
||||||
|
{
|
||||||
|
uint size;
|
||||||
|
uint value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TargetBuf
|
||||||
|
{
|
||||||
|
uint data[1];
|
||||||
|
};
|
||||||
|
|
||||||
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
|
kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||||
|
{
|
||||||
|
uint ix = gl_GlobalInvocationID.x;
|
||||||
|
if (ix < _19.size)
|
||||||
|
{
|
||||||
|
_32.data[ix] = _19.value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
BIN
tests/shader/gen/clear.spv
Normal file
BIN
tests/shader/gen/clear.spv
Normal file
Binary file not shown.
BIN
tests/shader/gen/prefix.dxil
Normal file
BIN
tests/shader/gen/prefix.dxil
Normal file
Binary file not shown.
|
@ -12,11 +12,11 @@ struct State
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||||
|
|
||||||
static const Monoid _187 = { 0u };
|
static const Monoid _185 = { 0u };
|
||||||
|
|
||||||
globallycoherent RWByteAddressBuffer _43 : register(u2);
|
globallycoherent RWByteAddressBuffer _43 : register(u2);
|
||||||
ByteAddressBuffer _67 : register(t0);
|
ByteAddressBuffer _67 : register(t0);
|
||||||
RWByteAddressBuffer _374 : register(u1);
|
RWByteAddressBuffer _372 : register(u1);
|
||||||
|
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
struct SPIRV_Cross_Input
|
struct SPIRV_Cross_Input
|
||||||
|
@ -64,9 +64,9 @@ void comp_main()
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -92,7 +92,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
_43.Store(part_ix * 12 + 4, flag);
|
_43.Store(part_ix * 12 + 4, flag);
|
||||||
}
|
}
|
||||||
Monoid exclusive = _187;
|
Monoid exclusive = _185;
|
||||||
if (part_ix != 0u)
|
if (part_ix != 0u)
|
||||||
{
|
{
|
||||||
uint look_back_ix = part_ix - 1u;
|
uint look_back_ix = part_ix - 1u;
|
||||||
|
@ -113,9 +113,9 @@ void comp_main()
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
{
|
{
|
||||||
Monoid _225;
|
Monoid _223;
|
||||||
_225.element = _43.Load(look_back_ix * 12 + 12);
|
_223.element = _43.Load(look_back_ix * 12 + 12);
|
||||||
their_prefix.element = _225.element;
|
their_prefix.element = _223.element;
|
||||||
Monoid param_4 = their_prefix;
|
Monoid param_4 = their_prefix;
|
||||||
Monoid param_5 = exclusive;
|
Monoid param_5 = exclusive;
|
||||||
exclusive = combine_monoid(param_4, param_5);
|
exclusive = combine_monoid(param_4, param_5);
|
||||||
|
@ -128,9 +128,9 @@ void comp_main()
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
{
|
{
|
||||||
Monoid _247;
|
Monoid _245;
|
||||||
_247.element = _43.Load(look_back_ix * 12 + 8);
|
_245.element = _43.Load(look_back_ix * 12 + 8);
|
||||||
their_agg.element = _247.element;
|
their_agg.element = _245.element;
|
||||||
Monoid param_6 = their_agg;
|
Monoid param_6 = their_agg;
|
||||||
Monoid param_7 = exclusive;
|
Monoid param_7 = exclusive;
|
||||||
exclusive = combine_monoid(param_6, param_7);
|
exclusive = combine_monoid(param_6, param_7);
|
||||||
|
@ -142,9 +142,9 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
{
|
{
|
||||||
Monoid _269;
|
Monoid _267;
|
||||||
_269.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
|
_267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
|
||||||
m.element = _269.element;
|
m.element = _267.element;
|
||||||
if (their_ix == 0u)
|
if (their_ix == 0u)
|
||||||
{
|
{
|
||||||
their_agg = m;
|
their_agg = m;
|
||||||
|
@ -211,7 +211,7 @@ void comp_main()
|
||||||
Monoid param_16 = row;
|
Monoid param_16 = row;
|
||||||
Monoid param_17 = local[i_2];
|
Monoid param_17 = local[i_2];
|
||||||
Monoid m_1 = combine_monoid(param_16, param_17);
|
Monoid m_1 = combine_monoid(param_16, param_17);
|
||||||
_374.Store((ix + i_2) * 4 + 0, m_1.element);
|
_372.Store((ix + i_2) * 4 + 0, m_1.element);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -87,7 +87,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
|
||||||
return Monoid{ a.element + b.element };
|
return Monoid{ a.element + b.element };
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBuf& _67 [[buffer(1)]], device OutBuf& _374 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
threadgroup uint sh_part_ix;
|
threadgroup uint sh_part_ix;
|
||||||
threadgroup Monoid sh_scratch[512];
|
threadgroup Monoid sh_scratch[512];
|
||||||
|
@ -115,9 +115,9 @@ kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBu
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -256,7 +256,7 @@ kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBu
|
||||||
Monoid param_16 = row;
|
Monoid param_16 = row;
|
||||||
Monoid param_17 = local[i_2];
|
Monoid param_17 = local[i_2];
|
||||||
Monoid m_1 = combine_monoid(param_16, param_17);
|
Monoid m_1 = combine_monoid(param_16, param_17);
|
||||||
_374.outbuf[ix + i_2].element = m_1.element;
|
_372.outbuf[ix + i_2].element = m_1.element;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
BIN
tests/shader/gen/prefix_reduce.dxil
Normal file
BIN
tests/shader/gen/prefix_reduce.dxil
Normal file
Binary file not shown.
|
@ -6,7 +6,7 @@ struct Monoid
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||||
|
|
||||||
ByteAddressBuffer _40 : register(t0);
|
ByteAddressBuffer _40 : register(t0);
|
||||||
RWByteAddressBuffer _129 : register(u1);
|
RWByteAddressBuffer _127 : register(u1);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -46,9 +46,9 @@ void comp_main()
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if ((gl_LocalInvocationID.x + uint(1 << int(i_1))) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x + uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
Monoid param_2 = agg;
|
Monoid param_2 = agg;
|
||||||
Monoid param_3 = other;
|
Monoid param_3 = other;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -58,7 +58,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_129.Store(gl_WorkGroupID.x * 4 + 0, agg.element);
|
_127.Store(gl_WorkGroupID.x * 4 + 0, agg.element);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
|
||||||
return Monoid{ a.element + b.element };
|
return Monoid{ a.element + b.element };
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _127 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup Monoid sh_scratch[512];
|
threadgroup Monoid sh_scratch[512];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
|
@ -50,9 +50,9 @@ kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[b
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if ((gl_LocalInvocationID.x + uint(1 << int(i_1))) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x + uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
Monoid param_2 = agg;
|
Monoid param_2 = agg;
|
||||||
Monoid param_3 = other;
|
Monoid param_3 = other;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -62,7 +62,7 @@ kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[b
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_129.outbuf[gl_WorkGroupID.x].element = agg.element;
|
_127.outbuf[gl_WorkGroupID.x].element = agg.element;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
BIN
tests/shader/gen/prefix_root.dxil
Normal file
BIN
tests/shader/gen/prefix_root.dxil
Normal file
Binary file not shown.
|
@ -5,7 +5,7 @@ struct Monoid
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||||
|
|
||||||
static const Monoid _133 = { 0u };
|
static const Monoid _131 = { 0u };
|
||||||
|
|
||||||
RWByteAddressBuffer _42 : register(u0);
|
RWByteAddressBuffer _42 : register(u0);
|
||||||
|
|
||||||
|
@ -46,9 +46,9 @@ void comp_main()
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -57,7 +57,7 @@ void comp_main()
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
Monoid row = _133;
|
Monoid row = _131;
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
row = sh_scratch[gl_LocalInvocationID.x - 1u];
|
row = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||||
|
|
|
@ -85,9 +85,9 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
|
Binary file not shown.
BIN
tests/shader/gen/prefix_scan.dxil
Normal file
BIN
tests/shader/gen/prefix_scan.dxil
Normal file
Binary file not shown.
|
@ -5,10 +5,10 @@ struct Monoid
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||||
|
|
||||||
static const Monoid _133 = { 0u };
|
static const Monoid _131 = { 0u };
|
||||||
|
|
||||||
RWByteAddressBuffer _42 : register(u0);
|
RWByteAddressBuffer _42 : register(u0);
|
||||||
RWByteAddressBuffer _143 : register(u1);
|
ByteAddressBuffer _141 : register(t1);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -49,9 +49,9 @@ void comp_main()
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -60,12 +60,12 @@ void comp_main()
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
Monoid row = _133;
|
Monoid row = _131;
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
Monoid _148;
|
Monoid _146;
|
||||||
_148.element = _143.Load((gl_WorkGroupID.x - 1u) * 4 + 0);
|
_146.element = _141.Load((gl_WorkGroupID.x - 1u) * 4 + 0);
|
||||||
row.element = _148.element;
|
row.element = _146.element;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
|
|
@ -72,7 +72,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
|
||||||
return Monoid{ a.element + b.element };
|
return Monoid{ a.element + b.element };
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(device DataBuf& _42 [[buffer(0)]], device ParentBuf& _143 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _141 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup Monoid sh_scratch[512];
|
threadgroup Monoid sh_scratch[512];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
|
@ -90,9 +90,9 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], device ParentBuf& _143 [[bu
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= uint(1 << int(i_1)))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
{
|
{
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||||
Monoid param_2 = other;
|
Monoid param_2 = other;
|
||||||
Monoid param_3 = agg;
|
Monoid param_3 = agg;
|
||||||
agg = combine_monoid(param_2, param_3);
|
agg = combine_monoid(param_2, param_3);
|
||||||
|
@ -104,7 +104,7 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], device ParentBuf& _143 [[bu
|
||||||
Monoid row = Monoid{ 0u };
|
Monoid row = Monoid{ 0u };
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
row.element = _143.parent[gl_WorkGroupID.x - 1u].element;
|
row.element = _141.parent[gl_WorkGroupID.x - 1u].element;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
|
Binary file not shown.
|
@ -71,8 +71,8 @@ void main() {
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (gl_LocalInvocationID.x >= (1 << i)) {
|
if (gl_LocalInvocationID.x >= (1u << i)) {
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1 << i)];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
|
||||||
agg = combine_monoid(other, agg);
|
agg = combine_monoid(other, agg);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
|
@ -40,8 +40,8 @@ void main() {
|
||||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
// We could make this predicate tighter, but would it help?
|
// We could make this predicate tighter, but would it help?
|
||||||
if (gl_LocalInvocationID.x + (1 << i) < WG_SIZE) {
|
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1 << i)];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
|
||||||
agg = combine_monoid(agg, other);
|
agg = combine_monoid(agg, other);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
|
@ -20,7 +20,7 @@ layout(set = 0, binding = 0) buffer DataBuf {
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef ROOT
|
#ifndef ROOT
|
||||||
layout(set = 0, binding = 1) buffer ParentBuf {
|
layout(set = 0, binding = 1) readonly buffer ParentBuf {
|
||||||
Monoid[] parent;
|
Monoid[] parent;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
@ -45,8 +45,8 @@ void main() {
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (gl_LocalInvocationID.x >= (1 << i)) {
|
if (gl_LocalInvocationID.x >= (1u << i)) {
|
||||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1 << i)];
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
|
||||||
agg = combine_monoid(other, agg);
|
agg = combine_monoid(other, agg);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
141
tests/src/clear.rs
Normal file
141
tests/src/clear.rs
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
// Copyright 2021 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
//! Utilities (and a benchmark) for clearing buffers with compute shaders.
|
||||||
|
|
||||||
|
use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
|
||||||
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::config::Config;
|
||||||
|
use crate::runner::{Commands, Runner};
|
||||||
|
use crate::test_result::TestResult;
|
||||||
|
|
||||||
|
const WG_SIZE: u64 = 256;
|
||||||
|
|
||||||
|
/// The shader code for clearing buffers.
|
||||||
|
pub struct ClearCode {
|
||||||
|
pipeline: Pipeline,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The stage resources for clearing buffers.
|
||||||
|
pub struct ClearStage {
|
||||||
|
n_elements: u64,
|
||||||
|
config_buf: Buffer,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The binding for clearing buffers.
|
||||||
|
pub struct ClearBinding {
|
||||||
|
descriptor_set: DescriptorSet,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
|
let mut result = TestResult::new("clear buffers");
|
||||||
|
let n_elements: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
|
||||||
|
let out_buf = runner.buf_down(n_elements * 4);
|
||||||
|
let code = ClearCode::new(runner);
|
||||||
|
let stage = ClearStage::new_with_value(runner, n_elements, 0x42);
|
||||||
|
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||||
|
let n_iter = config.n_iter;
|
||||||
|
let mut total_elapsed = 0.0;
|
||||||
|
for i in 0..n_iter {
|
||||||
|
let mut commands = runner.commands();
|
||||||
|
commands.write_timestamp(0);
|
||||||
|
stage.record(&mut commands, &code, &binding);
|
||||||
|
commands.write_timestamp(1);
|
||||||
|
if i == 0 {
|
||||||
|
commands.cmd_buf.memory_barrier();
|
||||||
|
commands.download(&out_buf);
|
||||||
|
}
|
||||||
|
total_elapsed += runner.submit(commands);
|
||||||
|
if i == 0 {
|
||||||
|
let mut dst: Vec<u32> = Default::default();
|
||||||
|
out_buf.read(&mut dst);
|
||||||
|
if let Some(failure) = verify(&dst) {
|
||||||
|
result.fail(format!("failure at {}", failure));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.timing(total_elapsed, n_elements * n_iter);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClearCode {
|
||||||
|
pub unsafe fn new(runner: &mut Runner) -> ClearCode {
|
||||||
|
let code = include_shader!(&runner.session, "../shader/gen/Clear");
|
||||||
|
let pipeline = runner
|
||||||
|
.session
|
||||||
|
.create_compute_pipeline(code, &[BindType::BufReadOnly, BindType::Buffer])
|
||||||
|
.unwrap();
|
||||||
|
ClearCode { pipeline }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClearStage {
|
||||||
|
pub unsafe fn new(runner: &mut Runner, n_elements: u64) -> ClearStage {
|
||||||
|
Self::new_with_value(runner, n_elements, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn new_with_value(runner: &mut Runner, n_elements: u64, value: u32) -> ClearStage {
|
||||||
|
let config = [n_elements as u32, value];
|
||||||
|
let config_buf = runner
|
||||||
|
.session
|
||||||
|
.create_buffer_init(&config, BufferUsage::STORAGE)
|
||||||
|
.unwrap();
|
||||||
|
ClearStage {
|
||||||
|
n_elements,
|
||||||
|
config_buf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn bind(
|
||||||
|
&self,
|
||||||
|
runner: &mut Runner,
|
||||||
|
code: &ClearCode,
|
||||||
|
out_buf: &Buffer,
|
||||||
|
) -> ClearBinding {
|
||||||
|
let descriptor_set = runner
|
||||||
|
.session
|
||||||
|
.create_simple_descriptor_set(&code.pipeline, &[&self.config_buf, out_buf])
|
||||||
|
.unwrap();
|
||||||
|
ClearBinding { descriptor_set }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn record(
|
||||||
|
&self,
|
||||||
|
commands: &mut Commands,
|
||||||
|
code: &ClearCode,
|
||||||
|
bindings: &ClearBinding,
|
||||||
|
) {
|
||||||
|
let n_workgroups = (self.n_elements + WG_SIZE - 1) / WG_SIZE;
|
||||||
|
// An issue: for clearing large buffers (>16M), we need to check the
|
||||||
|
// number of workgroups against the (dynamically detected) limit, and
|
||||||
|
// potentially issue multiple dispatches.
|
||||||
|
commands.cmd_buf.dispatch(
|
||||||
|
&code.pipeline,
|
||||||
|
&bindings.descriptor_set,
|
||||||
|
(n_workgroups as u32, 1, 1),
|
||||||
|
(WG_SIZE as u32, 1, 1),
|
||||||
|
);
|
||||||
|
// One thing that's missing here is registering the buffers so
|
||||||
|
// they can be safely dropped by Rust code before the execution
|
||||||
|
// of the command buffer completes.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that the data is cleared.
|
||||||
|
fn verify(data: &[u32]) -> Option<usize> {
|
||||||
|
data.iter().position(|val| *val != 0x42)
|
||||||
|
}
|
|
@ -21,6 +21,7 @@ use clap::ArgMatches;
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub groups: Groups,
|
pub groups: Groups,
|
||||||
pub size: Size,
|
pub size: Size,
|
||||||
|
pub n_iter: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Groups(String);
|
pub struct Groups(String);
|
||||||
|
@ -35,8 +36,14 @@ impl Config {
|
||||||
pub fn from_matches(matches: &ArgMatches) -> Config {
|
pub fn from_matches(matches: &ArgMatches) -> Config {
|
||||||
let groups = Groups::from_str(matches.value_of("groups").unwrap_or("all"));
|
let groups = Groups::from_str(matches.value_of("groups").unwrap_or("all"));
|
||||||
let size = Size::from_str(matches.value_of("size").unwrap_or("m"));
|
let size = Size::from_str(matches.value_of("size").unwrap_or("m"));
|
||||||
|
let n_iter = matches
|
||||||
|
.value_of("n_iter")
|
||||||
|
.and_then(|s| s.parse().ok())
|
||||||
|
.unwrap_or(1000);
|
||||||
Config {
|
Config {
|
||||||
groups, size
|
groups,
|
||||||
|
size,
|
||||||
|
n_iter,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
//! Tests for piet-gpu shaders and GPU capabilities.
|
//! Tests for piet-gpu shaders and GPU capabilities.
|
||||||
|
|
||||||
|
mod clear;
|
||||||
mod config;
|
mod config;
|
||||||
mod prefix;
|
mod prefix;
|
||||||
mod prefix_tree;
|
mod prefix_tree;
|
||||||
|
@ -23,6 +24,7 @@ mod runner;
|
||||||
mod test_result;
|
mod test_result;
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
|
use piet_gpu_hal::InstanceFlags;
|
||||||
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::runner::Runner;
|
use crate::runner::Runner;
|
||||||
|
@ -41,21 +43,26 @@ fn main() {
|
||||||
.short("g")
|
.short("g")
|
||||||
.long("groups")
|
.long("groups")
|
||||||
.help("Groups to run")
|
.help("Groups to run")
|
||||||
.takes_value(true)
|
.takes_value(true),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("size")
|
Arg::with_name("size")
|
||||||
.short("s")
|
.short("s")
|
||||||
.long("size")
|
.long("size")
|
||||||
.help("Size of tests")
|
.help("Size of tests")
|
||||||
.takes_value(true)
|
.takes_value(true),
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("n_iter")
|
Arg::with_name("n_iter")
|
||||||
.short("n")
|
.short("n")
|
||||||
.long("n_iter")
|
.long("n_iter")
|
||||||
.help("Number of iterations")
|
.help("Number of iterations")
|
||||||
.takes_value(true)
|
.takes_value(true),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("dx12")
|
||||||
|
.long("dx12")
|
||||||
|
.help("Prefer DX12 backend"),
|
||||||
)
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
let style = if matches.is_present("verbose") {
|
let style = if matches.is_present("verbose") {
|
||||||
|
@ -68,7 +75,16 @@ fn main() {
|
||||||
let report = |test_result: &TestResult| {
|
let report = |test_result: &TestResult| {
|
||||||
test_result.report(style);
|
test_result.report(style);
|
||||||
};
|
};
|
||||||
let mut runner = Runner::new();
|
let mut flags = InstanceFlags::empty();
|
||||||
|
if matches.is_present("dx12") {
|
||||||
|
flags |= InstanceFlags::DX12;
|
||||||
|
}
|
||||||
|
let mut runner = Runner::new(flags);
|
||||||
|
if style == ReportStyle::Verbose {
|
||||||
|
// TODO: get adapter name in here too
|
||||||
|
println!("Backend: {:?}", runner.backend_type());
|
||||||
|
}
|
||||||
|
report(&clear::run_clear_test(&mut runner, &config));
|
||||||
if config.groups.matches("prefix") {
|
if config.groups.matches("prefix") {
|
||||||
report(&prefix::run_prefix_test(&mut runner, &config));
|
report(&prefix::run_prefix_test(&mut runner, &config));
|
||||||
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
report(&prefix_tree::run_prefix_test(&mut runner, &config));
|
||||||
|
|
|
@ -14,9 +14,10 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use piet_gpu_hal::{include_shader, BufferUsage, DescriptorSet};
|
use piet_gpu_hal::{include_shader, BackendType, BindType, BufferUsage, DescriptorSet};
|
||||||
use piet_gpu_hal::{Buffer, Pipeline};
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
|
use crate::clear::{ClearBinding, ClearCode, ClearStage};
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
use crate::runner::{Commands, Runner};
|
use crate::runner::{Commands, Runner};
|
||||||
use crate::test_result::TestResult;
|
use crate::test_result::TestResult;
|
||||||
|
@ -30,6 +31,7 @@ const ELEMENTS_PER_WG: u64 = WG_SIZE * N_ROWS;
|
||||||
/// A code struct can be created once and reused any number of times.
|
/// A code struct can be created once and reused any number of times.
|
||||||
struct PrefixCode {
|
struct PrefixCode {
|
||||||
pipeline: Pipeline,
|
pipeline: Pipeline,
|
||||||
|
clear_code: Option<ClearCode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The stage resources for the prefix sum example.
|
/// The stage resources for the prefix sum example.
|
||||||
|
@ -41,6 +43,7 @@ struct PrefixStage {
|
||||||
// treat it as a capacity.
|
// treat it as a capacity.
|
||||||
n_elements: u64,
|
n_elements: u64,
|
||||||
state_buf: Buffer,
|
state_buf: Buffer,
|
||||||
|
clear_stage: Option<(ClearStage, ClearBinding)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The binding for the prefix sum example.
|
/// The binding for the prefix sum example.
|
||||||
|
@ -50,7 +53,13 @@ struct PrefixBinding {
|
||||||
|
|
||||||
pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResult {
|
pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
let mut result = TestResult::new("prefix sum, decoupled look-back");
|
let mut result = TestResult::new("prefix sum, decoupled look-back");
|
||||||
// This will be configurable.
|
/*
|
||||||
|
// We're good if we're using DXC.
|
||||||
|
if runner.backend_type() == BackendType::Dx12 {
|
||||||
|
result.skip("Shader won't compile on FXC");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
*/
|
||||||
let n_elements: u64 = config.size.choose(1 << 12, 1 << 24, 1 << 25);
|
let n_elements: u64 = config.size.choose(1 << 12, 1 << 24, 1 << 25);
|
||||||
let data: Vec<u32> = (0..n_elements as u32).collect();
|
let data: Vec<u32> = (0..n_elements as u32).collect();
|
||||||
let data_buf = runner
|
let data_buf = runner
|
||||||
|
@ -59,10 +68,9 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let out_buf = runner.buf_down(data_buf.size());
|
let out_buf = runner.buf_down(data_buf.size());
|
||||||
let code = PrefixCode::new(runner);
|
let code = PrefixCode::new(runner);
|
||||||
let stage = PrefixStage::new(runner, n_elements);
|
let stage = PrefixStage::new(runner, &code, n_elements);
|
||||||
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
let binding = stage.bind(runner, &code, &data_buf, &out_buf.dev_buf);
|
||||||
// Also will be configurable of course.
|
let n_iter = config.n_iter;
|
||||||
let n_iter = 1000;
|
|
||||||
let mut total_elapsed = 0.0;
|
let mut total_elapsed = 0.0;
|
||||||
for i in 0..n_iter {
|
for i in 0..n_iter {
|
||||||
let mut commands = runner.commands();
|
let mut commands = runner.commands();
|
||||||
|
@ -91,23 +99,44 @@ impl PrefixCode {
|
||||||
let code = include_shader!(&runner.session, "../shader/gen/prefix");
|
let code = include_shader!(&runner.session, "../shader/gen/prefix");
|
||||||
let pipeline = runner
|
let pipeline = runner
|
||||||
.session
|
.session
|
||||||
.create_simple_compute_pipeline(code, 3)
|
.create_compute_pipeline(
|
||||||
|
code,
|
||||||
|
&[BindType::BufReadOnly, BindType::Buffer, BindType::Buffer],
|
||||||
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
PrefixCode { pipeline }
|
// Currently, DX12 and Metal backends don't support buffer clearing, so use a
|
||||||
|
// compute shader as a workaround.
|
||||||
|
let clear_code = if runner.backend_type() != BackendType::Vulkan {
|
||||||
|
Some(ClearCode::new(runner))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
PrefixCode {
|
||||||
|
pipeline,
|
||||||
|
clear_code,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PrefixStage {
|
impl PrefixStage {
|
||||||
unsafe fn new(runner: &mut Runner, n_elements: u64) -> PrefixStage {
|
unsafe fn new(runner: &mut Runner, code: &PrefixCode, n_elements: u64) -> PrefixStage {
|
||||||
let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
let n_workgroups = (n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||||
let state_buf_size = 4 + 12 * n_workgroups;
|
let state_buf_size = 4 + 12 * n_workgroups;
|
||||||
let state_buf = runner
|
let state_buf = runner
|
||||||
.session
|
.session
|
||||||
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
.create_buffer(state_buf_size, BufferUsage::STORAGE | BufferUsage::COPY_DST)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let clear_stage = if let Some(clear_code) = &code.clear_code {
|
||||||
|
let stage = ClearStage::new(runner, state_buf_size / 4);
|
||||||
|
let binding = stage.bind(runner, clear_code, &state_buf);
|
||||||
|
Some((stage, binding))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
PrefixStage {
|
PrefixStage {
|
||||||
n_elements,
|
n_elements,
|
||||||
state_buf,
|
state_buf,
|
||||||
|
clear_stage,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +156,11 @@ impl PrefixStage {
|
||||||
|
|
||||||
unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
|
unsafe fn record(&self, commands: &mut Commands, code: &PrefixCode, bindings: &PrefixBinding) {
|
||||||
let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG;
|
||||||
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
if let Some((stage, binding)) = &self.clear_stage {
|
||||||
|
stage.record(commands, code.clear_code.as_ref().unwrap(), binding);
|
||||||
|
} else {
|
||||||
|
commands.cmd_buf.clear_buffer(&self.state_buf, None);
|
||||||
|
}
|
||||||
commands.cmd_buf.memory_barrier();
|
commands.cmd_buf.memory_barrier();
|
||||||
commands.cmd_buf.dispatch(
|
commands.cmd_buf.dispatch(
|
||||||
&code.pipeline,
|
&code.pipeline,
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use piet_gpu_hal::{include_shader, BufferUsage, DescriptorSet};
|
use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet};
|
||||||
use piet_gpu_hal::{Buffer, Pipeline};
|
use piet_gpu_hal::{Buffer, Pipeline};
|
||||||
|
|
||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
|
@ -57,7 +57,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
||||||
let stage = PrefixTreeStage::new(runner, n_elements);
|
let stage = PrefixTreeStage::new(runner, n_elements);
|
||||||
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
let binding = stage.bind(runner, &code, &out_buf.dev_buf);
|
||||||
// Also will be configurable of course.
|
// Also will be configurable of course.
|
||||||
let n_iter = 1000;
|
let n_iter = config.n_iter;
|
||||||
let mut total_elapsed = 0.0;
|
let mut total_elapsed = 0.0;
|
||||||
for i in 0..n_iter {
|
for i in 0..n_iter {
|
||||||
let mut commands = runner.commands();
|
let mut commands = runner.commands();
|
||||||
|
@ -88,17 +88,17 @@ impl PrefixTreeCode {
|
||||||
let reduce_code = include_shader!(&runner.session, "../shader/gen/prefix_reduce");
|
let reduce_code = include_shader!(&runner.session, "../shader/gen/prefix_reduce");
|
||||||
let reduce_pipeline = runner
|
let reduce_pipeline = runner
|
||||||
.session
|
.session
|
||||||
.create_simple_compute_pipeline(reduce_code, 2)
|
.create_compute_pipeline(reduce_code, &[BindType::BufReadOnly, BindType::Buffer])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let scan_code = include_shader!(&runner.session, "../shader/gen/prefix_scan");
|
let scan_code = include_shader!(&runner.session, "../shader/gen/prefix_scan");
|
||||||
let scan_pipeline = runner
|
let scan_pipeline = runner
|
||||||
.session
|
.session
|
||||||
.create_simple_compute_pipeline(scan_code, 2)
|
.create_compute_pipeline(scan_code, &[BindType::Buffer, BindType::BufReadOnly])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let root_code = include_shader!(&runner.session, "../shader/gen/prefix_root");
|
let root_code = include_shader!(&runner.session, "../shader/gen/prefix_root");
|
||||||
let root_pipeline = runner
|
let root_pipeline = runner
|
||||||
.session
|
.session
|
||||||
.create_simple_compute_pipeline(root_code, 1)
|
.create_compute_pipeline(root_code, &[BindType::Buffer])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
PrefixTreeCode {
|
PrefixTreeCode {
|
||||||
reduce_pipeline,
|
reduce_pipeline,
|
||||||
|
|
|
@ -16,7 +16,10 @@
|
||||||
|
|
||||||
//! Test runner intended to make it easy to write tests.
|
//! Test runner intended to make it easy to write tests.
|
||||||
|
|
||||||
use piet_gpu_hal::{Buffer, BufferUsage, CmdBuf, Instance, PlainData, QueryPool, Session};
|
use piet_gpu_hal::{
|
||||||
|
BackendType, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, PlainData, QueryPool,
|
||||||
|
Session,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct Runner {
|
pub struct Runner {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
|
@ -45,8 +48,8 @@ pub struct BufDown {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Runner {
|
impl Runner {
|
||||||
pub unsafe fn new() -> Runner {
|
pub unsafe fn new(flags: InstanceFlags) -> Runner {
|
||||||
let (instance, _) = Instance::new(None).unwrap();
|
let (instance, _) = Instance::new(None, flags).unwrap();
|
||||||
let device = instance.device(None).unwrap();
|
let device = instance.device(None).unwrap();
|
||||||
let session = Session::new(device);
|
let session = Session::new(device);
|
||||||
let cmd_buf_pool = Vec::new();
|
let cmd_buf_pool = Vec::new();
|
||||||
|
@ -82,7 +85,7 @@ impl Runner {
|
||||||
let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
|
let submitted = self.session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
|
||||||
self.cmd_buf_pool.extend(submitted.wait().unwrap());
|
self.cmd_buf_pool.extend(submitted.wait().unwrap());
|
||||||
let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
|
let timestamps = self.session.fetch_query_pool(&query_pool).unwrap();
|
||||||
timestamps[0]
|
timestamps.get(0).copied().unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
|
@ -114,6 +117,10 @@ impl Runner {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
BufDown { stage_buf, dev_buf }
|
BufDown { stage_buf, dev_buf }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn backend_type(&self) -> BackendType {
|
||||||
|
self.session.backend_type()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Commands {
|
impl Commands {
|
||||||
|
|
|
@ -21,10 +21,17 @@ pub struct TestResult {
|
||||||
// TODO: statistics. We're lean and mean for now.
|
// TODO: statistics. We're lean and mean for now.
|
||||||
total_time: f64,
|
total_time: f64,
|
||||||
n_elements: u64,
|
n_elements: u64,
|
||||||
failure: Option<String>,
|
status: Status,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
pub enum Status {
|
||||||
|
Pass,
|
||||||
|
Fail(String),
|
||||||
|
#[allow(unused)]
|
||||||
|
Skipped(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum ReportStyle {
|
pub enum ReportStyle {
|
||||||
Short,
|
Short,
|
||||||
Verbose,
|
Verbose,
|
||||||
|
@ -36,14 +43,15 @@ impl TestResult {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
total_time: 0.0,
|
total_time: 0.0,
|
||||||
n_elements: 0,
|
n_elements: 0,
|
||||||
failure: None,
|
status: Status::Pass,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn report(&self, style: ReportStyle) {
|
pub fn report(&self, style: ReportStyle) {
|
||||||
let fail_string = match &self.failure {
|
let fail_string = match &self.status {
|
||||||
None => "pass".into(),
|
Status::Pass => "pass".into(),
|
||||||
Some(s) => format!("fail ({})", s),
|
Status::Fail(s) => format!("fail ({})", s),
|
||||||
|
Status::Skipped(s) => format!("skipped ({})", s),
|
||||||
};
|
};
|
||||||
match style {
|
match style {
|
||||||
ReportStyle::Short => {
|
ReportStyle::Short => {
|
||||||
|
@ -73,8 +81,13 @@ impl TestResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fail(&mut self, explanation: String) {
|
pub fn fail(&mut self, explanation: impl Into<String>) {
|
||||||
self.failure = Some(explanation);
|
self.status = Status::Fail(explanation.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
pub fn skip(&mut self, explanation: impl Into<String>) {
|
||||||
|
self.status = Status::Skipped(explanation.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn timing(&mut self, total_time: f64, n_elements: u64) {
|
pub fn timing(&mut self, total_time: f64, n_elements: u64) {
|
||||||
|
|
Loading…
Reference in a new issue