mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Redo memory options for usage
Rework the entire mechanism for specifying memory for creating resources, inferring the correct options from the new usage flags.
This commit is contained in:
parent
3dfae7aed6
commit
050df66801
|
@ -2,7 +2,7 @@
|
|||
//! This will probably go away when it's fully implemented and we can
|
||||
//! just use the hub.
|
||||
|
||||
use piet_gpu_hal::{dx12, CmdBuf, Device, Error, MemFlags};
|
||||
use piet_gpu_hal::{dx12, BufferUsage, CmdBuf, Device, Error};
|
||||
|
||||
const SHADER_CODE: &str = r#"RWByteAddressBuffer _53 : register(u0, space0);
|
||||
|
||||
|
@ -49,8 +49,17 @@ void main(SPIRV_Cross_Input stage_input)
|
|||
fn toy() -> Result<(), Error> {
|
||||
let instance = dx12::Dx12Instance::new()?;
|
||||
let device = instance.device()?;
|
||||
let buf = device.create_buffer(1024, MemFlags::host_coherent())?;
|
||||
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?;
|
||||
let buf = device.create_buffer(
|
||||
1024,
|
||||
BufferUsage::MAP_READ
|
||||
| BufferUsage::MAP_WRITE
|
||||
| BufferUsage::COPY_SRC
|
||||
| BufferUsage::COPY_DST,
|
||||
)?;
|
||||
let dev_buf = device.create_buffer(
|
||||
1024,
|
||||
BufferUsage::STORAGE | BufferUsage::COPY_SRC | BufferUsage::COPY_DST,
|
||||
)?;
|
||||
let data: Vec<u32> = (1..257).collect();
|
||||
let query_pool = device.create_query_pool(2)?;
|
||||
unsafe {
|
||||
|
|
|
@ -6,9 +6,10 @@ mod wrappers;
|
|||
use std::{cell::Cell, convert::TryInto, mem, ptr};
|
||||
|
||||
use winapi::shared::dxgi1_3;
|
||||
use winapi::shared::minwindef::TRUE;
|
||||
use winapi::um::d3d12;
|
||||
|
||||
use crate::Error;
|
||||
use crate::{BufferUsage, Error};
|
||||
|
||||
use self::wrappers::{
|
||||
CommandAllocator, CommandQueue, Device, Factory4, GraphicsCommandList, Resource, ShaderByteCode,
|
||||
|
@ -23,6 +24,7 @@ pub struct Dx12Device {
|
|||
command_allocator: CommandAllocator,
|
||||
command_queue: CommandQueue,
|
||||
ts_freq: u64,
|
||||
memory_arch: MemoryArchitecture,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -35,15 +37,6 @@ pub struct Image {
|
|||
resource: Resource,
|
||||
}
|
||||
|
||||
// TODO: this doesn't make an upload/download distinction. Probably
|
||||
// we want to move toward webgpu-style usage flags, ie map_read and
|
||||
// map_write are the main ones that affect buffer creation.
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum MemFlags {
|
||||
DeviceLocal,
|
||||
HostCoherent,
|
||||
}
|
||||
|
||||
pub struct CmdBuf(GraphicsCommandList);
|
||||
|
||||
pub struct Pipeline {
|
||||
|
@ -59,10 +52,6 @@ pub struct DescriptorSet(wrappers::DescriptorHeap);
|
|||
pub struct QueryPool {
|
||||
heap: wrappers::QueryHeap,
|
||||
buf: Buffer,
|
||||
// TODO: piet-dx12 manages to do this with one buffer. I think a
|
||||
// HEAP_TYPE_READBACK will work, but we currently don't have fine
|
||||
// grained usage flags to express this.
|
||||
readback_buf: Buffer,
|
||||
n_queries: u32,
|
||||
}
|
||||
|
||||
|
@ -88,6 +77,16 @@ pub struct DescriptorSetBuilder {
|
|||
buffers: Vec<Buffer>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
enum MemoryArchitecture {
|
||||
/// Integrated graphics
|
||||
CacheCoherentUMA,
|
||||
/// Unified memory with no cache coherence (does this happen?)
|
||||
UMA,
|
||||
/// Discrete graphics
|
||||
NUMA,
|
||||
}
|
||||
|
||||
impl Dx12Instance {
|
||||
/// Create a new instance.
|
||||
///
|
||||
|
@ -128,11 +127,20 @@ impl Dx12Instance {
|
|||
)?;
|
||||
let command_allocator = device.create_command_allocator(list_type)?;
|
||||
let ts_freq = command_queue.get_timestamp_frequency()?;
|
||||
let features_architecture = device.get_features_architecture()?;
|
||||
let uma = features_architecture.UMA == TRUE;
|
||||
let cc_uma = features_architecture.CacheCoherentUMA == TRUE;
|
||||
let memory_arch = match (uma, cc_uma) {
|
||||
(true, true) => MemoryArchitecture::CacheCoherentUMA,
|
||||
(true, false) => MemoryArchitecture::UMA,
|
||||
_ => MemoryArchitecture::NUMA,
|
||||
};
|
||||
Ok(Dx12Device {
|
||||
device,
|
||||
command_queue,
|
||||
command_allocator,
|
||||
ts_freq,
|
||||
memory_arch,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -143,8 +151,6 @@ impl crate::Device for Dx12Device {
|
|||
|
||||
type Image = Image;
|
||||
|
||||
type MemFlags = MemFlags;
|
||||
|
||||
type Pipeline = Pipeline;
|
||||
|
||||
type DescriptorSet = DescriptorSet;
|
||||
|
@ -166,16 +172,22 @@ impl crate::Device for Dx12Device {
|
|||
// Currently this is HLSL source, but we'll probably change it to IR.
|
||||
type ShaderSource = str;
|
||||
|
||||
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error> {
|
||||
fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error> {
|
||||
// TODO: consider supporting BufferUsage::QUERY_RESOLVE here rather than
|
||||
// having a separate function.
|
||||
unsafe {
|
||||
let resource = match mem_flags {
|
||||
MemFlags::DeviceLocal => self
|
||||
.device
|
||||
.create_gpu_only_byte_addressed_buffer(size.try_into()?)?,
|
||||
MemFlags::HostCoherent => self
|
||||
.device
|
||||
.create_uploadable_byte_addressed_buffer(size.try_into()?)?,
|
||||
};
|
||||
let page_property = self.memory_arch.page_property(usage);
|
||||
let memory_pool = self.memory_arch.memory_pool(usage);
|
||||
//TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
|
||||
let flags = d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
let resource = self.device.create_buffer(
|
||||
size.try_into()?,
|
||||
d3d12::D3D12_HEAP_TYPE_CUSTOM,
|
||||
page_property,
|
||||
memory_pool,
|
||||
d3d12::D3D12_RESOURCE_STATE_COMMON,
|
||||
flags,
|
||||
)?;
|
||||
Ok(Buffer { resource, size })
|
||||
}
|
||||
}
|
||||
|
@ -185,12 +197,7 @@ impl crate::Device for Dx12Device {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn create_image2d(
|
||||
&self,
|
||||
width: u32,
|
||||
height: u32,
|
||||
mem_flags: Self::MemFlags,
|
||||
) -> Result<Self::Image, Error> {
|
||||
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
|
@ -217,14 +224,10 @@ impl crate::Device for Dx12Device {
|
|||
let heap = self
|
||||
.device
|
||||
.create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?;
|
||||
let buf =
|
||||
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::device_local())?;
|
||||
let readback_buf =
|
||||
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::host_coherent())?;
|
||||
let buf = self.create_readback_buffer((n_queries * 8) as u64)?;
|
||||
Ok(QueryPool {
|
||||
heap,
|
||||
buf,
|
||||
readback_buf,
|
||||
n_queries,
|
||||
})
|
||||
}
|
||||
|
@ -232,7 +235,7 @@ impl crate::Device for Dx12Device {
|
|||
|
||||
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
|
||||
let mut buf = vec![0u64; pool.n_queries as usize];
|
||||
self.read_buffer(&pool.readback_buf, &mut buf)?;
|
||||
self.read_buffer(&pool.buf, &mut buf)?;
|
||||
let ts0 = buf[0];
|
||||
let tsp = (self.ts_freq as f64).recip();
|
||||
let result = buf[1..]
|
||||
|
@ -327,6 +330,22 @@ impl crate::Device for Dx12Device {
|
|||
}
|
||||
}
|
||||
|
||||
impl Dx12Device {
|
||||
fn create_readback_buffer(&self, size: u64) -> Result<Buffer, Error> {
|
||||
unsafe {
|
||||
let resource = self.device.create_buffer(
|
||||
size.try_into()?,
|
||||
d3d12::D3D12_HEAP_TYPE_READBACK,
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
d3d12::D3D12_MEMORY_POOL_UNKNOWN,
|
||||
d3d12::D3D12_RESOURCE_STATE_COPY_DEST,
|
||||
d3d12::D3D12_RESOURCE_FLAG_NONE,
|
||||
)?;
|
||||
Ok(Buffer { resource, size })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::CmdBuf<Dx12Device> for CmdBuf {
|
||||
unsafe fn begin(&mut self) {}
|
||||
|
||||
|
@ -406,17 +425,6 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
|
|||
unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
|
||||
self.0
|
||||
.resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0);
|
||||
self.copy_buffer(&pool.buf, &pool.readback_buf);
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::MemFlags for MemFlags {
|
||||
fn device_local() -> Self {
|
||||
MemFlags::DeviceLocal
|
||||
}
|
||||
|
||||
fn host_coherent() -> Self {
|
||||
MemFlags::HostCoherent
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -543,3 +551,31 @@ impl crate::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
|
|||
Ok(DescriptorSet(heap))
|
||||
}
|
||||
}
|
||||
|
||||
impl MemoryArchitecture {
|
||||
// See https://msdn.microsoft.com/de-de/library/windows/desktop/dn788678(v=vs.85).aspx
|
||||
|
||||
fn page_property(&self, usage: BufferUsage) -> d3d12::D3D12_CPU_PAGE_PROPERTY {
|
||||
if usage.contains(BufferUsage::MAP_READ) {
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK
|
||||
} else if usage.contains(BufferUsage::MAP_WRITE) {
|
||||
if *self == MemoryArchitecture::CacheCoherentUMA {
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK
|
||||
} else {
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE
|
||||
}
|
||||
} else {
|
||||
d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE
|
||||
}
|
||||
}
|
||||
|
||||
fn memory_pool(&self, usage: BufferUsage) -> d3d12::D3D12_MEMORY_POOL {
|
||||
if *self == MemoryArchitecture::NUMA
|
||||
&& !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE)
|
||||
{
|
||||
d3d12::D3D12_MEMORY_POOL_L1
|
||||
} else {
|
||||
d3d12::D3D12_MEMORY_POOL_L0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -677,7 +677,6 @@ impl Device {
|
|||
.CreateRenderTargetView(resource.get_mut(), desc, descriptor);
|
||||
}
|
||||
|
||||
// TODO: interface not complete
|
||||
pub unsafe fn create_fence(&self, initial: u64) -> Result<Fence, Error> {
|
||||
let mut fence = ptr::null_mut();
|
||||
explain_error(
|
||||
|
@ -810,57 +809,19 @@ impl Device {
|
|||
(layouts, num_rows, row_size_in_bytes, total_size)
|
||||
}
|
||||
|
||||
// TODO: probably combine these and add usage flags
|
||||
pub unsafe fn create_uploadable_buffer(
|
||||
&self,
|
||||
buffer_size_in_bytes: u64,
|
||||
) -> Result<Resource, Error> {
|
||||
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
|
||||
//for GPU access only
|
||||
Type: d3d12::D3D12_HEAP_TYPE_UPLOAD,
|
||||
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
//TODO: what should MemoryPoolPreference flag be?
|
||||
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
|
||||
//we don't care about multi-adapter operation, so these next two will be zero
|
||||
CreationNodeMask: 0,
|
||||
VisibleNodeMask: 0,
|
||||
};
|
||||
let resource_description = d3d12::D3D12_RESOURCE_DESC {
|
||||
Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
Width: buffer_size_in_bytes,
|
||||
Height: 1,
|
||||
DepthOrArraySize: 1,
|
||||
MipLevels: 1,
|
||||
SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
|
||||
Count: 1,
|
||||
Quality: 0,
|
||||
},
|
||||
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
Flags: d3d12::D3D12_RESOURCE_FLAG_NONE,
|
||||
..mem::zeroed()
|
||||
};
|
||||
|
||||
let buffer = self.create_committed_resource(
|
||||
&heap_properties,
|
||||
//TODO: is this heap flag ok?
|
||||
d3d12::D3D12_HEAP_FLAG_NONE,
|
||||
&resource_description,
|
||||
d3d12::D3D12_RESOURCE_STATE_GENERIC_READ,
|
||||
ptr::null(),
|
||||
)?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
pub unsafe fn create_uploadable_byte_addressed_buffer(
|
||||
pub unsafe fn create_buffer(
|
||||
&self,
|
||||
buffer_size_in_bytes: u32,
|
||||
heap_type: d3d12::D3D12_HEAP_TYPE,
|
||||
cpu_page: d3d12::D3D12_CPU_PAGE_PROPERTY,
|
||||
memory_pool_preference: d3d12::D3D12_MEMORY_POOL,
|
||||
init_resource_state: d3d12::D3D12_RESOURCE_STATES,
|
||||
resource_flags: d3d12::D3D12_RESOURCE_FLAGS,
|
||||
) -> Result<Resource, Error> {
|
||||
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
|
||||
Type: d3d12::D3D12_HEAP_TYPE_UPLOAD,
|
||||
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
//TODO: what should MemoryPoolPreference flag be?
|
||||
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
|
||||
Type: heap_type,
|
||||
CPUPageProperty: cpu_page,
|
||||
MemoryPoolPreference: memory_pool_preference,
|
||||
//we don't care about multi-adapter operation, so these next two will be zero
|
||||
CreationNodeMask: 0,
|
||||
VisibleNodeMask: 0,
|
||||
|
@ -876,50 +837,7 @@ impl Device {
|
|||
Quality: 0,
|
||||
},
|
||||
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
Flags: d3d12::D3D12_RESOURCE_FLAG_NONE,
|
||||
..mem::zeroed()
|
||||
};
|
||||
|
||||
let byte_addressed_buffer = self.create_committed_resource(
|
||||
&heap_properties,
|
||||
//TODO: is this heap flag ok?
|
||||
d3d12::D3D12_HEAP_FLAG_NONE,
|
||||
&resource_description,
|
||||
d3d12::D3D12_RESOURCE_STATE_GENERIC_READ,
|
||||
ptr::null(),
|
||||
)?;
|
||||
|
||||
Ok(byte_addressed_buffer)
|
||||
}
|
||||
|
||||
pub unsafe fn create_gpu_only_byte_addressed_buffer(
|
||||
&self,
|
||||
buffer_size_in_bytes: u32,
|
||||
) -> Result<Resource, Error> {
|
||||
//TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
|
||||
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
|
||||
//for GPU access only
|
||||
Type: d3d12::D3D12_HEAP_TYPE_DEFAULT,
|
||||
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
|
||||
//TODO: what should MemoryPoolPreference flag be?
|
||||
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
|
||||
//we don't care about multi-adapter operation, so these next two will be zero
|
||||
CreationNodeMask: 0,
|
||||
VisibleNodeMask: 0,
|
||||
};
|
||||
let resource_description = d3d12::D3D12_RESOURCE_DESC {
|
||||
Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
|
||||
Width: buffer_size_in_bytes as u64,
|
||||
Height: 1,
|
||||
DepthOrArraySize: 1,
|
||||
MipLevels: 1,
|
||||
SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
|
||||
Count: 1,
|
||||
Quality: 0,
|
||||
},
|
||||
//essentially we're letting the adapter decide the layout
|
||||
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
|
||||
Flags: d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
Flags: resource_flags,
|
||||
..mem::zeroed()
|
||||
};
|
||||
|
||||
|
@ -927,7 +845,7 @@ impl Device {
|
|||
&heap_properties,
|
||||
d3d12::D3D12_HEAP_FLAG_NONE,
|
||||
&resource_description,
|
||||
d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
init_resource_state,
|
||||
ptr::null(),
|
||||
)?;
|
||||
|
||||
|
@ -994,6 +912,21 @@ impl Device {
|
|||
Ok(buffer)
|
||||
}
|
||||
|
||||
pub unsafe fn get_features_architecture(
|
||||
&self,
|
||||
) -> Result<d3d12::D3D12_FEATURE_DATA_ARCHITECTURE, Error> {
|
||||
let mut features_architecture = mem::zeroed();
|
||||
explain_error(
|
||||
self.0.CheckFeatureSupport(
|
||||
d3d12::D3D12_FEATURE_ARCHITECTURE,
|
||||
&mut features_architecture as *mut _ as *mut _,
|
||||
mem::size_of::<d3d12::D3D12_FEATURE_DATA_ARCHITECTURE>() as u32,
|
||||
),
|
||||
"error querying feature architecture",
|
||||
)?;
|
||||
Ok(features_architecture)
|
||||
}
|
||||
|
||||
pub unsafe fn get_removal_reason(&self) -> Error {
|
||||
Error::Hresult(self.0.GetDeviceRemovedReason())
|
||||
}
|
||||
|
|
|
@ -150,11 +150,7 @@ impl Session {
|
|||
})))
|
||||
}
|
||||
|
||||
pub unsafe fn create_image2d(
|
||||
&self,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<Image, Error> {
|
||||
pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
|
||||
let image = self.0.device.create_image2d(width, height)?;
|
||||
Ok(Image(Arc::new(ImageInner {
|
||||
image,
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
///
|
||||
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
|
||||
/// In time, it may go away and be replaced by either gfx-hal or wgpu.
|
||||
|
||||
use bitflags::bitflags;
|
||||
|
||||
pub mod hub;
|
||||
|
@ -106,11 +105,7 @@ pub trait Device: Sized {
|
|||
/// Maybe doesn't need result return?
|
||||
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
|
||||
|
||||
unsafe fn create_image2d(
|
||||
&self,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<Self::Image, Error>;
|
||||
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
|
||||
|
||||
/// Destroy an image.
|
||||
///
|
||||
|
|
|
@ -9,7 +9,9 @@ use ash::extensions::{ext::DebugUtils, khr};
|
|||
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
|
||||
use ash::{vk, Device, Entry, Instance};
|
||||
|
||||
use crate::{BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize};
|
||||
use crate::{
|
||||
BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
|
||||
};
|
||||
|
||||
pub struct VkInstance {
|
||||
/// Retain the dynamic lib.
|
||||
|
@ -512,11 +514,7 @@ impl crate::Device for VkDevice {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
unsafe fn create_image2d(
|
||||
&self,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<Self::Image, Error> {
|
||||
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
|
||||
let device = &self.device.device;
|
||||
let extent = vk::Extent3D {
|
||||
width,
|
||||
|
|
Loading…
Reference in a new issue