Redo memory options for usage

Rework the entire mechanism for specifying memory for creating
resources, inferring the correct options from the new usage flags.
This commit is contained in:
Raph Levien 2021-05-21 22:03:42 -07:00
parent 3dfae7aed6
commit 050df66801
6 changed files with 128 additions and 161 deletions

View file

@ -2,7 +2,7 @@
//! This will probably go away when it's fully implemented and we can //! This will probably go away when it's fully implemented and we can
//! just use the hub. //! just use the hub.
use piet_gpu_hal::{dx12, CmdBuf, Device, Error, MemFlags}; use piet_gpu_hal::{dx12, BufferUsage, CmdBuf, Device, Error};
const SHADER_CODE: &str = r#"RWByteAddressBuffer _53 : register(u0, space0); const SHADER_CODE: &str = r#"RWByteAddressBuffer _53 : register(u0, space0);
@ -49,8 +49,17 @@ void main(SPIRV_Cross_Input stage_input)
fn toy() -> Result<(), Error> { fn toy() -> Result<(), Error> {
let instance = dx12::Dx12Instance::new()?; let instance = dx12::Dx12Instance::new()?;
let device = instance.device()?; let device = instance.device()?;
let buf = device.create_buffer(1024, MemFlags::host_coherent())?; let buf = device.create_buffer(
let dev_buf = device.create_buffer(1024, MemFlags::device_local())?; 1024,
BufferUsage::MAP_READ
| BufferUsage::MAP_WRITE
| BufferUsage::COPY_SRC
| BufferUsage::COPY_DST,
)?;
let dev_buf = device.create_buffer(
1024,
BufferUsage::STORAGE | BufferUsage::COPY_SRC | BufferUsage::COPY_DST,
)?;
let data: Vec<u32> = (1..257).collect(); let data: Vec<u32> = (1..257).collect();
let query_pool = device.create_query_pool(2)?; let query_pool = device.create_query_pool(2)?;
unsafe { unsafe {

View file

@ -6,9 +6,10 @@ mod wrappers;
use std::{cell::Cell, convert::TryInto, mem, ptr}; use std::{cell::Cell, convert::TryInto, mem, ptr};
use winapi::shared::dxgi1_3; use winapi::shared::dxgi1_3;
use winapi::shared::minwindef::TRUE;
use winapi::um::d3d12; use winapi::um::d3d12;
use crate::Error; use crate::{BufferUsage, Error};
use self::wrappers::{ use self::wrappers::{
CommandAllocator, CommandQueue, Device, Factory4, GraphicsCommandList, Resource, ShaderByteCode, CommandAllocator, CommandQueue, Device, Factory4, GraphicsCommandList, Resource, ShaderByteCode,
@ -23,6 +24,7 @@ pub struct Dx12Device {
command_allocator: CommandAllocator, command_allocator: CommandAllocator,
command_queue: CommandQueue, command_queue: CommandQueue,
ts_freq: u64, ts_freq: u64,
memory_arch: MemoryArchitecture,
} }
#[derive(Clone)] #[derive(Clone)]
@ -35,15 +37,6 @@ pub struct Image {
resource: Resource, resource: Resource,
} }
// TODO: this doesn't make an upload/download distinction. Probably
// we want to move toward webgpu-style usage flags, ie map_read and
// map_write are the main ones that affect buffer creation.
#[derive(Clone, Copy)]
pub enum MemFlags {
DeviceLocal,
HostCoherent,
}
pub struct CmdBuf(GraphicsCommandList); pub struct CmdBuf(GraphicsCommandList);
pub struct Pipeline { pub struct Pipeline {
@ -59,10 +52,6 @@ pub struct DescriptorSet(wrappers::DescriptorHeap);
pub struct QueryPool { pub struct QueryPool {
heap: wrappers::QueryHeap, heap: wrappers::QueryHeap,
buf: Buffer, buf: Buffer,
// TODO: piet-dx12 manages to do this with one buffer. I think a
// HEAP_TYPE_READBACK will work, but we currently don't have fine
// grained usage flags to express this.
readback_buf: Buffer,
n_queries: u32, n_queries: u32,
} }
@ -88,6 +77,16 @@ pub struct DescriptorSetBuilder {
buffers: Vec<Buffer>, buffers: Vec<Buffer>,
} }
#[derive(PartialEq, Eq)]
enum MemoryArchitecture {
/// Integrated graphics
CacheCoherentUMA,
/// Unified memory with no cache coherence (does this happen?)
UMA,
/// Discrete graphics
NUMA,
}
impl Dx12Instance { impl Dx12Instance {
/// Create a new instance. /// Create a new instance.
/// ///
@ -128,11 +127,20 @@ impl Dx12Instance {
)?; )?;
let command_allocator = device.create_command_allocator(list_type)?; let command_allocator = device.create_command_allocator(list_type)?;
let ts_freq = command_queue.get_timestamp_frequency()?; let ts_freq = command_queue.get_timestamp_frequency()?;
let features_architecture = device.get_features_architecture()?;
let uma = features_architecture.UMA == TRUE;
let cc_uma = features_architecture.CacheCoherentUMA == TRUE;
let memory_arch = match (uma, cc_uma) {
(true, true) => MemoryArchitecture::CacheCoherentUMA,
(true, false) => MemoryArchitecture::UMA,
_ => MemoryArchitecture::NUMA,
};
Ok(Dx12Device { Ok(Dx12Device {
device, device,
command_queue, command_queue,
command_allocator, command_allocator,
ts_freq, ts_freq,
memory_arch,
}) })
} }
} }
@ -143,8 +151,6 @@ impl crate::Device for Dx12Device {
type Image = Image; type Image = Image;
type MemFlags = MemFlags;
type Pipeline = Pipeline; type Pipeline = Pipeline;
type DescriptorSet = DescriptorSet; type DescriptorSet = DescriptorSet;
@ -166,16 +172,22 @@ impl crate::Device for Dx12Device {
// Currently this is HLSL source, but we'll probably change it to IR. // Currently this is HLSL source, but we'll probably change it to IR.
type ShaderSource = str; type ShaderSource = str;
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error> { fn create_buffer(&self, size: u64, usage: BufferUsage) -> Result<Self::Buffer, Error> {
// TODO: consider supporting BufferUsage::QUERY_RESOLVE here rather than
// having a separate function.
unsafe { unsafe {
let resource = match mem_flags { let page_property = self.memory_arch.page_property(usage);
MemFlags::DeviceLocal => self let memory_pool = self.memory_arch.memory_pool(usage);
.device //TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
.create_gpu_only_byte_addressed_buffer(size.try_into()?)?, let flags = d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
MemFlags::HostCoherent => self let resource = self.device.create_buffer(
.device size.try_into()?,
.create_uploadable_byte_addressed_buffer(size.try_into()?)?, d3d12::D3D12_HEAP_TYPE_CUSTOM,
}; page_property,
memory_pool,
d3d12::D3D12_RESOURCE_STATE_COMMON,
flags,
)?;
Ok(Buffer { resource, size }) Ok(Buffer { resource, size })
} }
} }
@ -185,12 +197,7 @@ impl crate::Device for Dx12Device {
Ok(()) Ok(())
} }
unsafe fn create_image2d( unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
&self,
width: u32,
height: u32,
mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error> {
todo!() todo!()
} }
@ -217,14 +224,10 @@ impl crate::Device for Dx12Device {
let heap = self let heap = self
.device .device
.create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?; .create_query_heap(d3d12::D3D12_QUERY_HEAP_TYPE_TIMESTAMP, n_queries)?;
let buf = let buf = self.create_readback_buffer((n_queries * 8) as u64)?;
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::device_local())?;
let readback_buf =
self.create_buffer((n_queries * 8) as u64, crate::MemFlags::host_coherent())?;
Ok(QueryPool { Ok(QueryPool {
heap, heap,
buf, buf,
readback_buf,
n_queries, n_queries,
}) })
} }
@ -232,7 +235,7 @@ impl crate::Device for Dx12Device {
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> { unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
let mut buf = vec![0u64; pool.n_queries as usize]; let mut buf = vec![0u64; pool.n_queries as usize];
self.read_buffer(&pool.readback_buf, &mut buf)?; self.read_buffer(&pool.buf, &mut buf)?;
let ts0 = buf[0]; let ts0 = buf[0];
let tsp = (self.ts_freq as f64).recip(); let tsp = (self.ts_freq as f64).recip();
let result = buf[1..] let result = buf[1..]
@ -327,6 +330,22 @@ impl crate::Device for Dx12Device {
} }
} }
impl Dx12Device {
fn create_readback_buffer(&self, size: u64) -> Result<Buffer, Error> {
unsafe {
let resource = self.device.create_buffer(
size.try_into()?,
d3d12::D3D12_HEAP_TYPE_READBACK,
d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
d3d12::D3D12_MEMORY_POOL_UNKNOWN,
d3d12::D3D12_RESOURCE_STATE_COPY_DEST,
d3d12::D3D12_RESOURCE_FLAG_NONE,
)?;
Ok(Buffer { resource, size })
}
}
}
impl crate::CmdBuf<Dx12Device> for CmdBuf { impl crate::CmdBuf<Dx12Device> for CmdBuf {
unsafe fn begin(&mut self) {} unsafe fn begin(&mut self) {}
@ -406,17 +425,6 @@ impl crate::CmdBuf<Dx12Device> for CmdBuf {
unsafe fn finish_timestamps(&mut self, pool: &QueryPool) { unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
self.0 self.0
.resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0); .resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0);
self.copy_buffer(&pool.buf, &pool.readback_buf);
}
}
impl crate::MemFlags for MemFlags {
fn device_local() -> Self {
MemFlags::DeviceLocal
}
fn host_coherent() -> Self {
MemFlags::HostCoherent
} }
} }
@ -543,3 +551,31 @@ impl crate::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
Ok(DescriptorSet(heap)) Ok(DescriptorSet(heap))
} }
} }
impl MemoryArchitecture {
// See https://msdn.microsoft.com/de-de/library/windows/desktop/dn788678(v=vs.85).aspx
fn page_property(&self, usage: BufferUsage) -> d3d12::D3D12_CPU_PAGE_PROPERTY {
if usage.contains(BufferUsage::MAP_READ) {
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK
} else if usage.contains(BufferUsage::MAP_WRITE) {
if *self == MemoryArchitecture::CacheCoherentUMA {
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK
} else {
d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE
}
} else {
d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE
}
}
fn memory_pool(&self, usage: BufferUsage) -> d3d12::D3D12_MEMORY_POOL {
if *self == MemoryArchitecture::NUMA
&& !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE)
{
d3d12::D3D12_MEMORY_POOL_L1
} else {
d3d12::D3D12_MEMORY_POOL_L0
}
}
}

View file

@ -677,7 +677,6 @@ impl Device {
.CreateRenderTargetView(resource.get_mut(), desc, descriptor); .CreateRenderTargetView(resource.get_mut(), desc, descriptor);
} }
// TODO: interface not complete
pub unsafe fn create_fence(&self, initial: u64) -> Result<Fence, Error> { pub unsafe fn create_fence(&self, initial: u64) -> Result<Fence, Error> {
let mut fence = ptr::null_mut(); let mut fence = ptr::null_mut();
explain_error( explain_error(
@ -810,57 +809,19 @@ impl Device {
(layouts, num_rows, row_size_in_bytes, total_size) (layouts, num_rows, row_size_in_bytes, total_size)
} }
// TODO: probably combine these and add usage flags pub unsafe fn create_buffer(
pub unsafe fn create_uploadable_buffer(
&self,
buffer_size_in_bytes: u64,
) -> Result<Resource, Error> {
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
//for GPU access only
Type: d3d12::D3D12_HEAP_TYPE_UPLOAD,
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
//TODO: what should MemoryPoolPreference flag be?
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
//we don't care about multi-adapter operation, so these next two will be zero
CreationNodeMask: 0,
VisibleNodeMask: 0,
};
let resource_description = d3d12::D3D12_RESOURCE_DESC {
Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
Width: buffer_size_in_bytes,
Height: 1,
DepthOrArraySize: 1,
MipLevels: 1,
SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
Count: 1,
Quality: 0,
},
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
Flags: d3d12::D3D12_RESOURCE_FLAG_NONE,
..mem::zeroed()
};
let buffer = self.create_committed_resource(
&heap_properties,
//TODO: is this heap flag ok?
d3d12::D3D12_HEAP_FLAG_NONE,
&resource_description,
d3d12::D3D12_RESOURCE_STATE_GENERIC_READ,
ptr::null(),
)?;
Ok(buffer)
}
pub unsafe fn create_uploadable_byte_addressed_buffer(
&self, &self,
buffer_size_in_bytes: u32, buffer_size_in_bytes: u32,
heap_type: d3d12::D3D12_HEAP_TYPE,
cpu_page: d3d12::D3D12_CPU_PAGE_PROPERTY,
memory_pool_preference: d3d12::D3D12_MEMORY_POOL,
init_resource_state: d3d12::D3D12_RESOURCE_STATES,
resource_flags: d3d12::D3D12_RESOURCE_FLAGS,
) -> Result<Resource, Error> { ) -> Result<Resource, Error> {
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
Type: d3d12::D3D12_HEAP_TYPE_UPLOAD, Type: heap_type,
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN, CPUPageProperty: cpu_page,
//TODO: what should MemoryPoolPreference flag be? MemoryPoolPreference: memory_pool_preference,
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
//we don't care about multi-adapter operation, so these next two will be zero //we don't care about multi-adapter operation, so these next two will be zero
CreationNodeMask: 0, CreationNodeMask: 0,
VisibleNodeMask: 0, VisibleNodeMask: 0,
@ -876,50 +837,7 @@ impl Device {
Quality: 0, Quality: 0,
}, },
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, Flags: resource_flags,
..mem::zeroed()
};
let byte_addressed_buffer = self.create_committed_resource(
&heap_properties,
//TODO: is this heap flag ok?
d3d12::D3D12_HEAP_FLAG_NONE,
&resource_description,
d3d12::D3D12_RESOURCE_STATE_GENERIC_READ,
ptr::null(),
)?;
Ok(byte_addressed_buffer)
}
pub unsafe fn create_gpu_only_byte_addressed_buffer(
&self,
buffer_size_in_bytes: u32,
) -> Result<Resource, Error> {
//TODO: consider flag D3D12_HEAP_FLAG_ALLOW_SHADER_ATOMICS?
let heap_properties = d3d12::D3D12_HEAP_PROPERTIES {
//for GPU access only
Type: d3d12::D3D12_HEAP_TYPE_DEFAULT,
CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
//TODO: what should MemoryPoolPreference flag be?
MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_UNKNOWN,
//we don't care about multi-adapter operation, so these next two will be zero
CreationNodeMask: 0,
VisibleNodeMask: 0,
};
let resource_description = d3d12::D3D12_RESOURCE_DESC {
Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER,
Width: buffer_size_in_bytes as u64,
Height: 1,
DepthOrArraySize: 1,
MipLevels: 1,
SampleDesc: dxgitype::DXGI_SAMPLE_DESC {
Count: 1,
Quality: 0,
},
//essentially we're letting the adapter decide the layout
Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
Flags: d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
..mem::zeroed() ..mem::zeroed()
}; };
@ -927,7 +845,7 @@ impl Device {
&heap_properties, &heap_properties,
d3d12::D3D12_HEAP_FLAG_NONE, d3d12::D3D12_HEAP_FLAG_NONE,
&resource_description, &resource_description,
d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS, init_resource_state,
ptr::null(), ptr::null(),
)?; )?;
@ -994,6 +912,21 @@ impl Device {
Ok(buffer) Ok(buffer)
} }
pub unsafe fn get_features_architecture(
&self,
) -> Result<d3d12::D3D12_FEATURE_DATA_ARCHITECTURE, Error> {
let mut features_architecture = mem::zeroed();
explain_error(
self.0.CheckFeatureSupport(
d3d12::D3D12_FEATURE_ARCHITECTURE,
&mut features_architecture as *mut _ as *mut _,
mem::size_of::<d3d12::D3D12_FEATURE_DATA_ARCHITECTURE>() as u32,
),
"error querying feature architecture",
)?;
Ok(features_architecture)
}
pub unsafe fn get_removal_reason(&self) -> Error { pub unsafe fn get_removal_reason(&self) -> Error {
Error::Hresult(self.0.GetDeviceRemovedReason()) Error::Hresult(self.0.GetDeviceRemovedReason())
} }

View file

@ -150,11 +150,7 @@ impl Session {
}))) })))
} }
pub unsafe fn create_image2d( pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
&self,
width: u32,
height: u32,
) -> Result<Image, Error> {
let image = self.0.device.create_image2d(width, height)?; let image = self.0.device.create_image2d(width, height)?;
Ok(Image(Arc::new(ImageInner { Ok(Image(Arc::new(ImageInner {
image, image,

View file

@ -2,7 +2,6 @@
/// ///
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu. /// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
/// In time, it may go away and be replaced by either gfx-hal or wgpu. /// In time, it may go away and be replaced by either gfx-hal or wgpu.
use bitflags::bitflags; use bitflags::bitflags;
pub mod hub; pub mod hub;
@ -106,11 +105,7 @@ pub trait Device: Sized {
/// Maybe doesn't need result return? /// Maybe doesn't need result return?
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>; unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
unsafe fn create_image2d( unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
&self,
width: u32,
height: u32,
) -> Result<Self::Image, Error>;
/// Destroy an image. /// Destroy an image.
/// ///

View file

@ -9,7 +9,9 @@ use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1}; use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
use ash::{vk, Device, Entry, Instance}; use ash::{vk, Device, Entry, Instance};
use crate::{BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize}; use crate::{
BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
};
pub struct VkInstance { pub struct VkInstance {
/// Retain the dynamic lib. /// Retain the dynamic lib.
@ -512,11 +514,7 @@ impl crate::Device for VkDevice {
Ok(()) Ok(())
} }
unsafe fn create_image2d( unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
&self,
width: u32,
height: u32,
) -> Result<Self::Image, Error> {
let device = &self.device.device; let device = &self.device.device;
let extent = vk::Extent3D { let extent = vk::Extent3D {
width, width,