Add create_buffer_init method

Add a method to create a buffer with initial content, which requires
staging buffers under the hood.

This patch also changes the lower-level (Vulkan) interface to be closer
to the raw Vulkan call.
This commit is contained in:
Raph Levien 2021-05-24 11:44:56 -07:00
parent 7060cc1ab5
commit 47d2e0a756
6 changed files with 246 additions and 97 deletions

View file

@ -7,12 +7,9 @@ fn main() {
unsafe {
let device = instance.device(None).unwrap();
let session = hub::Session::new(device);
let usage = BufferUsage::MAP_READ | BufferUsage::MAP_WRITE | BufferUsage::STORAGE;
let usage = BufferUsage::MAP_READ | BufferUsage::STORAGE;
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
let mut buffer = session
.create_buffer(std::mem::size_of_val(&src[..]) as u64, usage)
.unwrap();
buffer.write(&src).unwrap();
let buffer = session.create_buffer_init(&src, usage).unwrap();
let code = include_bytes!("./shader/collatz.spv");
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
let descriptor_set = session

View file

@ -5,9 +5,11 @@
//! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only.
use std::any::Any;
use std::convert::TryInto;
use std::sync::{Arc, Mutex, Weak};
use crate::vulkan;
use crate::CmdBuf as CmdBufTrait;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::{BufferUsage, Device, Error, GpuInfo, SamplerParams};
@ -31,6 +33,8 @@ struct SessionInner {
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
/// Command buffers that are still pending (so resources can't be freed).
pending: Mutex<Vec<SubmittedCmdBufInner>>,
/// A command buffer that is used for copying from staging buffers.
staging_cmd_buf: Mutex<Option<CmdBuf>>,
gpu_info: GpuInfo,
}
@ -45,9 +49,12 @@ pub struct CmdBuf {
pub struct SubmittedCmdBuf(Option<SubmittedCmdBufInner>, Weak<SessionInner>);
struct SubmittedCmdBufInner {
// It's inconsistent, cmd_buf is unpacked, staging_cmd_buf isn't. Probably
// better to chose one or the other.
cmd_buf: vulkan::CmdBuf,
fence: Fence,
resources: Vec<Box<dyn Any>>,
staging_cmd_buf: Option<CmdBuf>,
}
#[derive(Clone)]
@ -70,6 +77,20 @@ pub struct PipelineBuilder(vulkan::PipelineBuilder);
pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
/// Data types that can be stored in a GPU buffer.
pub unsafe trait PlainData {}
unsafe impl PlainData for u8 {}
unsafe impl PlainData for u16 {}
unsafe impl PlainData for u32 {}
unsafe impl PlainData for u64 {}
unsafe impl PlainData for i8 {}
unsafe impl PlainData for i16 {}
unsafe impl PlainData for i32 {}
unsafe impl PlainData for i64 {}
unsafe impl PlainData for f32 {}
unsafe impl PlainData for f64 {}
impl Session {
pub fn new(device: vulkan::VkDevice) -> Session {
let gpu_info = device.query_gpu_info();
@ -78,6 +99,7 @@ impl Session {
gpu_info,
cmd_buf_pool: Default::default(),
pending: Default::default(),
staging_cmd_buf: Default::default(),
}))
}
@ -107,12 +129,13 @@ impl Session {
let item = pending.swap_remove(i);
// TODO: wait is superfluous, can just reset
let _ = self.0.device.wait_and_reset(&[item.fence]);
self.0
.cmd_buf_pool
.lock()
.unwrap()
.push((item.cmd_buf, item.fence));
let mut pool = self.0.cmd_buf_pool.lock().unwrap();
pool.push((item.cmd_buf, item.fence));
std::mem::drop(item.resources);
if let Some(staging_cmd_buf) = item.staging_cmd_buf {
pool.push((staging_cmd_buf.cmd_buf, staging_cmd_buf.fence));
std::mem::drop(staging_cmd_buf.resources);
}
} else {
i += 1;
}
@ -126,8 +149,19 @@ impl Session {
wait_semaphores: &[Semaphore],
signal_semaphores: &[Semaphore],
) -> Result<SubmittedCmdBuf, Error> {
self.0.device.run_cmd_buf(
&cmd_buf.cmd_buf,
// Again, SmallVec here?
let mut cmd_bufs = Vec::with_capacity(2);
let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap().take();
if let Some(staging) = &mut staging_cmd_buf {
// With finer grained resource tracking, we might be able to avoid this in
// some cases.
staging.memory_barrier();
staging.finish();
cmd_bufs.push(&staging.cmd_buf);
}
cmd_bufs.push(&cmd_buf.cmd_buf);
self.0.device.run_cmd_bufs(
&cmd_bufs,
wait_semaphores,
signal_semaphores,
Some(&cmd_buf.fence),
@ -137,6 +171,7 @@ impl Session {
cmd_buf: cmd_buf.cmd_buf,
fence: cmd_buf.fence,
resources: cmd_buf.resources,
staging_cmd_buf,
}),
cmd_buf.session,
))
@ -150,11 +185,58 @@ impl Session {
})))
}
pub unsafe fn create_image2d(
/// Create a buffer with initialized data.
pub fn create_buffer_init(
&self,
width: u32,
height: u32,
) -> Result<Image, Error> {
contents: &[impl PlainData],
usage: BufferUsage,
) -> Result<Buffer, Error> {
unsafe {
self.create_buffer_init_raw(
contents.as_ptr() as *const u8,
std::mem::size_of_val(contents).try_into()?,
usage,
)
}
}
/// Create a buffer with initialized data, from a raw pointer memory region.
pub unsafe fn create_buffer_init_raw(
&self,
contents: *const u8,
size: u64,
usage: BufferUsage,
) -> Result<Buffer, Error> {
let use_staging_buffer = !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE)
&& self.gpu_info().use_staging_buffers;
let create_usage = if use_staging_buffer {
BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC
} else {
usage | BufferUsage::MAP_WRITE
};
let create_buf = self.create_buffer(size, create_usage)?;
self.0
.device
.write_buffer(&create_buf.vk_buffer(), contents, 0, size)?;
if use_staging_buffer {
let buf = self.create_buffer(size, usage | BufferUsage::COPY_DST)?;
let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap();
if staging_cmd_buf.is_none() {
let mut cmd_buf = self.cmd_buf()?;
cmd_buf.begin();
*staging_cmd_buf = Some(cmd_buf);
}
let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap();
// This will ensure the staging buffer is deallocated
staging_cmd_buf.add_resource(&create_buf);
staging_cmd_buf.copy_buffer(create_buf.vk_buffer(), buf.vk_buffer());
Ok(buf)
} else {
Ok(create_buf)
}
}
pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
let image = self.0.device.create_image2d(width, height)?;
Ok(Image(Arc::new(ImageInner {
image,
@ -305,16 +387,29 @@ impl Buffer {
&self.0.buffer
}
pub unsafe fn write<T: Sized>(&mut self, contents: &[T]) -> Result<(), Error> {
pub unsafe fn write<T: PlainData>(&mut self, contents: &[T]) -> Result<(), Error> {
if let Some(session) = Weak::upgrade(&self.0.session) {
session.device.write_buffer(&self.0.buffer, contents)?;
session.device.write_buffer(
&self.0.buffer,
contents.as_ptr() as *const u8,
0,
std::mem::size_of_val(contents).try_into()?,
)?;
}
// else session lost error?
Ok(())
}
pub unsafe fn read<T: Sized>(&self, result: &mut Vec<T>) -> Result<(), Error> {
pub unsafe fn read<T: PlainData>(&self, result: &mut Vec<T>) -> Result<(), Error> {
let size = self.vk_buffer().size;
let len = size as usize / std::mem::size_of::<T>();
if len > result.len() {
result.reserve(len - result.len());
}
if let Some(session) = Weak::upgrade(&self.0.session) {
session.device.read_buffer(&self.0.buffer, result)?;
session
.device
.read_buffer(&self.0.buffer, result.as_mut_ptr() as *mut u8, 0, size)?;
result.set_len(len);
}
// else session lost error?
Ok(())

View file

@ -2,7 +2,6 @@
///
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
/// In time, it may go away and be replaced by either gfx-hal or wgpu.
use bitflags::bitflags;
pub mod hub;
@ -66,6 +65,8 @@ pub struct GpuInfo {
pub subgroup_size: Option<SubgroupSize>,
/// The GPU supports a real, grown-ass memory model.
pub has_memory_model: bool,
/// Whether staging buffers should be used.
pub use_staging_buffers: bool,
}
#[derive(Clone, Debug)]
@ -103,11 +104,7 @@ pub trait Device: Sized {
/// Maybe doesn't need result return?
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
unsafe fn create_image2d(
&self,
width: u32,
height: u32,
) -> Result<Self::Image, Error>;
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
/// Destroy an image.
///
@ -175,6 +172,7 @@ pub trait Device: Sized {
/// All submitted commands that refer to this query pool must have completed.
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
#[deprecated]
unsafe fn run_cmd_buf(
&self,
cmd_buf: &Self::CmdBuf,
@ -183,16 +181,44 @@ pub trait Device: Sized {
fence: Option<&Self::Fence>,
) -> Result<(), Error>;
unsafe fn read_buffer<T: Sized>(
unsafe fn run_cmd_bufs(
&self,
buffer: &Self::Buffer,
result: &mut Vec<T>,
cmd_buf: &[&Self::CmdBuf],
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error>;
unsafe fn write_buffer<T: Sized>(
/// Copy data from the buffer to memory.
///
/// Discussion question: add offset?
///
/// # Safety
///
/// The buffer must be valid to access. The destination memory must be valid to
/// write to. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the destination.
unsafe fn read_buffer(
&self,
buffer: &Self::Buffer,
contents: &[T],
dst: *mut u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
/// Copy data from memory to the buffer.
///
/// # Safety
///
/// The buffer must be valid to access. The source memory must be valid to
/// read from. The ranges must not overlap. The offset + size must be within
/// the buffer's allocation, and size within the source.
unsafe fn write_buffer(
&self,
buffer: &Self::Buffer,
contents: *const u8,
offset: u64,
size: u64,
) -> Result<(), Error>;
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;

View file

@ -1,6 +1,7 @@
//! Vulkan implemenation of HAL trait.
use std::borrow::Cow;
use std::convert::TryInto;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use std::sync::Arc;
@ -9,7 +10,9 @@ use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
use ash::{vk, Device, Entry, Instance};
use crate::{BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize};
use crate::{
BufferUsage, Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize,
};
pub struct VkInstance {
/// Retain the dynamic lib.
@ -60,7 +63,8 @@ pub struct VkSwapchain {
pub struct Buffer {
buffer: vk::Buffer,
buffer_memory: vk::DeviceMemory,
size: u64,
// TODO: there should probably be a Buffer trait and this should be a method.
pub size: u64,
}
pub struct Image {
@ -339,6 +343,14 @@ impl VkInstance {
None
};
// The question of when and when not to use staging buffers is complex, and this
// is only a first approximation. Basically, it *must* be false when buffers can
// be created with a memory type that is not host-visible. That is not guaranteed
// here but is likely to be the case.
//
// I'm still investigating what should be done in systems with Resizable BAR.
let use_staging_buffers = props.device_type != vk::PhysicalDeviceType::INTEGRATED_GPU;
// TODO: finer grained query of specific subgroup info.
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
let gpu_info = GpuInfo {
@ -346,6 +358,7 @@ impl VkInstance {
has_subgroups,
subgroup_size,
has_memory_model,
use_staging_buffers,
};
Ok(VkDevice {
@ -511,11 +524,7 @@ impl crate::Device for VkDevice {
Ok(())
}
unsafe fn create_image2d(
&self,
width: u32,
height: u32,
) -> Result<Self::Image, Error> {
unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
let device = &self.device.device;
let extent = vk::Extent3D {
width,
@ -726,37 +735,78 @@ impl crate::Device for VkDevice {
Ok(())
}
unsafe fn read_buffer<T: Sized>(
/// Run the command buffers.
///
/// This submits the command buffer for execution. The provided fence
/// is signalled when the execution is complete.
unsafe fn run_cmd_bufs(
&self,
buffer: &Buffer,
result: &mut Vec<T>,
cmd_bufs: &[&CmdBuf],
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error> {
let device = &self.device.device;
let fence = match fence {
Some(fence) => *fence,
None => vk::Fence::null(),
};
let wait_stages = wait_semaphores
.iter()
.map(|_| vk::PipelineStageFlags::ALL_COMMANDS)
.collect::<Vec<_>>();
// Use SmallVec or similar here to reduce allocation?
let cmd_bufs = cmd_bufs.iter().map(|c| c.cmd_buf).collect::<Vec<_>>();
device.queue_submit(
self.queue,
&[vk::SubmitInfo::builder()
.command_buffers(&cmd_bufs)
.wait_semaphores(wait_semaphores)
.wait_dst_stage_mask(&wait_stages)
.signal_semaphores(signal_semaphores)
.build()],
fence,
)?;
Ok(())
}
unsafe fn read_buffer(
&self,
buffer: &Self::Buffer,
dst: *mut u8,
offset: u64,
size: u64,
) -> Result<(), Error> {
let copy_size = size.try_into()?;
let device = &self.device.device;
let buf = device.map_memory(
buffer.buffer_memory,
0,
buffer.size,
offset,
size,
vk::MemoryMapFlags::empty(),
)?;
let len = buffer.size as usize / std::mem::size_of::<T>();
if len > result.len() {
result.reserve(len - result.len());
}
std::ptr::copy_nonoverlapping(buf as *const T, result.as_mut_ptr(), len);
result.set_len(len);
std::ptr::copy_nonoverlapping(buf as *const u8, dst, copy_size);
device.unmap_memory(buffer.buffer_memory);
Ok(())
}
unsafe fn write_buffer<T: Sized>(&self, buffer: &Buffer, contents: &[T]) -> Result<(), Error> {
unsafe fn write_buffer(
&self,
buffer: &Buffer,
contents: *const u8,
offset: u64,
size: u64,
) -> Result<(), Error> {
let copy_size = size.try_into()?;
let device = &self.device.device;
let buf = device.map_memory(
buffer.buffer_memory,
0,
std::mem::size_of_val(contents) as u64,
offset,
size,
vk::MemoryMapFlags::empty(),
)?;
std::ptr::copy_nonoverlapping(contents.as_ptr(), buf as *mut T, contents.len());
std::ptr::copy_nonoverlapping(contents, buf as *mut u8, copy_size);
device.unmap_memory(buffer.buffer_memory);
Ok(())
}

View file

@ -254,8 +254,7 @@ fn main() -> Result<(), Error> {
let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?;
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
let image_buf =
session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool);

View file

@ -189,16 +189,18 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
pub struct Renderer {
pub image_dev: hub::Image, // resulting image
scene_buf_host: hub::Buffer,
scene_buf_dev: hub::Buffer,
// The reference is held by the pipelines. We will be changing
// this to make the scene upload dynamic.
#[allow(dead_code)]
scene_buf: hub::Buffer,
memory_buf_host: hub::Buffer,
memory_buf_dev: hub::Buffer,
state_buf: hub::Buffer,
config_buf_host: hub::Buffer,
config_buf_dev: hub::Buffer,
#[allow(dead_code)]
config_buf: hub::Buffer,
el_pipeline: hub::Pipeline,
el_ds: hub::DescriptorSet,
@ -246,21 +248,11 @@ impl Renderer {
n_elements, n_paths, n_pathseg, n_trans
);
let mut scene_buf_host = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host_upload)
.unwrap();
let scene_buf_dev = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap();
scene_buf_host.write(&scene)?;
let scene_buf = session.create_buffer_init(&scene[..], dev).unwrap();
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32)?;
const CONFIG_SIZE: u64 = 10 * 4; // Size of Config in setup.h.
let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host_upload)?;
let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?;
// TODO: constants
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
@ -280,7 +272,7 @@ impl Renderer {
alloc += (n_paths * ANNO_SIZE + 3) & !3;
let trans_base = alloc;
alloc += (n_trans * TRANS_SIZE + 3) & !3;
config_buf_host.write(&[
let config = &[
n_paths as u32,
n_pathseg as u32,
WIDTH_IN_TILES as u32,
@ -291,8 +283,11 @@ impl Renderer {
pathseg_base as u32,
anno_base as u32,
trans_base as u32,
])?;
];
let config_buf = session.create_buffer_init(&config[..], dev).unwrap();
// Perhaps we could avoid the explicit staging buffer by having buffer creation method
// that takes both initial contents and a size.
let mut memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
@ -301,36 +296,34 @@ impl Renderer {
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
let el_ds = session.create_simple_descriptor_set(
&el_pipeline,
&[&memory_buf_dev, &config_buf_dev, &scene_buf_dev, &state_buf],
&[&memory_buf_dev, &config_buf, &scene_buf, &state_buf],
)?;
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
let tile_ds = session
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
let path_ds = session
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
let backdrop_ds = session.create_simple_descriptor_set(
&backdrop_pipeline,
&[&memory_buf_dev, &config_buf_dev],
)?;
let backdrop_ds = session
.create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
// TODO: constants
let bin_code = include_bytes!("../shader/binning.spv");
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
let bin_ds = session
.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
let bin_ds =
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
let coarse_code = include_bytes!("../shader/coarse.spv");
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
let coarse_ds = session
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
let bg_image = Self::make_test_bg_image(&session);
@ -358,19 +351,17 @@ impl Renderer {
.create_compute_pipeline(&session, k4_code)?;
let k4_ds = session
.descriptor_set_builder()
.add_buffers(&[&memory_buf_dev, &config_buf_dev])
.add_buffers(&[&memory_buf_dev, &config_buf])
.add_images(&[&image_dev])
.add_textures(&[&bg_image])
.build(&session, &k4_pipeline)?;
Ok(Renderer {
scene_buf_host,
scene_buf_dev,
scene_buf,
memory_buf_host,
memory_buf_dev,
state_buf,
config_buf_host,
config_buf_dev,
config_buf,
image_dev,
el_pipeline,
el_ds,
@ -394,14 +385,6 @@ impl Renderer {
}
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
cmd_buf.copy_buffer(
self.scene_buf_host.vk_buffer(),
self.scene_buf_dev.vk_buffer(),
);
cmd_buf.copy_buffer(
self.config_buf_host.vk_buffer(),
self.config_buf_dev.vk_buffer(),
);
cmd_buf.copy_buffer(
self.memory_buf_host.vk_buffer(),
self.memory_buf_dev.vk_buffer(),
@ -488,8 +471,7 @@ impl Renderer {
let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
let mut buffer = session.create_buffer(buf.len() as u64, host_upload)?;
buffer.write(buf)?;
let image =
session.create_image2d(width.try_into()?, height.try_into()?)?;
let image = session.create_image2d(width.try_into()?, height.try_into()?)?;
let mut cmd_buf = session.cmd_buf()?;
cmd_buf.begin();
cmd_buf.image_barrier(