mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge branch 'staging' into dx12
This commit is contained in:
commit
dfac2148a9
|
@ -7,12 +7,9 @@ fn main() {
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = instance.device(None).unwrap();
|
let device = instance.device(None).unwrap();
|
||||||
let session = hub::Session::new(device);
|
let session = hub::Session::new(device);
|
||||||
let usage = BufferUsage::MAP_READ | BufferUsage::MAP_WRITE | BufferUsage::STORAGE;
|
let usage = BufferUsage::MAP_READ | BufferUsage::STORAGE;
|
||||||
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
|
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
|
||||||
let mut buffer = session
|
let buffer = session.create_buffer_init(&src, usage).unwrap();
|
||||||
.create_buffer(std::mem::size_of_val(&src[..]) as u64, usage)
|
|
||||||
.unwrap();
|
|
||||||
buffer.write(&src).unwrap();
|
|
||||||
let code = include_bytes!("./shader/collatz.spv");
|
let code = include_bytes!("./shader/collatz.spv");
|
||||||
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
|
let pipeline = session.create_simple_compute_pipeline(code, 1).unwrap();
|
||||||
let descriptor_set = session
|
let descriptor_set = session
|
||||||
|
|
|
@ -4,10 +4,11 @@
|
||||||
//! It is likely that it will also take care of compile time and runtime
|
//! It is likely that it will also take care of compile time and runtime
|
||||||
//! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only.
|
//! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only.
|
||||||
|
|
||||||
use std::any::Any;
|
use std::convert::TryInto;
|
||||||
use std::sync::{Arc, Mutex, Weak};
|
use std::sync::{Arc, Mutex, Weak};
|
||||||
|
|
||||||
use crate::vulkan;
|
use crate::vulkan;
|
||||||
|
use crate::CmdBuf as CmdBufTrait;
|
||||||
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
||||||
use crate::PipelineBuilder as PipelineBuilderTrait;
|
use crate::PipelineBuilder as PipelineBuilderTrait;
|
||||||
use crate::{BufferUsage, Device, Error, GpuInfo, SamplerParams};
|
use crate::{BufferUsage, Device, Error, GpuInfo, SamplerParams};
|
||||||
|
@ -31,13 +32,15 @@ struct SessionInner {
|
||||||
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
||||||
/// Command buffers that are still pending (so resources can't be freed).
|
/// Command buffers that are still pending (so resources can't be freed).
|
||||||
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
||||||
|
/// A command buffer that is used for copying from staging buffers.
|
||||||
|
staging_cmd_buf: Mutex<Option<CmdBuf>>,
|
||||||
gpu_info: GpuInfo,
|
gpu_info: GpuInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CmdBuf {
|
pub struct CmdBuf {
|
||||||
cmd_buf: vulkan::CmdBuf,
|
cmd_buf: vulkan::CmdBuf,
|
||||||
fence: Fence,
|
fence: Fence,
|
||||||
resources: Vec<Box<dyn Any>>,
|
resources: Vec<RetainResource>,
|
||||||
session: Weak<SessionInner>,
|
session: Weak<SessionInner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,9 +48,12 @@ pub struct CmdBuf {
|
||||||
pub struct SubmittedCmdBuf(Option<SubmittedCmdBufInner>, Weak<SessionInner>);
|
pub struct SubmittedCmdBuf(Option<SubmittedCmdBufInner>, Weak<SessionInner>);
|
||||||
|
|
||||||
struct SubmittedCmdBufInner {
|
struct SubmittedCmdBufInner {
|
||||||
|
// It's inconsistent, cmd_buf is unpacked, staging_cmd_buf isn't. Probably
|
||||||
|
// better to chose one or the other.
|
||||||
cmd_buf: vulkan::CmdBuf,
|
cmd_buf: vulkan::CmdBuf,
|
||||||
fence: Fence,
|
fence: Fence,
|
||||||
resources: Vec<Box<dyn Any>>,
|
resources: Vec<RetainResource>,
|
||||||
|
staging_cmd_buf: Option<CmdBuf>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -70,6 +76,26 @@ pub struct PipelineBuilder(vulkan::PipelineBuilder);
|
||||||
|
|
||||||
pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
|
pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
|
||||||
|
|
||||||
|
/// Data types that can be stored in a GPU buffer.
|
||||||
|
pub unsafe trait PlainData {}
|
||||||
|
|
||||||
|
unsafe impl PlainData for u8 {}
|
||||||
|
unsafe impl PlainData for u16 {}
|
||||||
|
unsafe impl PlainData for u32 {}
|
||||||
|
unsafe impl PlainData for u64 {}
|
||||||
|
unsafe impl PlainData for i8 {}
|
||||||
|
unsafe impl PlainData for i16 {}
|
||||||
|
unsafe impl PlainData for i32 {}
|
||||||
|
unsafe impl PlainData for i64 {}
|
||||||
|
unsafe impl PlainData for f32 {}
|
||||||
|
unsafe impl PlainData for f64 {}
|
||||||
|
|
||||||
|
/// A resource to retain during the lifetime of a command submission.
|
||||||
|
pub enum RetainResource {
|
||||||
|
Buffer(Buffer),
|
||||||
|
Image(Image),
|
||||||
|
}
|
||||||
|
|
||||||
impl Session {
|
impl Session {
|
||||||
pub fn new(device: vulkan::VkDevice) -> Session {
|
pub fn new(device: vulkan::VkDevice) -> Session {
|
||||||
let gpu_info = device.query_gpu_info();
|
let gpu_info = device.query_gpu_info();
|
||||||
|
@ -78,6 +104,7 @@ impl Session {
|
||||||
gpu_info,
|
gpu_info,
|
||||||
cmd_buf_pool: Default::default(),
|
cmd_buf_pool: Default::default(),
|
||||||
pending: Default::default(),
|
pending: Default::default(),
|
||||||
|
staging_cmd_buf: Default::default(),
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,12 +134,13 @@ impl Session {
|
||||||
let item = pending.swap_remove(i);
|
let item = pending.swap_remove(i);
|
||||||
// TODO: wait is superfluous, can just reset
|
// TODO: wait is superfluous, can just reset
|
||||||
let _ = self.0.device.wait_and_reset(&[item.fence]);
|
let _ = self.0.device.wait_and_reset(&[item.fence]);
|
||||||
self.0
|
let mut pool = self.0.cmd_buf_pool.lock().unwrap();
|
||||||
.cmd_buf_pool
|
pool.push((item.cmd_buf, item.fence));
|
||||||
.lock()
|
|
||||||
.unwrap()
|
|
||||||
.push((item.cmd_buf, item.fence));
|
|
||||||
std::mem::drop(item.resources);
|
std::mem::drop(item.resources);
|
||||||
|
if let Some(staging_cmd_buf) = item.staging_cmd_buf {
|
||||||
|
pool.push((staging_cmd_buf.cmd_buf, staging_cmd_buf.fence));
|
||||||
|
std::mem::drop(staging_cmd_buf.resources);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
@ -126,8 +154,19 @@ impl Session {
|
||||||
wait_semaphores: &[Semaphore],
|
wait_semaphores: &[Semaphore],
|
||||||
signal_semaphores: &[Semaphore],
|
signal_semaphores: &[Semaphore],
|
||||||
) -> Result<SubmittedCmdBuf, Error> {
|
) -> Result<SubmittedCmdBuf, Error> {
|
||||||
self.0.device.run_cmd_buf(
|
// Again, SmallVec here?
|
||||||
&cmd_buf.cmd_buf,
|
let mut cmd_bufs = Vec::with_capacity(2);
|
||||||
|
let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap().take();
|
||||||
|
if let Some(staging) = &mut staging_cmd_buf {
|
||||||
|
// With finer grained resource tracking, we might be able to avoid this in
|
||||||
|
// some cases.
|
||||||
|
staging.memory_barrier();
|
||||||
|
staging.finish();
|
||||||
|
cmd_bufs.push(&staging.cmd_buf);
|
||||||
|
}
|
||||||
|
cmd_bufs.push(&cmd_buf.cmd_buf);
|
||||||
|
self.0.device.run_cmd_bufs(
|
||||||
|
&cmd_bufs,
|
||||||
wait_semaphores,
|
wait_semaphores,
|
||||||
signal_semaphores,
|
signal_semaphores,
|
||||||
Some(&cmd_buf.fence),
|
Some(&cmd_buf.fence),
|
||||||
|
@ -137,6 +176,7 @@ impl Session {
|
||||||
cmd_buf: cmd_buf.cmd_buf,
|
cmd_buf: cmd_buf.cmd_buf,
|
||||||
fence: cmd_buf.fence,
|
fence: cmd_buf.fence,
|
||||||
resources: cmd_buf.resources,
|
resources: cmd_buf.resources,
|
||||||
|
staging_cmd_buf,
|
||||||
}),
|
}),
|
||||||
cmd_buf.session,
|
cmd_buf.session,
|
||||||
))
|
))
|
||||||
|
@ -150,6 +190,57 @@ impl Session {
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create a buffer with initialized data.
|
||||||
|
pub fn create_buffer_init(
|
||||||
|
&self,
|
||||||
|
contents: &[impl PlainData],
|
||||||
|
usage: BufferUsage,
|
||||||
|
) -> Result<Buffer, Error> {
|
||||||
|
unsafe {
|
||||||
|
self.create_buffer_init_raw(
|
||||||
|
contents.as_ptr() as *const u8,
|
||||||
|
std::mem::size_of_val(contents).try_into()?,
|
||||||
|
usage,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a buffer with initialized data, from a raw pointer memory region.
|
||||||
|
pub unsafe fn create_buffer_init_raw(
|
||||||
|
&self,
|
||||||
|
contents: *const u8,
|
||||||
|
size: u64,
|
||||||
|
usage: BufferUsage,
|
||||||
|
) -> Result<Buffer, Error> {
|
||||||
|
let use_staging_buffer = !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE)
|
||||||
|
&& self.gpu_info().use_staging_buffers;
|
||||||
|
let create_usage = if use_staging_buffer {
|
||||||
|
BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC
|
||||||
|
} else {
|
||||||
|
usage | BufferUsage::MAP_WRITE
|
||||||
|
};
|
||||||
|
let create_buf = self.create_buffer(size, create_usage)?;
|
||||||
|
self.0
|
||||||
|
.device
|
||||||
|
.write_buffer(&create_buf.vk_buffer(), contents, 0, size)?;
|
||||||
|
if use_staging_buffer {
|
||||||
|
let buf = self.create_buffer(size, usage | BufferUsage::COPY_DST)?;
|
||||||
|
let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap();
|
||||||
|
if staging_cmd_buf.is_none() {
|
||||||
|
let mut cmd_buf = self.cmd_buf()?;
|
||||||
|
cmd_buf.begin();
|
||||||
|
*staging_cmd_buf = Some(cmd_buf);
|
||||||
|
}
|
||||||
|
let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap();
|
||||||
|
// This will ensure the staging buffer is deallocated. It would be nice to
|
||||||
|
staging_cmd_buf.copy_buffer(create_buf.vk_buffer(), buf.vk_buffer());
|
||||||
|
staging_cmd_buf.add_resource(create_buf);
|
||||||
|
Ok(buf)
|
||||||
|
} else {
|
||||||
|
Ok(create_buf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
|
pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
|
||||||
let image = self.0.device.create_image2d(width, height)?;
|
let image = self.0.device.create_image2d(width, height)?;
|
||||||
Ok(Image(Arc::new(ImageInner {
|
Ok(Image(Arc::new(ImageInner {
|
||||||
|
@ -221,8 +312,8 @@ impl CmdBuf {
|
||||||
/// There are two choices for upholding the lifetime invariant: this function, or
|
/// There are two choices for upholding the lifetime invariant: this function, or
|
||||||
/// the caller can manually hold the reference. The latter is appropriate when it's
|
/// the caller can manually hold the reference. The latter is appropriate when it's
|
||||||
/// part of retained state.
|
/// part of retained state.
|
||||||
pub fn add_resource<T: Clone + 'static>(&mut self, resource: &T) {
|
pub fn add_resource(&mut self, resource: impl Into<RetainResource>) {
|
||||||
self.resources.push(Box::new(resource.clone()));
|
self.resources.push(resource.into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -301,16 +392,29 @@ impl Buffer {
|
||||||
&self.0.buffer
|
&self.0.buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn write<T: Sized>(&mut self, contents: &[T]) -> Result<(), Error> {
|
pub unsafe fn write<T: PlainData>(&mut self, contents: &[T]) -> Result<(), Error> {
|
||||||
if let Some(session) = Weak::upgrade(&self.0.session) {
|
if let Some(session) = Weak::upgrade(&self.0.session) {
|
||||||
session.device.write_buffer(&self.0.buffer, contents)?;
|
session.device.write_buffer(
|
||||||
|
&self.0.buffer,
|
||||||
|
contents.as_ptr() as *const u8,
|
||||||
|
0,
|
||||||
|
std::mem::size_of_val(contents).try_into()?,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
// else session lost error?
|
// else session lost error?
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
pub unsafe fn read<T: Sized>(&self, result: &mut Vec<T>) -> Result<(), Error> {
|
pub unsafe fn read<T: PlainData>(&self, result: &mut Vec<T>) -> Result<(), Error> {
|
||||||
|
let size = self.vk_buffer().size;
|
||||||
|
let len = size as usize / std::mem::size_of::<T>();
|
||||||
|
if len > result.len() {
|
||||||
|
result.reserve(len - result.len());
|
||||||
|
}
|
||||||
if let Some(session) = Weak::upgrade(&self.0.session) {
|
if let Some(session) = Weak::upgrade(&self.0.session) {
|
||||||
session.device.read_buffer(&self.0.buffer, result)?;
|
session
|
||||||
|
.device
|
||||||
|
.read_buffer(&self.0.buffer, result.as_mut_ptr() as *mut u8, 0, size)?;
|
||||||
|
result.set_len(len);
|
||||||
}
|
}
|
||||||
// else session lost error?
|
// else session lost error?
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -398,43 +502,7 @@ impl<'a, T> IntoRefs<'a, T> for &'a [&'a T] {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: this will benefit from const generics!
|
impl<'a, T, const N: usize> IntoRefs<'a, T> for &'a [&'a T; N] {
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 1] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
|
||||||
fn into_refs(self) -> Self::Iterator {
|
|
||||||
self.into_iter().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 2] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
|
||||||
fn into_refs(self) -> Self::Iterator {
|
|
||||||
self.into_iter().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 3] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
|
||||||
fn into_refs(self) -> Self::Iterator {
|
|
||||||
self.into_iter().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 4] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
|
||||||
fn into_refs(self) -> Self::Iterator {
|
|
||||||
self.into_iter().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 5] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
|
||||||
fn into_refs(self) -> Self::Iterator {
|
|
||||||
self.into_iter().copied()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> IntoRefs<'a, T> for &'a [&'a T; 6] {
|
|
||||||
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
type Iterator = std::iter::Copied<std::slice::Iter<'a, &'a T>>;
|
||||||
fn into_refs(self) -> Self::Iterator {
|
fn into_refs(self) -> Self::Iterator {
|
||||||
self.into_iter().copied()
|
self.into_iter().copied()
|
||||||
|
@ -447,3 +515,21 @@ impl<'a, T> IntoRefs<'a, T> for Vec<&'a T> {
|
||||||
self.into_iter()
|
self.into_iter()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<Buffer> for RetainResource {
|
||||||
|
fn from(buf: Buffer) -> Self {
|
||||||
|
RetainResource::Buffer(buf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Image> for RetainResource {
|
||||||
|
fn from(img: Image) -> Self {
|
||||||
|
RetainResource::Image(img)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: Clone + Into<RetainResource>> From<&'a T> for RetainResource {
|
||||||
|
fn from(resource: &'a T) -> Self {
|
||||||
|
resource.clone().into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -67,6 +67,8 @@ pub struct GpuInfo {
|
||||||
pub subgroup_size: Option<SubgroupSize>,
|
pub subgroup_size: Option<SubgroupSize>,
|
||||||
/// The GPU supports a real, grown-ass memory model.
|
/// The GPU supports a real, grown-ass memory model.
|
||||||
pub has_memory_model: bool,
|
pub has_memory_model: bool,
|
||||||
|
/// Whether staging buffers should be used.
|
||||||
|
pub use_staging_buffers: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
@ -173,24 +175,44 @@ pub trait Device: Sized {
|
||||||
/// All submitted commands that refer to this query pool must have completed.
|
/// All submitted commands that refer to this query pool must have completed.
|
||||||
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
|
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
|
||||||
|
|
||||||
unsafe fn run_cmd_buf(
|
unsafe fn run_cmd_bufs(
|
||||||
&self,
|
&self,
|
||||||
cmd_buf: &Self::CmdBuf,
|
cmd_buf: &[&Self::CmdBuf],
|
||||||
wait_semaphores: &[Self::Semaphore],
|
wait_semaphores: &[Self::Semaphore],
|
||||||
signal_semaphores: &[Self::Semaphore],
|
signal_semaphores: &[Self::Semaphore],
|
||||||
fence: Option<&Self::Fence>,
|
fence: Option<&Self::Fence>,
|
||||||
) -> Result<(), Error>;
|
) -> Result<(), Error>;
|
||||||
|
|
||||||
unsafe fn read_buffer<T: Sized>(
|
/// Copy data from the buffer to memory.
|
||||||
|
///
|
||||||
|
/// Discussion question: add offset?
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The buffer must be valid to access. The destination memory must be valid to
|
||||||
|
/// write to. The ranges must not overlap. The offset + size must be within
|
||||||
|
/// the buffer's allocation, and size within the destination.
|
||||||
|
unsafe fn read_buffer(
|
||||||
&self,
|
&self,
|
||||||
buffer: &Self::Buffer,
|
buffer: &Self::Buffer,
|
||||||
result: &mut Vec<T>,
|
dst: *mut u8,
|
||||||
|
offset: u64,
|
||||||
|
size: u64,
|
||||||
) -> Result<(), Error>;
|
) -> Result<(), Error>;
|
||||||
|
|
||||||
unsafe fn write_buffer<T: Sized>(
|
/// Copy data from memory to the buffer.
|
||||||
|
///
|
||||||
|
/// # Safety
|
||||||
|
///
|
||||||
|
/// The buffer must be valid to access. The source memory must be valid to
|
||||||
|
/// read from. The ranges must not overlap. The offset + size must be within
|
||||||
|
/// the buffer's allocation, and size within the source.
|
||||||
|
unsafe fn write_buffer(
|
||||||
&self,
|
&self,
|
||||||
buffer: &Self::Buffer,
|
buffer: &Self::Buffer,
|
||||||
contents: &[T],
|
contents: *const u8,
|
||||||
|
offset: u64,
|
||||||
|
size: u64,
|
||||||
) -> Result<(), Error>;
|
) -> Result<(), Error>;
|
||||||
|
|
||||||
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
|
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
//! Vulkan implemenation of HAL trait.
|
//! Vulkan implemenation of HAL trait.
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::convert::TryInto;
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::{CStr, CString};
|
||||||
use std::os::raw::c_char;
|
use std::os::raw::c_char;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
@ -62,7 +63,8 @@ pub struct VkSwapchain {
|
||||||
pub struct Buffer {
|
pub struct Buffer {
|
||||||
buffer: vk::Buffer,
|
buffer: vk::Buffer,
|
||||||
buffer_memory: vk::DeviceMemory,
|
buffer_memory: vk::DeviceMemory,
|
||||||
size: u64,
|
// TODO: there should probably be a Buffer trait and this should be a method.
|
||||||
|
pub size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Image {
|
pub struct Image {
|
||||||
|
@ -341,6 +343,14 @@ impl VkInstance {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// The question of when and when not to use staging buffers is complex, and this
|
||||||
|
// is only a first approximation. Basically, it *must* be false when buffers can
|
||||||
|
// be created with a memory type that is not host-visible. That is not guaranteed
|
||||||
|
// here but is likely to be the case.
|
||||||
|
//
|
||||||
|
// I'm still investigating what should be done in systems with Resizable BAR.
|
||||||
|
let use_staging_buffers = props.device_type != vk::PhysicalDeviceType::INTEGRATED_GPU;
|
||||||
|
|
||||||
// TODO: finer grained query of specific subgroup info.
|
// TODO: finer grained query of specific subgroup info.
|
||||||
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
|
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
|
||||||
let gpu_info = GpuInfo {
|
let gpu_info = GpuInfo {
|
||||||
|
@ -348,6 +358,7 @@ impl VkInstance {
|
||||||
has_subgroups,
|
has_subgroups,
|
||||||
subgroup_size,
|
subgroup_size,
|
||||||
has_memory_model,
|
has_memory_model,
|
||||||
|
use_staging_buffers,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(VkDevice {
|
Ok(VkDevice {
|
||||||
|
@ -691,13 +702,13 @@ impl crate::Device for VkDevice {
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the command buffer.
|
/// Run the command buffers.
|
||||||
///
|
///
|
||||||
/// This submits the command buffer for execution. The provided fence
|
/// This submits the command buffer for execution. The provided fence
|
||||||
/// is signalled when the execution is complete.
|
/// is signalled when the execution is complete.
|
||||||
unsafe fn run_cmd_buf(
|
unsafe fn run_cmd_bufs(
|
||||||
&self,
|
&self,
|
||||||
cmd_buf: &CmdBuf,
|
cmd_bufs: &[&CmdBuf],
|
||||||
wait_semaphores: &[Self::Semaphore],
|
wait_semaphores: &[Self::Semaphore],
|
||||||
signal_semaphores: &[Self::Semaphore],
|
signal_semaphores: &[Self::Semaphore],
|
||||||
fence: Option<&Self::Fence>,
|
fence: Option<&Self::Fence>,
|
||||||
|
@ -712,10 +723,12 @@ impl crate::Device for VkDevice {
|
||||||
.iter()
|
.iter()
|
||||||
.map(|_| vk::PipelineStageFlags::ALL_COMMANDS)
|
.map(|_| vk::PipelineStageFlags::ALL_COMMANDS)
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
// Use SmallVec or similar here to reduce allocation?
|
||||||
|
let cmd_bufs = cmd_bufs.iter().map(|c| c.cmd_buf).collect::<Vec<_>>();
|
||||||
device.queue_submit(
|
device.queue_submit(
|
||||||
self.queue,
|
self.queue,
|
||||||
&[vk::SubmitInfo::builder()
|
&[vk::SubmitInfo::builder()
|
||||||
.command_buffers(&[cmd_buf.cmd_buf])
|
.command_buffers(&cmd_bufs)
|
||||||
.wait_semaphores(wait_semaphores)
|
.wait_semaphores(wait_semaphores)
|
||||||
.wait_dst_stage_mask(&wait_stages)
|
.wait_dst_stage_mask(&wait_stages)
|
||||||
.signal_semaphores(signal_semaphores)
|
.signal_semaphores(signal_semaphores)
|
||||||
|
@ -725,37 +738,42 @@ impl crate::Device for VkDevice {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn read_buffer<T: Sized>(
|
unsafe fn read_buffer(
|
||||||
&self,
|
&self,
|
||||||
buffer: &Buffer,
|
buffer: &Self::Buffer,
|
||||||
result: &mut Vec<T>,
|
dst: *mut u8,
|
||||||
|
offset: u64,
|
||||||
|
size: u64,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
|
let copy_size = size.try_into()?;
|
||||||
let device = &self.device.device;
|
let device = &self.device.device;
|
||||||
let buf = device.map_memory(
|
let buf = device.map_memory(
|
||||||
buffer.buffer_memory,
|
buffer.buffer_memory,
|
||||||
0,
|
offset,
|
||||||
buffer.size,
|
size,
|
||||||
vk::MemoryMapFlags::empty(),
|
vk::MemoryMapFlags::empty(),
|
||||||
)?;
|
)?;
|
||||||
let len = buffer.size as usize / std::mem::size_of::<T>();
|
std::ptr::copy_nonoverlapping(buf as *const u8, dst, copy_size);
|
||||||
if len > result.len() {
|
|
||||||
result.reserve(len - result.len());
|
|
||||||
}
|
|
||||||
std::ptr::copy_nonoverlapping(buf as *const T, result.as_mut_ptr(), len);
|
|
||||||
result.set_len(len);
|
|
||||||
device.unmap_memory(buffer.buffer_memory);
|
device.unmap_memory(buffer.buffer_memory);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn write_buffer<T: Sized>(&self, buffer: &Buffer, contents: &[T]) -> Result<(), Error> {
|
unsafe fn write_buffer(
|
||||||
|
&self,
|
||||||
|
buffer: &Buffer,
|
||||||
|
contents: *const u8,
|
||||||
|
offset: u64,
|
||||||
|
size: u64,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let copy_size = size.try_into()?;
|
||||||
let device = &self.device.device;
|
let device = &self.device.device;
|
||||||
let buf = device.map_memory(
|
let buf = device.map_memory(
|
||||||
buffer.buffer_memory,
|
buffer.buffer_memory,
|
||||||
0,
|
offset,
|
||||||
std::mem::size_of_val(contents) as u64,
|
size,
|
||||||
vk::MemoryMapFlags::empty(),
|
vk::MemoryMapFlags::empty(),
|
||||||
)?;
|
)?;
|
||||||
std::ptr::copy_nonoverlapping(contents.as_ptr(), buf as *mut T, contents.len());
|
std::ptr::copy_nonoverlapping(contents, buf as *mut u8, copy_size);
|
||||||
device.unmap_memory(buffer.buffer_memory);
|
device.unmap_memory(buffer.buffer_memory);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -189,16 +189,18 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
|
||||||
pub struct Renderer {
|
pub struct Renderer {
|
||||||
pub image_dev: hub::Image, // resulting image
|
pub image_dev: hub::Image, // resulting image
|
||||||
|
|
||||||
scene_buf_host: hub::Buffer,
|
// The reference is held by the pipelines. We will be changing
|
||||||
scene_buf_dev: hub::Buffer,
|
// this to make the scene upload dynamic.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
scene_buf: hub::Buffer,
|
||||||
|
|
||||||
memory_buf_host: hub::Buffer,
|
memory_buf_host: hub::Buffer,
|
||||||
memory_buf_dev: hub::Buffer,
|
memory_buf_dev: hub::Buffer,
|
||||||
|
|
||||||
state_buf: hub::Buffer,
|
state_buf: hub::Buffer,
|
||||||
|
|
||||||
config_buf_host: hub::Buffer,
|
#[allow(dead_code)]
|
||||||
config_buf_dev: hub::Buffer,
|
config_buf: hub::Buffer,
|
||||||
|
|
||||||
el_pipeline: hub::Pipeline,
|
el_pipeline: hub::Pipeline,
|
||||||
el_ds: hub::DescriptorSet,
|
el_ds: hub::DescriptorSet,
|
||||||
|
@ -246,21 +248,11 @@ impl Renderer {
|
||||||
n_elements, n_paths, n_pathseg, n_trans
|
n_elements, n_paths, n_pathseg, n_trans
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut scene_buf_host = session
|
let scene_buf = session.create_buffer_init(&scene[..], dev).unwrap();
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host_upload)
|
|
||||||
.unwrap();
|
|
||||||
let scene_buf_dev = session
|
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
|
||||||
.unwrap();
|
|
||||||
scene_buf_host.write(&scene)?;
|
|
||||||
|
|
||||||
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
|
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
|
||||||
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32)?;
|
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32)?;
|
||||||
|
|
||||||
const CONFIG_SIZE: u64 = 10 * 4; // Size of Config in setup.h.
|
|
||||||
let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host_upload)?;
|
|
||||||
let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?;
|
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
const PATH_SIZE: usize = 12;
|
const PATH_SIZE: usize = 12;
|
||||||
const BIN_SIZE: usize = 8;
|
const BIN_SIZE: usize = 8;
|
||||||
|
@ -280,7 +272,7 @@ impl Renderer {
|
||||||
alloc += (n_paths * ANNO_SIZE + 3) & !3;
|
alloc += (n_paths * ANNO_SIZE + 3) & !3;
|
||||||
let trans_base = alloc;
|
let trans_base = alloc;
|
||||||
alloc += (n_trans * TRANS_SIZE + 3) & !3;
|
alloc += (n_trans * TRANS_SIZE + 3) & !3;
|
||||||
config_buf_host.write(&[
|
let config = &[
|
||||||
n_paths as u32,
|
n_paths as u32,
|
||||||
n_pathseg as u32,
|
n_pathseg as u32,
|
||||||
WIDTH_IN_TILES as u32,
|
WIDTH_IN_TILES as u32,
|
||||||
|
@ -291,8 +283,11 @@ impl Renderer {
|
||||||
pathseg_base as u32,
|
pathseg_base as u32,
|
||||||
anno_base as u32,
|
anno_base as u32,
|
||||||
trans_base as u32,
|
trans_base as u32,
|
||||||
])?;
|
];
|
||||||
|
let config_buf = session.create_buffer_init(&config[..], dev).unwrap();
|
||||||
|
|
||||||
|
// Perhaps we could avoid the explicit staging buffer by having buffer creation method
|
||||||
|
// that takes both initial contents and a size.
|
||||||
let mut memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
|
let mut memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
|
||||||
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
||||||
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
|
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
|
||||||
|
@ -301,36 +296,34 @@ impl Renderer {
|
||||||
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
|
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
|
||||||
let el_ds = session.create_simple_descriptor_set(
|
let el_ds = session.create_simple_descriptor_set(
|
||||||
&el_pipeline,
|
&el_pipeline,
|
||||||
&[&memory_buf_dev, &config_buf_dev, &scene_buf_dev, &state_buf],
|
&[&memory_buf_dev, &config_buf, &scene_buf, &state_buf],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
|
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
|
||||||
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
|
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
|
||||||
let tile_ds = session
|
let tile_ds = session
|
||||||
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
|
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
|
||||||
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
|
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
|
||||||
let path_ds = session
|
let path_ds = session
|
||||||
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
|
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
|
||||||
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
|
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
|
||||||
let backdrop_ds = session.create_simple_descriptor_set(
|
let backdrop_ds = session
|
||||||
&backdrop_pipeline,
|
.create_simple_descriptor_set(&backdrop_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
&[&memory_buf_dev, &config_buf_dev],
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
let bin_code = include_bytes!("../shader/binning.spv");
|
let bin_code = include_bytes!("../shader/binning.spv");
|
||||||
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
|
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
|
||||||
let bin_ds = session
|
let bin_ds =
|
||||||
.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
session.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let coarse_code = include_bytes!("../shader/coarse.spv");
|
let coarse_code = include_bytes!("../shader/coarse.spv");
|
||||||
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
|
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
|
||||||
let coarse_ds = session
|
let coarse_ds = session
|
||||||
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
|
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf])?;
|
||||||
|
|
||||||
let bg_image = Self::make_test_bg_image(&session);
|
let bg_image = Self::make_test_bg_image(&session);
|
||||||
|
|
||||||
|
@ -358,19 +351,17 @@ impl Renderer {
|
||||||
.create_compute_pipeline(&session, k4_code)?;
|
.create_compute_pipeline(&session, k4_code)?;
|
||||||
let k4_ds = session
|
let k4_ds = session
|
||||||
.descriptor_set_builder()
|
.descriptor_set_builder()
|
||||||
.add_buffers(&[&memory_buf_dev, &config_buf_dev])
|
.add_buffers(&[&memory_buf_dev, &config_buf])
|
||||||
.add_images(&[&image_dev])
|
.add_images(&[&image_dev])
|
||||||
.add_textures(&[&bg_image])
|
.add_textures(&[&bg_image])
|
||||||
.build(&session, &k4_pipeline)?;
|
.build(&session, &k4_pipeline)?;
|
||||||
|
|
||||||
Ok(Renderer {
|
Ok(Renderer {
|
||||||
scene_buf_host,
|
scene_buf,
|
||||||
scene_buf_dev,
|
|
||||||
memory_buf_host,
|
memory_buf_host,
|
||||||
memory_buf_dev,
|
memory_buf_dev,
|
||||||
state_buf,
|
state_buf,
|
||||||
config_buf_host,
|
config_buf,
|
||||||
config_buf_dev,
|
|
||||||
image_dev,
|
image_dev,
|
||||||
el_pipeline,
|
el_pipeline,
|
||||||
el_ds,
|
el_ds,
|
||||||
|
@ -394,14 +385,6 @@ impl Renderer {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
|
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
|
||||||
cmd_buf.copy_buffer(
|
|
||||||
self.scene_buf_host.vk_buffer(),
|
|
||||||
self.scene_buf_dev.vk_buffer(),
|
|
||||||
);
|
|
||||||
cmd_buf.copy_buffer(
|
|
||||||
self.config_buf_host.vk_buffer(),
|
|
||||||
self.config_buf_dev.vk_buffer(),
|
|
||||||
);
|
|
||||||
cmd_buf.copy_buffer(
|
cmd_buf.copy_buffer(
|
||||||
self.memory_buf_host.vk_buffer(),
|
self.memory_buf_host.vk_buffer(),
|
||||||
self.memory_buf_dev.vk_buffer(),
|
self.memory_buf_dev.vk_buffer(),
|
||||||
|
|
Loading…
Reference in a new issue