Merge pull request #41 from linebender/hub

Add hub abstraction
This commit is contained in:
Raph Levien 2020-11-19 16:30:37 -08:00 committed by GitHub
commit 1d0fd02c79
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 486 additions and 141 deletions

View file

@ -1,24 +1,25 @@
use piet_gpu_hal::hub;
use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags}; use piet_gpu_hal::{CmdBuf, MemFlags};
fn main() { fn main() {
let (instance, _) = VkInstance::new(None).unwrap(); let (instance, _) = VkInstance::new(None).unwrap();
unsafe { unsafe {
let device = instance.device(None).unwrap(); let device = instance.device(None).unwrap();
let fence = device.create_fence(false).unwrap(); let session = hub::Session::new(device);
let mem_flags = MemFlags::host_coherent(); let mem_flags = MemFlags::host_coherent();
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>(); let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
let buffer = device let mut buffer = session
.create_buffer(std::mem::size_of_val(&src[..]) as u64, mem_flags) .create_buffer(std::mem::size_of_val(&src[..]) as u64, mem_flags)
.unwrap(); .unwrap();
device.write_buffer(&buffer, &src).unwrap(); buffer.write(&src).unwrap();
let code = include_bytes!("./shader/collatz.spv"); let code = include_bytes!("./shader/collatz.spv");
let pipeline = device.create_simple_compute_pipeline(code, 1, 0).unwrap(); let pipeline = session.create_simple_compute_pipeline(code, 1, 0).unwrap();
let descriptor_set = device let descriptor_set = session
.create_descriptor_set(&pipeline, &[&buffer], &[]) .create_descriptor_set(&pipeline, &[buffer.vk_buffer()], &[])
.unwrap(); .unwrap();
let query_pool = device.create_query_pool(2).unwrap(); let query_pool = session.create_query_pool(2).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap(); let mut cmd_buf = session.cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
cmd_buf.reset_query_pool(&query_pool); cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.write_timestamp(&query_pool, 0);
@ -26,13 +27,11 @@ fn main() {
cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.host_barrier(); cmd_buf.host_barrier();
cmd_buf.finish(); cmd_buf.finish();
device let submitted = session.run_cmd_buf(cmd_buf, &[], &[]).unwrap();
.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence)) submitted.wait().unwrap();
.unwrap(); let timestamps = session.fetch_query_pool(&query_pool);
device.wait_and_reset(&[fence]).unwrap();
let timestamps = device.reap_query_pool(&query_pool);
let mut dst: Vec<u32> = Default::default(); let mut dst: Vec<u32> = Default::default();
device.read_buffer(&buffer, &mut dst).unwrap(); buffer.read(&mut dst).unwrap();
for (i, val) in dst.iter().enumerate().take(16) { for (i, val) in dst.iter().enumerate().take(16) {
println!("{}: {}", i, val); println!("{}: {}", i, val);
} }

293
piet-gpu-hal/src/hub.rs Normal file
View file

@ -0,0 +1,293 @@
//! A convenience layer on top of raw hal.
//!
//! This layer takes care of some lifetime and synchronization bookkeeping.
//! It is likely that it will also take care of compile time and runtime
//! negotiation of backends (Vulkan, DX12), but right now it's Vulkan-only.
use std::any::Any;
use std::sync::{Arc, Mutex, Weak};
use crate::vulkan;
use crate::{Device, Error};
// Re-export the Vulkan backend's associated types under hub-level names.
// Per the module docs above, these will likely become backend-agnostic
// wrappers once runtime backend negotiation (Vulkan/DX12) is implemented.
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
pub type Pipeline = <vulkan::VkDevice as Device>::Pipeline;
pub type DescriptorSet = <vulkan::VkDevice as Device>::DescriptorSet;
pub type QueryPool = <vulkan::VkDevice as Device>::QueryPool;
// Private aliases: fences and raw images/buffers are managed by the hub and
// not exposed directly (see `Image::vk_image` / `Buffer::vk_buffer` for the
// borrowed-access escape hatches).
type Fence = <vulkan::VkDevice as Device>::Fence;
type VkImage = <vulkan::VkDevice as Device>::Image;
type VkBuffer = <vulkan::VkDevice as Device>::Buffer;
/// A cheaply-cloneable handle to a device session.
///
/// Cloning shares the same underlying device and pools (`Arc`).
#[derive(Clone)]
pub struct Session(Arc<SessionInner>);
struct SessionInner {
// The raw backend device all operations are delegated to.
device: vulkan::VkDevice,
// Recycled (cmd_buf, fence) pairs available for reuse by `cmd_buf()`.
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
/// Command buffers that are still pending (so resources can't be freed).
pending: Mutex<Vec<SubmittedCmdBufInner>>,
}
/// A command buffer handed out by `Session::cmd_buf`.
///
/// Derefs to the raw backend command buffer for recording. Resources added
/// via `add_resource` are kept alive until the submission completes.
pub struct CmdBuf {
cmd_buf: vulkan::CmdBuf,
// Fence signaled when this command buffer's submission completes.
fence: Fence,
// Type-erased clones kept alive for the duration of GPU execution.
resources: Vec<Box<dyn Any>>,
// Weak so an outstanding CmdBuf doesn't keep the session alive.
session: Weak<SessionInner>,
}
// Maybe "pending" is a better name?
/// A command buffer that has been submitted via `Session::run_cmd_buf`.
///
/// Call `wait` to block on completion; if dropped instead, it is parked on
/// the session's pending list and recycled later by `poll_cleanup`.
pub struct SubmittedCmdBuf(Option<SubmittedCmdBufInner>, Weak<SessionInner>);
// State moved out of `CmdBuf` at submission time; `Option`-wrapped in
// `SubmittedCmdBuf` so `wait` can consume it while `Drop` stays a no-op.
struct SubmittedCmdBufInner {
cmd_buf: vulkan::CmdBuf,
fence: Fence,
resources: Vec<Box<dyn Any>>,
}
/// A reference-counted image; the underlying Vulkan image is destroyed when
/// the last clone is dropped (see `Drop for ImageInner`).
#[derive(Clone)]
pub struct Image(Arc<ImageInner>);
struct ImageInner {
image: VkImage,
// Needed at drop time to reach the device for destruction; weak so the
// image doesn't keep the session alive.
session: Weak<SessionInner>,
}
/// A reference-counted buffer; the underlying Vulkan buffer is destroyed when
/// the last clone is dropped (see `Drop for BufferInner`).
#[derive(Clone)]
pub struct Buffer(Arc<BufferInner>);
struct BufferInner {
buffer: VkBuffer,
// Needed at drop time to reach the device for destruction; weak so the
// buffer doesn't keep the session alive.
session: Weak<SessionInner>,
}
impl Session {
/// Wrap a raw Vulkan device in a new session with empty pools.
pub fn new(device: vulkan::VkDevice) -> Session {
Session(Arc::new(SessionInner {
device,
cmd_buf_pool: Default::default(),
pending: Default::default(),
}))
}
/// Get a command buffer for recording, reusing a pooled one if available.
///
/// The returned command buffer carries its own fence and a weak reference
/// back to this session so it can be recycled after submission.
pub fn cmd_buf(&self) -> Result<CmdBuf, Error> {
// Opportunistically recycle completed submissions so the pool has a
// chance to be non-empty.
self.poll_cleanup();
let (cmd_buf, fence) = if let Some(cf) = self.0.cmd_buf_pool.lock().unwrap().pop() {
cf
} else {
let cmd_buf = self.0.device.create_cmd_buf()?;
let fence = unsafe { self.0.device.create_fence(false)? };
(cmd_buf, fence)
};
Ok(CmdBuf {
cmd_buf,
fence,
resources: Vec::new(),
session: Arc::downgrade(&self.0),
})
}
/// Scan pending submissions and recycle those whose fence has signaled:
/// the (cmd_buf, fence) pair returns to the pool and the retained
/// resources are dropped.
///
/// Lock order: `pending` is held across the loop and `cmd_buf_pool` is
/// taken inside it; `SubmittedCmdBuf::wait` takes `cmd_buf_pool` alone,
/// so this ordering cannot deadlock.
fn poll_cleanup(&self) {
let mut pending = self.0.pending.lock().unwrap();
unsafe {
let mut i = 0;
while i < pending.len() {
if let Ok(true) = self.0.device.get_fence_status(pending[i].fence) {
let item = pending.swap_remove(i);
// TODO: wait is superfluous, can just reset
let _ = self.0.device.wait_and_reset(&[item.fence]);
self.0
.cmd_buf_pool
.lock()
.unwrap()
.push((item.cmd_buf, item.fence));
std::mem::drop(item.resources);
} else {
// Only advance when nothing was removed: swap_remove moves
// a new element into index i, which must be checked too.
i += 1;
}
}
}
}
/// Submit a command buffer to the device queue.
///
/// Ownership of the command buffer and its retained resources moves into
/// the returned `SubmittedCmdBuf`; call `wait` on it (or drop it to park
/// it on the pending list) before the resources can be reclaimed.
///
/// NOTE(review): if the underlying submit fails, `cmd_buf` is dropped here
/// and its raw command buffer and fence are not returned to the pool
/// (CmdBuf has no Drop impl) — confirm whether that leak is acceptable.
pub unsafe fn run_cmd_buf(
&self,
cmd_buf: CmdBuf,
wait_semaphores: &[Semaphore],
signal_semaphores: &[Semaphore],
) -> Result<SubmittedCmdBuf, Error> {
self.0.device.run_cmd_buf(
&cmd_buf.cmd_buf,
wait_semaphores,
signal_semaphores,
Some(&cmd_buf.fence),
)?;
Ok(SubmittedCmdBuf(
Some(SubmittedCmdBufInner {
cmd_buf: cmd_buf.cmd_buf,
fence: cmd_buf.fence,
resources: cmd_buf.resources,
}),
cmd_buf.session,
))
}
/// Create a buffer; it is destroyed when the last `Buffer` clone drops.
pub fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
let buffer = self.0.device.create_buffer(size, mem_flags)?;
Ok(Buffer(Arc::new(BufferInner {
buffer,
session: Arc::downgrade(&self.0),
})))
}
/// Create a 2D image; it is destroyed when the last `Image` clone drops.
pub unsafe fn create_image2d(
&self,
width: u32,
height: u32,
mem_flags: MemFlags,
) -> Result<Image, Error> {
let image = self.0.device.create_image2d(width, height, mem_flags)?;
Ok(Image(Arc::new(ImageInner {
image,
session: Arc::downgrade(&self.0),
})))
}
/// Create a semaphore (not session-managed; the caller owns it).
pub unsafe fn create_semaphore(&self) -> Result<Semaphore, Error> {
self.0.device.create_semaphore()
}
/// This creates a pipeline that runs over the buffer.
///
/// The descriptor set layout is just some number of storage buffers and storage images (this might change).
pub unsafe fn create_simple_compute_pipeline(
&self,
code: &[u8],
n_buffers: u32,
n_images: u32,
) -> Result<Pipeline, Error> {
self.0
.device
.create_simple_compute_pipeline(code, n_buffers, n_images)
}
/// Create a descriptor set for a simple pipeline that just references buffers and images.
///
/// Note: when we do portability, the signature will change to not reference the Vulkan types
/// directly.
pub unsafe fn create_descriptor_set(
&self,
pipeline: &Pipeline,
bufs: &[&vulkan::Buffer],
images: &[&vulkan::Image],
) -> Result<DescriptorSet, Error> {
self.0.device.create_descriptor_set(pipeline, bufs, images)
}
/// Create a query pool for timestamp queries.
pub fn create_query_pool(&self, n_queries: u32) -> Result<QueryPool, Error> {
self.0.device.create_query_pool(n_queries)
}
/// Fetch timestamp query results from the backend.
///
/// NOTE(review): values appear to be in seconds (call sites multiply by
/// 1e3 for ms) — confirm against the backend implementation.
pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result<Vec<f64>, Error> {
self.0.device.fetch_query_pool(pool)
}
}
impl CmdBuf {
pub fn add_resource<T: Clone + 'static>(&mut self, resource: &T) {
self.resources.push(Box::new(resource.clone()));
}
}
impl SubmittedCmdBuf {
    /// Block until the GPU has finished with this command buffer, then
    /// recycle it: the fence is waited on and reset, the raw command buffer
    /// and fence return to the session's pool, and the retained resources
    /// are released. If the session is already gone, the inner state is
    /// simply dropped.
    pub fn wait(mut self) -> Result<(), Error> {
        let inner = self.0.take().unwrap();
        match Weak::upgrade(&self.1) {
            Some(session) => {
                // Waiting also resets the fence, readying it for reuse.
                unsafe {
                    session.device.wait_and_reset(&[inner.fence])?;
                }
                let mut pool = session.cmd_buf_pool.lock().unwrap();
                pool.push((inner.cmd_buf, inner.fence));
                drop(pool);
                // Safe to free now that the GPU is done with them.
                drop(inner.resources);
            }
            // TODO: should a lost session be reported as an error?
            None => {}
        }
        Ok(())
    }
}
impl Drop for SubmittedCmdBuf {
fn drop(&mut self) {
if let Some(inner) = self.0.take() {
if let Some(session) = Weak::upgrade(&self.1) {
session.pending.lock().unwrap().push(inner);
}
}
}
}
impl Drop for BufferInner {
    /// Destroy the underlying Vulkan buffer when the last handle goes away.
    /// If the session has already been dropped, do nothing.
    fn drop(&mut self) {
        match Weak::upgrade(&self.session) {
            Some(session) => unsafe {
                // Destruction errors are deliberately ignored in a destructor.
                let _ = session.device.destroy_buffer(&self.buffer);
            },
            None => {}
        }
    }
}
impl Drop for ImageInner {
    /// Destroy the underlying Vulkan image when the last handle goes away.
    /// If the session has already been dropped, do nothing.
    fn drop(&mut self) {
        match Weak::upgrade(&self.session) {
            Some(session) => unsafe {
                // Destruction errors are deliberately ignored in a destructor.
                let _ = session.device.destroy_image(&self.image);
            },
            None => {}
        }
    }
}
/// For now, we deref, but for runtime backend switching we'll need to wrap
/// all methods.
impl std::ops::Deref for CmdBuf {
type Target = vulkan::CmdBuf;
// Gives recording code direct access to the raw backend command buffer.
fn deref(&self) -> &Self::Target {
&self.cmd_buf
}
}
// Mutable counterpart of the Deref impl above; recording requires &mut.
impl std::ops::DerefMut for CmdBuf {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.cmd_buf
}
}
impl Image {
/// Borrow the raw Vulkan image, e.g. for building descriptor sets or
/// recording barriers/blits.
pub fn vk_image(&self) -> &vulkan::Image {
&self.0.image
}
}
impl Buffer {
    /// Borrow the raw Vulkan buffer, e.g. for building descriptor sets.
    pub fn vk_buffer(&self) -> &vulkan::Buffer {
        &self.0.buffer
    }

    /// Copy `contents` into the buffer via the device.
    ///
    /// # Safety
    /// The same requirements as the backend's `write_buffer` hold.
    pub unsafe fn write<T: Sized>(&mut self, contents: &[T]) -> Result<(), Error> {
        match Weak::upgrade(&self.0.session) {
            Some(session) => session.device.write_buffer(&self.0.buffer, contents),
            // TODO: should a lost session be reported as an error?
            None => Ok(()),
        }
    }

    /// Read the buffer's contents into `result` via the device.
    ///
    /// # Safety
    /// The same requirements as the backend's `read_buffer` hold.
    pub unsafe fn read<T: Sized>(&self, result: &mut Vec<T>) -> Result<(), Error> {
        match Weak::upgrade(&self.0.session) {
            Some(session) => session.device.read_buffer(&self.0.buffer, result),
            // TODO: should a lost session be reported as an error?
            None => Ok(()),
        }
    }
}

View file

@ -2,6 +2,8 @@
/// ///
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu. /// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
/// In time, it may go away and be replaced by either gfx-hal or wgpu. /// In time, it may go away and be replaced by either gfx-hal or wgpu.
pub mod hub;
pub mod vulkan; pub mod vulkan;
/// This isn't great but is expedient. /// This isn't great but is expedient.
@ -29,6 +31,14 @@ pub trait Device: Sized {
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>; fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
/// Destroy a buffer.
///
/// The same safety requirements hold as in Vulkan: the buffer cannot be used
/// after this call, and all commands referencing this buffer must have completed.
///
/// Maybe doesn't need result return?
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
unsafe fn create_image2d( unsafe fn create_image2d(
&self, &self,
width: u32, width: u32,
@ -36,6 +46,16 @@ pub trait Device: Sized {
mem_flags: Self::MemFlags, mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error>; ) -> Result<Self::Image, Error>;
/// Destroy an image.
///
/// The same safety requirements hold as in Vulkan: the image cannot be used
/// after this call, and all commands referencing this image must have completed.
///
/// Use this only with images we created, not for swapchain images.
///
/// Maybe doesn't need result return?
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error>;
unsafe fn create_simple_compute_pipeline( unsafe fn create_simple_compute_pipeline(
&self, &self,
code: &[u8], code: &[u8],
@ -61,7 +81,7 @@ pub trait Device: Sized {
/// ///
/// # Safety /// # Safety
/// All submitted commands that refer to this query pool must have completed. /// All submitted commands that refer to this query pool must have completed.
unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>; unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn run_cmd_buf( unsafe fn run_cmd_buf(
&self, &self,
@ -86,6 +106,7 @@ pub trait Device: Sized {
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>; unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>; unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>; unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error>;
} }
pub trait CmdBuf<D: Device> { pub trait CmdBuf<D: Device> {

View file

@ -62,8 +62,7 @@ pub struct Buffer {
pub struct Image { pub struct Image {
image: vk::Image, image: vk::Image,
// Not used now but probably will be for destruction. image_memory: vk::DeviceMemory,
_image_memory: vk::DeviceMemory,
image_view: vk::ImageView, image_view: vk::ImageView,
extent: vk::Extent3D, extent: vk::Extent3D,
} }
@ -413,6 +412,13 @@ impl crate::Device for VkDevice {
} }
} }
unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error> {
let device = &self.device.device;
device.destroy_buffer(buffer.buffer, None);
device.free_memory(buffer.buffer_memory, None);
Ok(())
}
unsafe fn create_image2d( unsafe fn create_image2d(
&self, &self,
width: u32, width: u32,
@ -476,12 +482,20 @@ impl crate::Device for VkDevice {
)?; )?;
Ok(Image { Ok(Image {
image, image,
_image_memory: image_memory, image_memory,
image_view, image_view,
extent, extent,
}) })
} }
unsafe fn destroy_image(&self, image: &Self::Image) -> Result<(), Error> {
let device = &self.device.device;
device.destroy_image(image.image, None);
device.destroy_image_view(image.image_view, None);
device.free_memory(image.image_memory, None);
Ok(())
}
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error> { unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error> {
let device = &self.device.device; let device = &self.device.device;
let mut flags = vk::FenceCreateFlags::empty(); let mut flags = vk::FenceCreateFlags::empty();
@ -503,6 +517,11 @@ impl crate::Device for VkDevice {
Ok(()) Ok(())
} }
unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error> {
let device = &self.device.device;
Ok(device.get_fence_status(fence)?)
}
/// This creates a pipeline that runs over the buffer. /// This creates a pipeline that runs over the buffer.
/// ///
/// The descriptor set layout is just some number of storage buffers and storage images (this might change). /// The descriptor set layout is just some number of storage buffers and storage images (this might change).
@ -686,7 +705,7 @@ impl crate::Device for VkDevice {
} }
} }
unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> { unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
let device = &self.device.device; let device = &self.device.device;
let mut buf = vec![0u64; pool.n_queries as usize]; let mut buf = vec![0u64; pool.n_queries as usize];
device.get_query_pool_results( device.get_query_pool_results(
@ -1025,7 +1044,7 @@ impl VkSwapchain {
pub unsafe fn image(&self, idx: usize) -> Image { pub unsafe fn image(&self, idx: usize) -> Image {
Image { Image {
image: self.images[idx], image: self.images[idx],
_image_memory: vk::DeviceMemory::null(), image_memory: vk::DeviceMemory::null(),
image_view: vk::ImageView::null(), image_view: vk::ImageView::null(),
extent: vk::Extent3D { extent: vk::Extent3D {
width: self.extent.width, width: self.extent.width,

View file

@ -4,8 +4,9 @@ use std::path::Path;
use clap::{App, Arg}; use clap::{App, Arg};
use piet_gpu_hal::hub;
use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, Error, MemFlags}; use piet_gpu_hal::{CmdBuf, Error, MemFlags};
use piet_gpu::{render_scene, render_svg, PietGpuRenderContext, Renderer, HEIGHT, WIDTH}; use piet_gpu::{render_scene, render_svg, PietGpuRenderContext, Renderer, HEIGHT, WIDTH};
@ -195,10 +196,10 @@ fn main() -> Result<(), Error> {
let (instance, _) = VkInstance::new(None)?; let (instance, _) = VkInstance::new(None)?;
unsafe { unsafe {
let device = instance.device(None)?; let device = instance.device(None)?;
let session = hub::Session::new(device);
let fence = device.create_fence(false)?; let mut cmd_buf = session.cmd_buf()?;
let mut cmd_buf = device.create_cmd_buf()?; let query_pool = session.create_query_pool(8)?;
let query_pool = device.create_query_pool(8)?;
let mut ctx = PietGpuRenderContext::new(); let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") { if let Some(input) = matches.value_of("INPUT") {
@ -218,20 +219,20 @@ fn main() -> Result<(), Error> {
let scene = ctx.get_scene_buf(); let scene = ctx.get_scene_buf();
//dump_scene(&scene); //dump_scene(&scene);
let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?; let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?;
let image_buf = let image_buf =
device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?; session.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
cmd_buf.begin(); cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool); renderer.record(&mut cmd_buf, &query_pool);
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf); cmd_buf.copy_image_to_buffer(renderer.image_dev.vk_image(), image_buf.vk_buffer());
cmd_buf.host_barrier(); cmd_buf.host_barrier();
cmd_buf.finish(); cmd_buf.finish();
let start = std::time::Instant::now(); let start = std::time::Instant::now();
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?; let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?;
device.wait_and_reset(&[fence])?; submitted.wait()?;
println!("elapsed = {:?}", start.elapsed()); println!("elapsed = {:?}", start.elapsed());
let ts = device.reap_query_pool(&query_pool).unwrap(); let ts = session.fetch_query_pool(&query_pool).unwrap();
println!("Element kernel time: {:.3}ms", ts[0] * 1e3); println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!( println!(
"Tile allocation kernel time: {:.3}ms", "Tile allocation kernel time: {:.3}ms",
@ -253,7 +254,7 @@ fn main() -> Result<(), Error> {
let mut img_data: Vec<u8> = Default::default(); let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy // Note: because png can use a `&[u8]` slice, we could avoid an extra copy
// (probably passing a slice into a closure). But for now: keep it simple. // (probably passing a slice into a closure). But for now: keep it simple.
device.read_buffer(&image_buf, &mut img_data).unwrap(); image_buf.read(&mut img_data).unwrap();
// Write image as PNG file. // Write image as PNG file.
let path = Path::new("image.png"); let path = Path::new("image.png");

View file

@ -1,5 +1,6 @@
use piet_gpu_hal::hub;
use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout}; use piet_gpu_hal::{CmdBuf, Error, ImageLayout};
use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH}; use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH};
@ -25,19 +26,14 @@ fn main() -> Result<(), Error> {
unsafe { unsafe {
let device = instance.device(surface.as_ref())?; let device = instance.device(surface.as_ref())?;
let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?; let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?;
let session = hub::Session::new(device);
let mut current_frame = 0; let mut current_frame = 0;
let present_semaphores = (0..NUM_FRAMES) let present_semaphores = (0..NUM_FRAMES)
.map(|_| device.create_semaphore()) .map(|_| session.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?;
let frame_fences = (0..NUM_FRAMES)
.map(|_| device.create_fence(false))
.collect::<Result<Vec<_>, Error>>()?;
let mut cmd_buffers = (0..NUM_FRAMES)
.map(|_| device.create_cmd_buf())
.collect::<Result<Vec<_>, Error>>()?; .collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES) let query_pools = (0..NUM_FRAMES)
.map(|_| device.create_query_pool(8)) .map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?; .collect::<Result<Vec<_>, Error>>()?;
let mut ctx = PietGpuRenderContext::new(); let mut ctx = PietGpuRenderContext::new();
@ -46,7 +42,9 @@ fn main() -> Result<(), Error> {
let n_pathseg = ctx.pathseg_count(); let n_pathseg = ctx.pathseg_count();
let scene = ctx.get_scene_buf(); let scene = ctx.get_scene_buf();
let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?; let renderer = Renderer::new(&session, scene, n_paths, n_pathseg)?;
let mut submitted: Option<hub::SubmittedCmdBuf> = None;
event_loop.run(move |event, _, control_flow| { event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
@ -67,10 +65,10 @@ fn main() -> Result<(), Error> {
let frame_idx = current_frame % NUM_FRAMES; let frame_idx = current_frame % NUM_FRAMES;
let query_pool = &query_pools[frame_idx]; let query_pool = &query_pools[frame_idx];
if current_frame >= NUM_FRAMES { if let Some(submitted) = submitted.take() {
device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap(); submitted.wait().unwrap();
let ts = device.reap_query_pool(query_pool).unwrap(); let ts = session.fetch_query_pool(query_pool).unwrap();
window.set_title(&format!( window.set_title(&format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[6] * 1e3, ts[6] * 1e3,
@ -86,9 +84,9 @@ fn main() -> Result<(), Error> {
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
let swap_image = swapchain.image(image_idx); let swap_image = swapchain.image(image_idx);
let cmd_buf = &mut cmd_buffers[frame_idx]; let mut cmd_buf = session.cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
renderer.record(cmd_buf, &query_pool); renderer.record(&mut cmd_buf, &query_pool);
// Image -> Swapchain // Image -> Swapchain
cmd_buf.image_barrier( cmd_buf.image_barrier(
@ -96,18 +94,17 @@ fn main() -> Result<(), Error> {
ImageLayout::Undefined, ImageLayout::Undefined,
ImageLayout::BlitDst, ImageLayout::BlitDst,
); );
cmd_buf.blit_image(&renderer.image_dev, &swap_image); cmd_buf.blit_image(renderer.image_dev.vk_image(), &swap_image);
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present); cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
cmd_buf.finish(); cmd_buf.finish();
device submitted = Some(session
.run_cmd_buf( .run_cmd_buf(
&cmd_buf, cmd_buf,
&[acquisition_semaphore], &[acquisition_semaphore],
&[present_semaphores[frame_idx]], &[present_semaphores[frame_idx]],
Some(&frame_fences[frame_idx]),
) )
.unwrap(); .unwrap());
swapchain swapchain
.present(image_idx, &[present_semaphores[frame_idx]]) .present(image_idx, &[present_semaphores[frame_idx]])

View file

@ -10,7 +10,8 @@ use piet::{Color, RenderContext};
use piet_gpu_types::encoder::Encode; use piet_gpu_types::encoder::Encode;
use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags}; use piet_gpu_hal::hub;
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, MemFlags};
use pico_svg::PicoSvg; use pico_svg::PicoSvg;
@ -113,57 +114,57 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
} }
} }
pub struct Renderer<D: Device> { pub struct Renderer {
pub image_dev: D::Image, // resulting image pub image_dev: hub::Image, // resulting image
scene_buf: D::Buffer, scene_buf: hub::Buffer,
scene_dev: D::Buffer, scene_dev: hub::Buffer,
pub state_buf: D::Buffer, pub state_buf: hub::Buffer,
pub anno_buf: D::Buffer, pub anno_buf: hub::Buffer,
pub pathseg_buf: D::Buffer, pub pathseg_buf: hub::Buffer,
pub tile_buf: D::Buffer, pub tile_buf: hub::Buffer,
pub bin_buf: D::Buffer, pub bin_buf: hub::Buffer,
pub ptcl_buf: D::Buffer, pub ptcl_buf: hub::Buffer,
el_pipeline: D::Pipeline, el_pipeline: hub::Pipeline,
el_ds: D::DescriptorSet, el_ds: hub::DescriptorSet,
tile_pipeline: D::Pipeline, tile_pipeline: hub::Pipeline,
tile_ds: D::DescriptorSet, tile_ds: hub::DescriptorSet,
path_pipeline: D::Pipeline, path_pipeline: hub::Pipeline,
path_ds: D::DescriptorSet, path_ds: hub::DescriptorSet,
backdrop_pipeline: D::Pipeline, backdrop_pipeline: hub::Pipeline,
backdrop_ds: D::DescriptorSet, backdrop_ds: hub::DescriptorSet,
tile_alloc_buf_host: D::Buffer, tile_alloc_buf_host: hub::Buffer,
tile_alloc_buf_dev: D::Buffer, tile_alloc_buf_dev: hub::Buffer,
bin_pipeline: D::Pipeline, bin_pipeline: hub::Pipeline,
bin_ds: D::DescriptorSet, bin_ds: hub::DescriptorSet,
bin_alloc_buf_host: D::Buffer, bin_alloc_buf_host: hub::Buffer,
bin_alloc_buf_dev: D::Buffer, bin_alloc_buf_dev: hub::Buffer,
coarse_pipeline: D::Pipeline, coarse_pipeline: hub::Pipeline,
coarse_ds: D::DescriptorSet, coarse_ds: hub::DescriptorSet,
coarse_alloc_buf_host: D::Buffer, coarse_alloc_buf_host: hub::Buffer,
coarse_alloc_buf_dev: D::Buffer, coarse_alloc_buf_dev: hub::Buffer,
k4_pipeline: D::Pipeline, k4_pipeline: hub::Pipeline,
k4_ds: D::DescriptorSet, k4_ds: hub::DescriptorSet,
n_elements: usize, n_elements: usize,
n_paths: usize, n_paths: usize,
n_pathseg: usize, n_pathseg: usize,
} }
impl<D: Device> Renderer<D> { impl Renderer {
pub unsafe fn new( pub unsafe fn new(
device: &D, session: &hub::Session,
scene: &[u8], scene: &[u8],
n_paths: usize, n_paths: usize,
n_pathseg: usize, n_pathseg: usize,
@ -177,107 +178,108 @@ impl<D: Device> Renderer<D> {
n_elements, n_paths, n_pathseg n_elements, n_paths, n_pathseg
); );
let scene_buf = device let mut scene_buf = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
.unwrap(); .unwrap();
let scene_dev = device let scene_dev = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap(); .unwrap();
device.write_buffer(&scene_buf, &scene)?; scene_buf.write(&scene)?;
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?; let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let anno_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let pathseg_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let pathseg_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let tile_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let tile_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let bin_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?; let ptcl_buf = session.create_buffer(48 * 1024 * 1024, dev)?;
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
let el_code = include_bytes!("../shader/elements.spv"); let el_code = include_bytes!("../shader/elements.spv");
let el_pipeline = device.create_simple_compute_pipeline(el_code, 4, 0)?; let el_pipeline = session.create_simple_compute_pipeline(el_code, 4, 0)?;
let el_ds = device.create_descriptor_set( let el_ds = session.create_descriptor_set(
&el_pipeline, &el_pipeline,
&[&scene_dev, &state_buf, &anno_buf, &pathseg_buf], &[
scene_dev.vk_buffer(),
state_buf.vk_buffer(),
anno_buf.vk_buffer(),
pathseg_buf.vk_buffer(),
],
&[], &[],
)?; )?;
let tile_alloc_buf_host = device.create_buffer(12, host)?; let mut tile_alloc_buf_host = session.create_buffer(12, host)?;
let tile_alloc_buf_dev = device.create_buffer(12, dev)?; let tile_alloc_buf_dev = session.create_buffer(12, dev)?;
// TODO: constants // TODO: constants
const PATH_SIZE: usize = 12; const PATH_SIZE: usize = 12;
let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE; let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE;
device.write_buffer( tile_alloc_buf_host.write(
&tile_alloc_buf_host,
&[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32], &[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32],
)?; )?;
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv"); let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
let tile_pipeline = device.create_simple_compute_pipeline(tile_alloc_code, 3, 0)?; let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 3, 0)?;
let tile_ds = device.create_descriptor_set( let tile_ds = session.create_descriptor_set(
&tile_pipeline, &tile_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf], &[anno_buf.vk_buffer(), tile_alloc_buf_dev.vk_buffer(), tile_buf.vk_buffer()],
&[], &[],
)?; )?;
let path_alloc_code = include_bytes!("../shader/path_coarse.spv"); let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
let path_pipeline = device.create_simple_compute_pipeline(path_alloc_code, 3, 0)?; let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 3, 0)?;
let path_ds = device.create_descriptor_set( let path_ds = session.create_descriptor_set(
&path_pipeline, &path_pipeline,
&[&pathseg_buf, &tile_alloc_buf_dev, &tile_buf], &[pathseg_buf.vk_buffer(), tile_alloc_buf_dev.vk_buffer(), tile_buf.vk_buffer()],
&[], &[],
)?; )?;
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv"); let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
let backdrop_pipeline = device.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?; let backdrop_pipeline =
let backdrop_ds = device.create_descriptor_set( session.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
let backdrop_ds = session.create_descriptor_set(
&backdrop_pipeline, &backdrop_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf], &[anno_buf.vk_buffer(), tile_alloc_buf_dev.vk_buffer(), tile_buf.vk_buffer()],
&[], &[],
)?; )?;
let bin_alloc_buf_host = device.create_buffer(8, host)?; let mut bin_alloc_buf_host = session.create_buffer(8, host)?;
let bin_alloc_buf_dev = device.create_buffer(8, dev)?; let bin_alloc_buf_dev = session.create_buffer(8, dev)?;
// TODO: constants // TODO: constants
let bin_alloc_start = ((n_paths + 255) & !255) * 8; let bin_alloc_start = ((n_paths + 255) & !255) * 8;
device.write_buffer( bin_alloc_buf_host.write(&[n_paths as u32, bin_alloc_start as u32])?;
&bin_alloc_buf_host,
&[n_paths as u32, bin_alloc_start as u32],
)?;
let bin_code = include_bytes!("../shader/binning.spv"); let bin_code = include_bytes!("../shader/binning.spv");
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?; let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 3, 0)?;
let bin_ds = device.create_descriptor_set( let bin_ds = session.create_descriptor_set(
&bin_pipeline, &bin_pipeline,
&[&anno_buf, &bin_alloc_buf_dev, &bin_buf], &[anno_buf.vk_buffer(), bin_alloc_buf_dev.vk_buffer(), bin_buf.vk_buffer()],
&[], &[],
)?; )?;
let coarse_alloc_buf_host = device.create_buffer(8, host)?; let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
let coarse_alloc_buf_dev = device.create_buffer(8, dev)?; let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
device.write_buffer( coarse_alloc_buf_host.write(
&coarse_alloc_buf_host,
&[n_paths as u32, coarse_alloc_start as u32], &[n_paths as u32, coarse_alloc_start as u32],
)?; )?;
let coarse_code = include_bytes!("../shader/coarse.spv"); let coarse_code = include_bytes!("../shader/coarse.spv");
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?; let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 5, 0)?;
let coarse_ds = device.create_descriptor_set( let coarse_ds = session.create_descriptor_set(
&coarse_pipeline, &coarse_pipeline,
&[ &[
&anno_buf, anno_buf.vk_buffer(),
&bin_buf, bin_buf.vk_buffer(),
&tile_buf, tile_buf.vk_buffer(),
&coarse_alloc_buf_dev, coarse_alloc_buf_dev.vk_buffer(),
&ptcl_buf, ptcl_buf.vk_buffer(),
], ],
&[], &[],
)?; )?;
let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 2, 1)?; let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 2, 1)?;
let k4_ds = let k4_ds =
device.create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &tile_buf], &[&image_dev])?; session.create_descriptor_set(&k4_pipeline, &[ptcl_buf.vk_buffer(), tile_buf.vk_buffer()], &[image_dev.vk_image()])?;
Ok(Renderer { Ok(Renderer {
scene_buf, scene_buf,
@ -315,15 +317,24 @@ impl<D: Device> Renderer<D> {
}) })
} }
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) { pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev); cmd_buf.copy_buffer(self.scene_buf.vk_buffer(), self.scene_dev.vk_buffer());
cmd_buf.copy_buffer(&self.tile_alloc_buf_host, &self.tile_alloc_buf_dev); cmd_buf.copy_buffer(
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev); self.tile_alloc_buf_host.vk_buffer(),
cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev); self.tile_alloc_buf_dev.vk_buffer(),
cmd_buf.clear_buffer(&self.state_buf); );
cmd_buf.copy_buffer(
self.bin_alloc_buf_host.vk_buffer(),
self.bin_alloc_buf_dev.vk_buffer(),
);
cmd_buf.copy_buffer(
self.coarse_alloc_buf_host.vk_buffer(),
self.coarse_alloc_buf_dev.vk_buffer(),
);
cmd_buf.clear_buffer(self.state_buf.vk_buffer());
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier( cmd_buf.image_barrier(
&self.image_dev, self.image_dev.vk_image(),
ImageLayout::Undefined, ImageLayout::Undefined,
ImageLayout::General, ImageLayout::General,
); );
@ -381,6 +392,10 @@ impl<D: Device> Renderer<D> {
); );
cmd_buf.write_timestamp(&query_pool, 7); cmd_buf.write_timestamp(&query_pool, 7);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); cmd_buf.image_barrier(
self.image_dev.vk_image(),
ImageLayout::General,
ImageLayout::BlitSrc,
);
} }
} }