Initial work for surface support

surface: handle extensions

Implement swapchain creation and blit image to screen
This commit is contained in:
msiglreith 2020-04-30 15:02:48 +02:00
parent 4db4b3b87d
commit b38e43f0c2
7 changed files with 776 additions and 165 deletions

View file

@ -9,3 +9,5 @@ edition = "2018"
[dependencies] [dependencies]
ash = "0.30" ash = "0.30"
once_cell = "1.3.1" once_cell = "1.3.1"
ash-window = { git = "https://github.com/norse-rs/ash-window.git", branch = "dyn_trait" }
raw-window-handle = "0.3"

View file

@ -2,9 +2,9 @@ use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags}; use piet_gpu_hal::{CmdBuf, Device, MemFlags};
fn main() { fn main() {
let instance = VkInstance::new().unwrap(); let (instance, _) = VkInstance::new(None).unwrap();
unsafe { unsafe {
let device = instance.device().unwrap(); let device = instance.device(None).unwrap();
let mem_flags = MemFlags::host_coherent(); let mem_flags = MemFlags::host_coherent();
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>(); let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
let buffer = device let buffer = device

View file

@ -5,28 +5,49 @@
pub mod vulkan; pub mod vulkan;
/// This isn't great but is expedient. /// This isn't great but is expedient.
type Error = Box<dyn std::error::Error>; pub type Error = Box<dyn std::error::Error>;
/// Backend-independent image layout, used as the source and destination
/// state of an image barrier (`CmdBuf::image_barrier`).
#[derive(Copy, Clone, Debug)]
pub enum ImageLayout {
/// No particular layout; the Vulkan backend maps this to `UNDEFINED`.
Undefined,
/// Layout for handing an image to presentation; the Vulkan backend maps
/// this to `PRESENT_SRC_KHR`.
Present,
/// Layout for the source image of a blit; the Vulkan backend maps this to
/// `TRANSFER_SRC_OPTIMAL`.
BlitSrc,
/// Layout for the destination image of a blit; the Vulkan backend maps
/// this to `TRANSFER_DST_OPTIMAL`.
BlitDst,
/// General-purpose layout; the Vulkan backend maps this to `GENERAL`.
General,
}
pub trait Device: Sized { pub trait Device: Sized {
type Buffer; type Buffer;
type Image;
type MemFlags: MemFlags; type MemFlags: MemFlags;
type Pipeline; type Pipeline;
type DescriptorSet; type DescriptorSet;
type QueryPool; type QueryPool;
type CmdBuf: CmdBuf<Self>; type CmdBuf: CmdBuf<Self>;
type Fence;
type Semaphore;
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>; fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
unsafe fn create_image2d(
&self,
width: u32,
height: u32,
mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error>;
unsafe fn create_simple_compute_pipeline( unsafe fn create_simple_compute_pipeline(
&self, &self,
code: &[u8], code: &[u8],
n_buffers: u32, n_buffers: u32,
n_images: u32,
) -> Result<Self::Pipeline, Error>; ) -> Result<Self::Pipeline, Error>;
unsafe fn create_descriptor_set( unsafe fn create_descriptor_set(
&self, &self,
pipeline: &Self::Pipeline, pipeline: &Self::Pipeline,
bufs: &[&Self::Buffer], bufs: &[&Self::Buffer],
images: &[&Self::Image],
) -> Result<Self::DescriptorSet, Error>; ) -> Result<Self::DescriptorSet, Error>;
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>; fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
@ -40,9 +61,15 @@ pub trait Device: Sized {
/// ///
/// # Safety /// # Safety
/// All submitted commands that refer to this query pool must have completed. /// All submitted commands that refer to this query pool must have completed.
unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error>; unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn run_cmd_buf(&self, cmd_buf: &Self::CmdBuf) -> Result<(), Error>; unsafe fn run_cmd_buf(
&self,
cmd_buf: &Self::CmdBuf,
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error>;
unsafe fn read_buffer<T: Sized>( unsafe fn read_buffer<T: Sized>(
&self, &self,
@ -55,6 +82,10 @@ pub trait Device: Sized {
buffer: &Self::Buffer, buffer: &Self::Buffer,
contents: &[T], contents: &[T],
) -> Result<(), Error>; ) -> Result<(), Error>;
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
} }
pub trait CmdBuf<D: Device> { pub trait CmdBuf<D: Device> {
@ -71,6 +102,13 @@ pub trait CmdBuf<D: Device> {
unsafe fn memory_barrier(&mut self); unsafe fn memory_barrier(&mut self);
unsafe fn image_barrier(
&mut self,
image: &D::Image,
src_layout: ImageLayout,
dst_layout: ImageLayout,
);
/// Clear the buffer. /// Clear the buffer.
/// ///
/// This is readily supported in Vulkan, but for portability it is remarkably /// This is readily supported in Vulkan, but for portability it is remarkably
@ -80,6 +118,11 @@ pub trait CmdBuf<D: Device> {
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);
// low portability, dx12 doesn't support it natively
unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);
/// Reset the query pool. /// Reset the query pool.
/// ///
/// The query pool must be reset before each use, to avoid validation errors. /// The query pool must be reset before each use, to avoid validation errors.

View file

@ -4,12 +4,12 @@ use std::borrow::Cow;
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::sync::Arc; use std::sync::Arc;
use ash::extensions::ext::DebugUtils; use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0}; use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::{vk, Device, Entry, Instance}; use ash::{vk, Device, Entry, Instance};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use crate::Error; use crate::{Device as DeviceTrait, Error, ImageLayout};
pub struct VkInstance { pub struct VkInstance {
/// Retain the dynamic lib. /// Retain the dynamic lib.
@ -22,6 +22,7 @@ pub struct VkInstance {
pub struct VkDevice { pub struct VkDevice {
device: Arc<RawDevice>, device: Arc<RawDevice>,
physical_device: vk::PhysicalDevice,
device_mem_props: vk::PhysicalDeviceMemoryProperties, device_mem_props: vk::PhysicalDeviceMemoryProperties,
queue: vk::Queue, queue: vk::Queue,
qfi: u32, qfi: u32,
@ -32,6 +33,23 @@ struct RawDevice {
device: Device, device: Device,
} }
/// A window surface plus the loaded surface extension functions needed to
/// query it.
///
/// Produced by `VkInstance::new` when a window handle is passed in.
pub struct VkSurface {
// Raw surface handle.
surface: vk::SurfaceKHR,
// Surface extension entry points, loaded from the instance that created `surface`.
surface_fn: khr::Surface,
}
/// A swapchain, its images, and the state needed to acquire and present them.
///
/// Produced by `VkInstance::swapchain`.
pub struct VkSwapchain {
// Raw swapchain handle.
swapchain: vk::SwapchainKHR,
// Swapchain extension entry points.
swapchain_fn: khr::Swapchain,
// Queue that `present` submits to (taken from the device at creation).
present_queue: vk::Queue,
// Index into `acquisition_semaphores` of the semaphore the next acquire will use.
acquisition_idx: usize,
acquisition_semaphores: Vec<vk::Semaphore>, // same length as `images`
// Images owned by the swapchain, indexed by the value returned from `next`.
images: Vec<vk::Image>,
// Size of the swapchain images, as reported by the surface at creation.
extent: vk::Extent2D,
}
/// A handle to a buffer. /// A handle to a buffer.
/// ///
/// There is no lifetime tracking at this level; the caller is responsible /// There is no lifetime tracking at this level; the caller is responsible
@ -42,6 +60,13 @@ pub struct Buffer {
size: u64, size: u64,
} }
/// A handle to an image.
///
/// For images returned by `VkSwapchain::image`, `image_memory` and
/// `image_view` are null handles: only `image` and `extent` are meaningful.
pub struct Image {
// Raw image handle.
image: vk::Image,
// Memory bound to the image (null for swapchain images).
image_memory: vk::DeviceMemory,
// View over the image (null for swapchain images).
image_view: vk::ImageView,
// Dimensions of the image; `depth` is 1 for the 2D images created in this file.
extent: vk::Extent3D,
}
pub struct Pipeline { pub struct Pipeline {
pipeline: vk::Pipeline, pipeline: vk::Pipeline,
descriptor_set_layout: vk::DescriptorSetLayout, descriptor_set_layout: vk::DescriptorSetLayout,
@ -88,11 +113,7 @@ unsafe extern "system" fn vulkan_debug_callback(
println!( println!(
"{:?}:\n{:?} [{} ({})] : {}\n", "{:?}:\n{:?} [{} ({})] : {}\n",
message_severity, message_severity, message_type, message_id_name, message_id_number, message,
message_type,
message_id_name,
message_id_number,
message,
); );
vk::FALSE vk::FALSE
@ -119,40 +140,59 @@ impl VkInstance {
/// ///
/// There's more to be done to make this suitable for integration with other /// There's more to be done to make this suitable for integration with other
/// systems, but for now the goal is to make things simple. /// systems, but for now the goal is to make things simple.
pub fn new() -> Result<VkInstance, Error> { ///
/// The caller is responsible for making sure that window which owns the raw window handle
/// outlives the surface.
pub fn new(
window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>,
) -> Result<(VkInstance, Option<VkSurface>), Error> {
unsafe { unsafe {
let app_name = CString::new("VkToy").unwrap(); let app_name = CString::new("VkToy").unwrap();
let entry = Entry::new()?; let entry = Entry::new()?;
let exist_layers = entry let exist_layers = entry.enumerate_instance_layer_properties()?;
.enumerate_instance_layer_properties()?; let layers = LAYERS
let layers = LAYERS.iter().filter_map(|&lyr| { .iter()
exist_layers .filter_map(|&lyr| {
.iter() exist_layers
.find(|x| .iter()
CStr::from_ptr(x.layer_name.as_ptr()) == lyr .find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr)
) .map(|_| lyr.as_ptr())
.map(|_| lyr.as_ptr()) .or_else(|| {
.or_else(|| { println!(
println!("Unable to find layer: {}, have you installed the Vulkan SDK?", lyr.to_string_lossy()); "Unable to find layer: {}, have you installed the Vulkan SDK?",
None lyr.to_string_lossy()
}) );
}).collect::<Vec<_>>(); None
})
})
.collect::<Vec<_>>();
let exist_exts = entry let exist_exts = entry.enumerate_instance_extension_properties()?;
.enumerate_instance_extension_properties()?; let mut exts = EXTS
let exts = EXTS.iter().filter_map(|&ext| { .iter()
exist_exts .filter_map(|&ext| {
.iter() exist_exts
.find(|x| .iter()
CStr::from_ptr(x.extension_name.as_ptr()) == ext .find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
) .map(|_| ext.as_ptr())
.map(|_| ext.as_ptr()) .or_else(|| {
.or_else(|| { println!(
println!("Unable to find extension: {}, have you installed the Vulkan SDK?", ext.to_string_lossy()); "Unable to find extension: {}, have you installed the Vulkan SDK?",
None ext.to_string_lossy()
}) );
}).collect::<Vec<_>>(); None
})
})
.collect::<Vec<_>>();
let surface_extensions = match window_handle {
Some(ref handle) => ash_window::enumerate_required_extensions(*handle)?,
None => vec![],
};
for extension in surface_extensions {
exts.push(extension.as_ptr());
}
let instance = entry.create_instance( let instance = entry.create_instance(
&vk::InstanceCreateInfo::builder() &vk::InstanceCreateInfo::builder()
@ -168,7 +208,7 @@ impl VkInstance {
None, None,
)?; )?;
let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) { let (_dbg_loader, _dbg_callbk) = if false {
let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder() let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
.message_severity( .message_severity(
vk::DebugUtilsMessageSeverityFlagsEXT::ERROR vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
@ -185,37 +225,51 @@ impl VkInstance {
(None, None) (None, None)
}; };
Ok(VkInstance { let vk_surface = match window_handle {
Some(handle) => Some(VkSurface {
surface: ash_window::create_surface(&entry, &instance, handle, None)?,
surface_fn: khr::Surface::new(&entry, &instance),
}),
None => None,
};
let vk_instance = VkInstance {
entry, entry,
instance, instance,
_dbg_loader, _dbg_loader,
_dbg_callbk, _dbg_callbk,
}) };
Ok((vk_instance, vk_surface))
} }
} }
/// Create a device from the instance, suitable for compute. /// Create a device from the instance, suitable for compute, with an optional surface.
/// ///
/// # Safety /// # Safety
/// ///
/// The caller is responsible for making sure that the instance outlives the device. /// The caller is responsible for making sure that the instance outlives the device
/// We could enforce that, for example having an `Arc` of the raw instance, but for /// and surface. We could enforce that, for example having an `Arc` of the raw instance,
/// now keep things simple. /// but for now keep things simple.
pub unsafe fn device(&self) -> Result<VkDevice, Error> { pub unsafe fn device(&self, surface: Option<&VkSurface>) -> Result<VkDevice, Error> {
let devices = self.instance.enumerate_physical_devices()?; let devices = self.instance.enumerate_physical_devices()?;
let (pdevice, qfi) = let (pdevice, qfi) =
choose_compute_device(&self.instance, &devices).ok_or("no suitable device")?; choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?;
let device = self.instance.create_device( let queue_priorities = [1.0];
pdevice, let queue_create_infos = [vk::DeviceQueueCreateInfo::builder()
&vk::DeviceCreateInfo::builder().queue_create_infos(&[ .queue_family_index(qfi)
vk::DeviceQueueCreateInfo::builder() .queue_priorities(&queue_priorities)
.queue_family_index(qfi) .build()];
.queue_priorities(&[1.0]) let extensions = match surface {
.build(), Some(_) => vec![khr::Swapchain::name().as_ptr()],
]), None => vec![],
None, };
)?; let create_info = vk::DeviceCreateInfo::builder()
.queue_create_infos(&queue_create_infos)
.enabled_extension_names(&extensions)
.build();
let device = self.instance.create_device(pdevice, &create_info, None)?;
let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice); let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice);
@ -229,21 +283,98 @@ impl VkInstance {
Ok(VkDevice { Ok(VkDevice {
device, device,
physical_device: pdevice,
device_mem_props, device_mem_props,
qfi, qfi,
queue, queue,
timestamp_period, timestamp_period,
}) })
} }
/// Create a swapchain for `surface` on `device`.
///
/// Choices made here:
/// - Surface format: the first one reported by the surface. A reported format of
///   `UNDEFINED` is replaced with `B8G8R8A8_UNORM`, keeping the reported color space.
/// - Present mode: `MAILBOX` when available, `FIFO` otherwise.
/// - Image count: two images, adjusted to the range the surface supports.
/// - Images are created as transfer destinations only (they are meant to be blitted to).
///
/// One acquisition semaphore is created per swapchain image.
///
/// # Errors
///
/// Returns an error if the surface reports no formats, or if any of the underlying
/// Vulkan queries or object creations fail.
///
/// # Safety
///
/// Nothing in the returned `VkSwapchain` ties its lifetime to `device` or `surface`;
/// the caller is expected to keep both alive for as long as the swapchain is used.
pub unsafe fn swapchain(
    &self,
    device: &VkDevice,
    surface: &VkSurface,
) -> Result<VkSwapchain, Error> {
    let formats = surface
        .surface_fn
        .get_physical_device_surface_formats(device.physical_device, surface.surface)?;
    let surface_format = formats
        .first()
        .map(|surface_fmt| match surface_fmt.format {
            vk::Format::UNDEFINED => {
                vk::SurfaceFormatKHR {
                    format: vk::Format::B8G8R8A8_UNORM, // most common format on desktop
                    color_space: surface_fmt.color_space,
                }
            }
            _ => *surface_fmt,
        })
        .ok_or("no surface format found")?;

    let capabilities = surface
        .surface_fn
        .get_physical_device_surface_capabilities(device.physical_device, surface.surface)?;

    let present_modes = surface
        .surface_fn
        .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?;
    let present_mode = if present_modes.contains(&vk::PresentModeKHR::MAILBOX) {
        vk::PresentModeKHR::MAILBOX
    } else {
        vk::PresentModeKHR::FIFO
    };

    // Aim for double buffering, but never request fewer images than the surface
    // requires or more than it allows. A `max_image_count` of 0 means "no upper limit".
    let mut image_count = capabilities.min_image_count.max(2);
    if capabilities.max_image_count > 0 {
        image_count = image_count.min(capabilities.max_image_count);
    }

    let extent = capabilities.current_extent; // TODO: wayland for example will complain here ..

    let create_info = vk::SwapchainCreateInfoKHR::builder()
        .surface(surface.surface)
        .min_image_count(image_count)
        .image_format(surface_format.format)
        .image_color_space(surface_format.color_space)
        .image_extent(extent)
        .image_array_layers(1)
        .image_usage(vk::ImageUsageFlags::TRANSFER_DST)
        .image_sharing_mode(vk::SharingMode::EXCLUSIVE)
        .pre_transform(vk::SurfaceTransformFlagsKHR::IDENTITY)
        .composite_alpha(vk::CompositeAlphaFlagsKHR::OPAQUE)
        .present_mode(present_mode)
        .clipped(true);

    let swapchain_fn = khr::Swapchain::new(&self.instance, &device.device.device);
    let swapchain = swapchain_fn.create_swapchain(&create_info, None)?;

    // The implementation may create more images than requested, so size the
    // semaphore list from what was actually returned.
    let images = swapchain_fn.get_swapchain_images(swapchain)?;
    let acquisition_semaphores = (0..images.len())
        .map(|_| device.create_semaphore())
        .collect::<Result<Vec<_>, Error>>()?;

    Ok(VkSwapchain {
        swapchain,
        swapchain_fn,
        present_queue: device.queue,
        images,
        acquisition_semaphores,
        acquisition_idx: 0,
        extent,
    })
}
} }
impl crate::Device for VkDevice { impl crate::Device for VkDevice {
type Buffer = Buffer; type Buffer = Buffer;
type Image = Image;
type CmdBuf = CmdBuf; type CmdBuf = CmdBuf;
type DescriptorSet = DescriptorSet; type DescriptorSet = DescriptorSet;
type Pipeline = Pipeline; type Pipeline = Pipeline;
type QueryPool = QueryPool; type QueryPool = QueryPool;
type MemFlags = MemFlags; type MemFlags = MemFlags;
type Fence = vk::Fence;
type Semaphore = vk::Semaphore;
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> { fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
unsafe { unsafe {
@ -281,6 +412,96 @@ impl crate::Device for VkDevice {
} }
} }
/// Create a 2D `R8G8B8A8_UNORM` image of `width` x `height` pixels, backed by its own
/// memory allocation, together with a 2D color view over it.
///
/// The image has one mip level and one array layer, uses optimal tiling, and may be
/// used as a storage image and as a transfer source.
///
/// # Panics
///
/// Panics if `find_memory_type` yields no memory type for `mem_flags`.
///
/// Note that handles created before a failing step are not destroyed on the error path.
unsafe fn create_image2d(
    &self,
    width: u32,
    height: u32,
    mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error> {
    let vk_device = &self.device.device;
    let format = vk::Format::R8G8B8A8_UNORM;
    let extent = vk::Extent3D {
        width,
        height,
        depth: 1,
    };

    // write in compute and blit src
    let usage = vk::ImageUsageFlags::STORAGE | vk::ImageUsageFlags::TRANSFER_SRC;
    let image_info = vk::ImageCreateInfo::builder()
        .image_type(vk::ImageType::TYPE_2D)
        .format(format)
        .extent(extent)
        .mip_levels(1)
        .array_layers(1)
        .samples(vk::SampleCountFlags::TYPE_1)
        .tiling(vk::ImageTiling::OPTIMAL)
        .initial_layout(vk::ImageLayout::UNDEFINED)
        .usage(usage)
        .sharing_mode(vk::SharingMode::EXCLUSIVE);
    let image = vk_device.create_image(&image_info, None)?;

    // Allocate memory matching the image's requirements and bind it at offset 0.
    let requirements = vk_device.get_image_memory_requirements(image);
    let memory_type_index = find_memory_type(
        requirements.memory_type_bits,
        mem_flags.0,
        &self.device_mem_props,
    )
    .unwrap(); // TODO: proper error
    let alloc_info = vk::MemoryAllocateInfo::builder()
        .allocation_size(requirements.size)
        .memory_type_index(memory_type_index);
    let image_memory = vk_device.allocate_memory(&alloc_info, None)?;
    vk_device.bind_image_memory(image, image_memory, 0)?;

    // View over the single color mip level / array layer, with no channel swizzling.
    let whole_image = vk::ImageSubresourceRange {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        base_mip_level: 0,
        level_count: 1,
        base_array_layer: 0,
        layer_count: 1,
    };
    let identity = vk::ComponentMapping {
        r: vk::ComponentSwizzle::IDENTITY,
        g: vk::ComponentSwizzle::IDENTITY,
        b: vk::ComponentSwizzle::IDENTITY,
        a: vk::ComponentSwizzle::IDENTITY,
    };
    let view_info = vk::ImageViewCreateInfo::builder()
        .view_type(vk::ImageViewType::TYPE_2D)
        .image(image)
        .format(format)
        .subresource_range(whole_image)
        .components(identity)
        .build();
    let image_view = vk_device.create_image_view(&view_info, None)?;

    Ok(Image {
        image,
        image_memory,
        image_view,
        extent,
    })
}
/// Create a fence, already in the signaled state when `signaled` is true.
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error> {
    let flags = if signaled {
        vk::FenceCreateFlags::SIGNALED
    } else {
        vk::FenceCreateFlags::empty()
    };
    let create_info = vk::FenceCreateInfo::builder().flags(flags).build();
    let fence = self.device.device.create_fence(&create_info, None)?;
    Ok(fence)
}
/// Create a semaphore with default creation parameters.
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error> {
    let create_info = vk::SemaphoreCreateInfo::default();
    let semaphore = self.device.device.create_semaphore(&create_info, None)?;
    Ok(semaphore)
}
/// Wait until all of `fences` are signaled, then reset them.
///
/// The wait uses the maximum possible timeout value.
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error> {
    let vk_device = &self.device.device;
    let wait_for_all = true;
    vk_device.wait_for_fences(fences, wait_for_all, std::u64::MAX)?;
    vk_device.reset_fences(fences)?;
    Ok(())
}
/// This creates a pipeline that runs over the buffer. /// This creates a pipeline that runs over the buffer.
/// ///
/// The descriptor set layout is just some number of buffers (this will change). /// The descriptor set layout is just some number of buffers (this will change).
@ -288,18 +509,30 @@ impl crate::Device for VkDevice {
&self, &self,
code: &[u8], code: &[u8],
n_buffers: u32, n_buffers: u32,
n_images: u32,
) -> Result<Pipeline, Error> { ) -> Result<Pipeline, Error> {
let device = &self.device.device; let device = &self.device.device;
let bindings = (0..n_buffers) let mut bindings = Vec::new();
.map(|i| { for i in 0..n_buffers {
bindings.push(
vk::DescriptorSetLayoutBinding::builder() vk::DescriptorSetLayoutBinding::builder()
.binding(i) .binding(i)
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER) .descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(1) .descriptor_count(1)
.stage_flags(vk::ShaderStageFlags::COMPUTE) .stage_flags(vk::ShaderStageFlags::COMPUTE)
.build() .build(),
}) );
.collect::<Vec<_>>(); }
for i in n_buffers..n_buffers + n_images {
bindings.push(
vk::DescriptorSetLayoutBinding::builder()
.binding(i)
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
.descriptor_count(1)
.stage_flags(vk::ShaderStageFlags::COMPUTE)
.build(),
);
}
let descriptor_set_layout = device.create_descriptor_set_layout( let descriptor_set_layout = device.create_descriptor_set_layout(
&vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings), &vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings),
None, None,
@ -344,12 +577,26 @@ impl crate::Device for VkDevice {
&self, &self,
pipeline: &Pipeline, pipeline: &Pipeline,
bufs: &[&Buffer], bufs: &[&Buffer],
images: &[&Image],
) -> Result<DescriptorSet, Error> { ) -> Result<DescriptorSet, Error> {
let device = &self.device.device; let device = &self.device.device;
let descriptor_pool_sizes = [vk::DescriptorPoolSize::builder() let mut descriptor_pool_sizes = Vec::new();
.ty(vk::DescriptorType::STORAGE_BUFFER) if !bufs.is_empty() {
.descriptor_count(bufs.len() as u32) descriptor_pool_sizes.push(
.build()]; vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(bufs.len() as u32)
.build(),
);
}
if !images.is_empty() {
descriptor_pool_sizes.push(
vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_IMAGE)
.descriptor_count(images.len() as u32)
.build(),
);
}
let descriptor_pool = device.create_descriptor_pool( let descriptor_pool = device.create_descriptor_pool(
&vk::DescriptorPoolCreateInfo::builder() &vk::DescriptorPoolCreateInfo::builder()
.pool_sizes(&descriptor_pool_sizes) .pool_sizes(&descriptor_pool_sizes)
@ -380,6 +627,23 @@ impl crate::Device for VkDevice {
&[], &[],
); );
} }
for (i, image) in images.iter().enumerate() {
let binding = i + bufs.len();
let image_info = vk::DescriptorImageInfo::builder()
.sampler(vk::Sampler::null())
.image_view(image.image_view)
.image_layout(vk::ImageLayout::GENERAL)
.build();
device.update_descriptor_sets(
&[vk::WriteDescriptorSet::builder()
.dst_set(descriptor_sets[0])
.dst_binding(binding as u32)
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
.image_info(&[image_info])
.build()],
&[],
);
}
Ok(DescriptorSet { Ok(DescriptorSet {
descriptor_set: descriptor_sets[0], descriptor_set: descriptor_sets[0],
}) })
@ -390,7 +654,7 @@ impl crate::Device for VkDevice {
let device = &self.device.device; let device = &self.device.device;
let command_pool = device.create_command_pool( let command_pool = device.create_command_pool(
&vk::CommandPoolCreateInfo::builder() &vk::CommandPoolCreateInfo::builder()
.flags(vk::CommandPoolCreateFlags::empty()) .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER)
.queue_family_index(self.qfi), .queue_family_index(self.qfi),
None, None,
)?; )?;
@ -421,7 +685,7 @@ impl crate::Device for VkDevice {
} }
} }
unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error> { unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
let device = &self.device.device; let device = &self.device.device;
let mut buf = vec![0u64; pool.n_queries as usize]; let mut buf = vec![0u64; pool.n_queries as usize];
device.get_query_pool_results( device.get_query_pool_results(
@ -431,7 +695,6 @@ impl crate::Device for VkDevice {
&mut buf, &mut buf,
vk::QueryResultFlags::TYPE_64, vk::QueryResultFlags::TYPE_64,
)?; )?;
device.destroy_query_pool(pool.pool, None);
let ts0 = buf[0]; let ts0 = buf[0];
let tsp = self.timestamp_period as f64 * 1e-9; let tsp = self.timestamp_period as f64 * 1e-9;
let result = buf[1..] let result = buf[1..]
@ -444,23 +707,33 @@ impl crate::Device for VkDevice {
/// Run the command buffer. /// Run the command buffer.
/// ///
/// This version simply blocks until it's complete. /// This version simply blocks until it's complete.
unsafe fn run_cmd_buf(&self, cmd_buf: &CmdBuf) -> Result<(), Error> { unsafe fn run_cmd_buf(
&self,
cmd_buf: &CmdBuf,
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error> {
let device = &self.device.device; let device = &self.device.device;
// Run the command buffer. let fence = match fence {
let fence = device.create_fence( Some(fence) => *fence,
&vk::FenceCreateInfo::builder().flags(vk::FenceCreateFlags::empty()), None => vk::Fence::null(),
None, };
)?; let wait_stages = wait_semaphores
.iter()
.map(|_| vk::PipelineStageFlags::ALL_COMMANDS)
.collect::<Vec<_>>();
device.queue_submit( device.queue_submit(
self.queue, self.queue,
&[vk::SubmitInfo::builder() &[vk::SubmitInfo::builder()
.command_buffers(&[cmd_buf.cmd_buf]) .command_buffers(&[cmd_buf.cmd_buf])
.wait_semaphores(wait_semaphores)
.signal_semaphores(signal_semaphores)
.wait_dst_stage_mask(&wait_stages)
.build()], .build()],
fence, fence,
)?; )?;
device.wait_for_fences(&[fence], true, 100_000_000)?;
// TODO: handle errors better (currently leaks fence and can lead to other problems)
Ok(()) Ok(())
} }
@ -556,6 +829,37 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
); );
} }
/// Record a pipeline barrier that transitions `image` from `src_layout` to `dst_layout`.
///
/// The barrier is deliberately coarse: both stage masks are `ALL_COMMANDS`, and the
/// access masks are `MEMORY_WRITE` -> `MEMORY_READ`. It covers every mip level and
/// array layer of the image's color aspect.
unsafe fn image_barrier(
    &mut self,
    image: &Image,
    src_layout: ImageLayout,
    dst_layout: ImageLayout,
) {
    let device = &self.device.device;
    device.cmd_pipeline_barrier(
        self.cmd_buf,
        vk::PipelineStageFlags::ALL_COMMANDS,
        vk::PipelineStageFlags::ALL_COMMANDS,
        vk::DependencyFlags::empty(),
        &[],
        &[],
        &[vk::ImageMemoryBarrier::builder()
            .image(image.image)
            .src_access_mask(vk::AccessFlags::MEMORY_WRITE)
            .dst_access_mask(vk::AccessFlags::MEMORY_READ)
            .old_layout(map_image_layout(src_layout))
            .new_layout(map_image_layout(dst_layout))
            .subresource_range(vk::ImageSubresourceRange {
                aspect_mask: vk::ImageAspectFlags::COLOR,
                base_mip_level: 0,
                level_count: vk::REMAINING_MIP_LEVELS,
                base_array_layer: 0,
                // The layer count takes the array-layer sentinel, not the mip-level one.
                layer_count: vk::REMAINING_ARRAY_LAYERS,
            })
            .build()],
    );
}
unsafe fn clear_buffer(&self, buffer: &Buffer) { unsafe fn clear_buffer(&self, buffer: &Buffer) {
let device = &self.device.device; let device = &self.device.device;
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0); device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
@ -572,14 +876,74 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
); );
} }
/// Record a copy of the whole of `src` (color aspect, mip level 0, array layer 0)
/// into the start of `dst`, tightly packed.
///
/// The command is recorded with `src` in the `TRANSFER_SRC_OPTIMAL` layout.
unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) {
    let color_mip0 = vk::ImageSubresourceLayers {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        mip_level: 0,
        base_array_layer: 0,
        layer_count: 1,
    };
    let region = vk::BufferImageCopy {
        buffer_offset: 0,
        // Zero row length / image height mean the buffer data is tightly packed.
        buffer_row_length: 0,
        buffer_image_height: 0,
        image_subresource: color_mip0,
        image_offset: vk::Offset3D { x: 0, y: 0, z: 0 },
        image_extent: src.extent,
    };
    self.device.device.cmd_copy_image_to_buffer(
        self.cmd_buf,
        src.image,
        vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
        dst.buffer,
        &[region],
    );
}
/// Record a blit of the full extent of `src` onto the full extent of `dst`, using
/// linear filtering.
///
/// The command is recorded with `src` in `TRANSFER_SRC_OPTIMAL` and `dst` in
/// `TRANSFER_DST_OPTIMAL`. Only the color aspect of mip level 0 / array layer 0 is
/// touched on either side.
unsafe fn blit_image(&self, src: &Image, dst: &Image) {
    // Subresource used on both sides of the blit.
    let color_mip0 = || vk::ImageSubresourceLayers {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        mip_level: 0,
        base_array_layer: 0,
        layer_count: 1,
    };
    // Corner pair spanning an image from the origin to its full extent.
    let full_region = |extent: &vk::Extent3D| {
        [
            vk::Offset3D { x: 0, y: 0, z: 0 },
            vk::Offset3D {
                x: extent.width as i32,
                y: extent.height as i32,
                z: extent.depth as i32,
            },
        ]
    };

    let region = vk::ImageBlit {
        src_subresource: color_mip0(),
        src_offsets: full_region(&src.extent),
        dst_subresource: color_mip0(),
        dst_offsets: full_region(&dst.extent),
    };
    self.device.device.cmd_blit_image(
        self.cmd_buf,
        src.image,
        vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
        dst.image,
        vk::ImageLayout::TRANSFER_DST_OPTIMAL,
        &[region],
        vk::Filter::LINEAR,
    );
}
unsafe fn reset_query_pool(&mut self, pool: &QueryPool) { unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {
let device = &self.device.device; let device = &self.device.device;
device.cmd_reset_query_pool( device.cmd_reset_query_pool(self.cmd_buf, pool.pool, 0, pool.n_queries);
self.cmd_buf,
pool.pool,
0,
pool.n_queries,
);
} }
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
@ -603,13 +967,68 @@ impl crate::MemFlags for MemFlags {
} }
} }
impl VkSwapchain {
    /// Acquire the next swapchain image.
    ///
    /// Returns the index of the acquired image together with the semaphore passed to
    /// the acquire call. Semaphores are handed out round-robin; the rotation only
    /// advances when the acquire succeeds.
    pub unsafe fn next(&mut self) -> Result<(usize, vk::Semaphore), Error> {
        let semaphore = self.acquisition_semaphores[self.acquisition_idx];
        // Maximum timeout value; no fence is used.
        let (image_idx, _suboptimal) = self.swapchain_fn.acquire_next_image(
            self.swapchain,
            std::u64::MAX,
            semaphore,
            vk::Fence::null(),
        )?;
        let semaphore_count = self.acquisition_semaphores.len();
        self.acquisition_idx = (self.acquisition_idx + 1) % semaphore_count;

        Ok((image_idx as usize, semaphore))
    }

    /// Wrap swapchain image `idx` in an `Image` handle.
    ///
    /// The returned handle has null memory and view handles; its extent is the
    /// swapchain extent with a depth of 1.
    pub unsafe fn image(&self, idx: usize) -> Image {
        let vk::Extent2D { width, height } = self.extent;
        Image {
            image: self.images[idx],
            image_memory: vk::DeviceMemory::null(),
            image_view: vk::ImageView::null(),
            extent: vk::Extent3D {
                width,
                height,
                depth: 1,
            },
        }
    }

    /// Queue swapchain image `image_idx` for presentation, waiting on `semaphores` first.
    ///
    /// The `bool` in the result is whatever the underlying `queue_present` call returns.
    pub unsafe fn present(
        &self,
        image_idx: usize,
        semaphores: &[vk::Semaphore],
    ) -> Result<bool, Error> {
        let swapchains = [self.swapchain];
        let image_indices = [image_idx as u32];
        let present_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(semaphores)
            .build();
        let presented = self
            .swapchain_fn
            .queue_present(self.present_queue, &present_info)?;
        Ok(presented)
    }
}
unsafe fn choose_compute_device( unsafe fn choose_compute_device(
instance: &Instance, instance: &Instance,
devices: &[vk::PhysicalDevice], devices: &[vk::PhysicalDevice],
surface: Option<&VkSurface>,
) -> Option<(vk::PhysicalDevice, u32)> { ) -> Option<(vk::PhysicalDevice, u32)> {
for pdevice in devices { for pdevice in devices {
let props = instance.get_physical_device_queue_family_properties(*pdevice); let props = instance.get_physical_device_queue_family_properties(*pdevice);
for (ix, info) in props.iter().enumerate() { for (ix, info) in props.iter().enumerate() {
// Check for surface presentation support
if let Some(surface) = surface {
if !surface
.surface_fn
.get_physical_device_surface_support(*pdevice, ix as u32, surface.surface)
.unwrap()
{
continue;
}
}
if info.queue_flags.contains(vk::QueueFlags::COMPUTE) { if info.queue_flags.contains(vk::QueueFlags::COMPUTE) {
return Some((*pdevice, ix as u32)); return Some((*pdevice, ix as u32));
} }
@ -644,3 +1063,13 @@ fn convert_u32_vec(src: &[u8]) -> Vec<u32> {
}) })
.collect() .collect()
} }
fn map_image_layout(layout: ImageLayout) -> vk::ImageLayout {
match layout {
ImageLayout::Undefined => vk::ImageLayout::UNDEFINED,
ImageLayout::Present => vk::ImageLayout::PRESENT_SRC_KHR,
ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL,
ImageLayout::General => vk::ImageLayout::GENERAL,
}
}

View file

@ -1,20 +1,21 @@
[package] [package]
name = "piet-gpu" name = "piet-gpu"
version = "0.1.0" version = "0.1.0"
authors = ["Raph Levien <raph.levien@gmail.com>"] authors = ["Raph Levien <raph.levien@gmail.com>"]
description = "A compute-centric GPU 2D renderer." description = "A compute-centric GPU 2D renderer."
license = "MIT/Apache-2.0" license = "MIT/Apache-2.0"
edition = "2018" edition = "2018"
[dependencies.piet-gpu-hal] [dependencies.piet-gpu-hal]
path = "../piet-gpu-hal" path = "../piet-gpu-hal"
[dependencies.piet-gpu-types] [dependencies.piet-gpu-types]
path = "../piet-gpu-types" path = "../piet-gpu-types"
[dependencies] [dependencies]
kurbo = "0.5.11" kurbo = "0.5.11"
piet = "0.0.12" piet = "0.0.12"
png = "0.16.2" png = "0.16.2"
rand = "0.7.3" rand = "0.7.3"
roxmltree = "0.11" roxmltree = "0.11"
winit = "0.22"

View file

@ -24,9 +24,7 @@ layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg; uint[] fill_seg;
}; };
layout(set = 0, binding = 3) buffer ImageBuf { layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
uint[] image;
};
#include "ptcl.h" #include "ptcl.h"
#include "segment.h" #include "segment.h"
@ -65,11 +63,11 @@ void main() {
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref); SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
for (int i = 0; i < seg_chunk.n; i++) { for (int i = 0; i < seg_chunk.n; i++) {
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i)); Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
vec2 line_vec = seg.end - seg.start; vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos)); df = min(df, length(line_vec * t - dpos));
} }
seg_chunk_ref = seg_chunk.next; seg_chunk_ref = seg_chunk.next;
} while (seg_chunk_ref.offset != 0); } while (seg_chunk_ref.offset != 0);
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
@ -118,8 +116,5 @@ void main() {
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
} }
// TODO: sRGB imageStore(image, ivec2(xy_uint), vec4(rgb, 1.0));
uvec4 s = uvec4(round(vec4(rgb, 1.0) * 255.0));
uint rgba_packed = s.r | (s.g << 8) | (s.b << 16) | (s.a << 24);
image[xy_uint.y * IMAGE_WIDTH + xy_uint.x] = rgba_packed;
} }

View file

@ -8,7 +8,7 @@ use piet::kurbo::{BezPath, Circle, Line, Point, Vec2};
use piet::{Color, RenderContext}; use piet::{Color, RenderContext};
use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags}; use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags};
mod pico_svg; mod pico_svg;
mod render_ctx; mod render_ctx;
@ -16,8 +16,14 @@ mod render_ctx;
use render_ctx::PietGpuRenderContext; use render_ctx::PietGpuRenderContext;
use pico_svg::PicoSvg; use pico_svg::PicoSvg;
const WIDTH: usize = 2048; use winit::{
const HEIGHT: usize = 1536; event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::WindowBuilder,
};
const WIDTH: usize = TILE_W * WIDTH_IN_TILES;
const HEIGHT: usize = TILE_H * HEIGHT_IN_TILES;
const TILE_W: usize = 16; const TILE_W: usize = 16;
const TILE_H: usize = 16; const TILE_H: usize = 16;
@ -34,6 +40,8 @@ const K2_PER_TILE_SIZE: usize = 8;
const N_CIRCLES: usize = 1; const N_CIRCLES: usize = 1;
const NUM_FRAMES: usize = 2;
fn render_scene(rc: &mut impl RenderContext) { fn render_scene(rc: &mut impl RenderContext) {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
for _ in 0..N_CIRCLES { for _ in 0..N_CIRCLES {
@ -108,10 +116,35 @@ fn dump_k1_data(k1_buf: &[u32]) {
} }
} }
fn main() { fn main() -> Result<(), Error> {
let instance = VkInstance::new().unwrap(); let event_loop = EventLoop::new();
let window = WindowBuilder::new()
.with_inner_size(winit::dpi::LogicalSize {
width: (WIDTH_IN_TILES * 8) as f64,
height: (HEIGHT_IN_TILES * 8) as f64,
})
.with_resizable(false) // currently not supported
.build(&event_loop)?;
let (instance, surface) = VkInstance::new(Some(&window))?;
unsafe { unsafe {
let device = instance.device().unwrap(); let device = instance.device(surface.as_ref())?;
let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?;
let mut current_frame = 0;
let present_semaphores = (0..NUM_FRAMES)
.map(|_| device.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?;
let frame_fences = (0..NUM_FRAMES)
.map(|_| device.create_fence(false))
.collect::<Result<Vec<_>, Error>>()?;
let mut cmd_buffers = (0..NUM_FRAMES)
.map(|_| device.create_cmd_buf())
.collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES)
.map(|_| device.create_query_pool(6))
.collect::<Result<Vec<_>, Error>>()?;
let host = MemFlags::host_coherent(); let host = MemFlags::host_coherent();
let dev = MemFlags::device_local(); let dev = MemFlags::device_local();
let mut ctx = PietGpuRenderContext::new(); let mut ctx = PietGpuRenderContext::new();
@ -124,30 +157,27 @@ fn main() {
let scene_dev = device let scene_dev = device
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap(); .unwrap();
device.write_buffer(&scene_buf, &scene).unwrap(); device.write_buffer(&scene_buf, &scene)?;
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?;
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap(); let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
let image_buf = device let image_buf = device.create_buffer((WIDTH * HEIGHT * 4) as u64, host)?;
.create_buffer((WIDTH * HEIGHT * 4) as u64, host) let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
.unwrap();
let image_dev = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
.unwrap();
let k1_alloc_buf_host = device.create_buffer(4, host).unwrap(); let k1_alloc_buf_host = device.create_buffer(4, host)?;
let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); let k1_alloc_buf_dev = device.create_buffer(4, dev)?;
let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE; let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE;
device device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?;
.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])
.unwrap();
let k1_code = include_bytes!("../shader/kernel1.spv"); let k1_code = include_bytes!("../shader/kernel1.spv");
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap(); let k1_pipeline = device
.create_simple_compute_pipeline(k1_code, 3, 0)
.unwrap();
let k1_ds = device let k1_ds = device
.create_descriptor_set( .create_descriptor_set(
&k1_pipeline, &k1_pipeline,
&[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
&[],
) )
.unwrap(); .unwrap();
@ -158,11 +188,14 @@ fn main() {
.write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32]) .write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])
.unwrap(); .unwrap();
let k2s_code = include_bytes!("../shader/kernel2s.spv"); let k2s_code = include_bytes!("../shader/kernel2s.spv");
let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4).unwrap(); let k2s_pipeline = device
.create_simple_compute_pipeline(k2s_code, 4, 0)
.unwrap();
let k2s_ds = device let k2s_ds = device
.create_descriptor_set( .create_descriptor_set(
&k2s_pipeline, &k2s_pipeline,
&[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev],
&[],
) )
.unwrap(); .unwrap();
@ -193,7 +226,7 @@ fn main() {
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
.unwrap(); .unwrap();
let k3_code = include_bytes!("../shader/kernel3.spv"); let k3_code = include_bytes!("../shader/kernel3.spv");
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6).unwrap(); let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0).unwrap();
let k3_ds = device let k3_ds = device
.create_descriptor_set( .create_descriptor_set(
&k3_pipeline, &k3_pipeline,
@ -205,19 +238,16 @@ fn main() {
&ptcl_buf, &ptcl_buf,
&k3_alloc_buf_dev, &k3_alloc_buf_dev,
], ],
&[],
) )
.unwrap(); .unwrap();
let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 4).unwrap(); let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1).unwrap();
let k4_ds = device let k4_ds = device
.create_descriptor_set( .create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev])
&k4_pipeline,
&[&ptcl_buf, &segment_buf, &fill_seg_buf, &image_dev],
)
.unwrap(); .unwrap();
let query_pool = &query_pools[0];
let query_pool = device.create_query_pool(6).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev); cmd_buf.copy_buffer(&scene_buf, &scene_dev);
@ -232,6 +262,7 @@ fn main() {
cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier(&image_dev, ImageLayout::Undefined, ImageLayout::General);
cmd_buf.reset_query_pool(&query_pool); cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch( cmd_buf.dispatch(
@ -272,10 +303,12 @@ fn main() {
); );
cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&image_dev, &image_buf); cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc);
cmd_buf.copy_image_to_buffer(&image_dev, &image_buf);
cmd_buf.finish(); cmd_buf.finish();
device.run_cmd_buf(&cmd_buf).unwrap(); device.run_cmd_buf(&cmd_buf, &[], &[], Some(&frame_fences[0]))?;
let timestamps = device.reap_query_pool(query_pool).unwrap(); device.wait_and_reset(&[frame_fences[0]])?;
let timestamps = device.reap_query_pool(&query_pool).unwrap();
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
println!( println!(
"Kernel 2s time: {:.3}ms", "Kernel 2s time: {:.3}ms",
@ -300,21 +333,129 @@ fn main() {
dump_k1_data(&k1_data); dump_k1_data(&k1_data);
*/ */
let mut img_data: Vec<u8> = Default::default(); if false {
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy let mut img_data: Vec<u8> = Default::default();
// (probably passing a slice into a closure). But for now: keep it simple. // Note: because png can use a `&[u8]` slice, we could avoid an extra copy
device.read_buffer(&image_buf, &mut img_data).unwrap(); // (probably passing a slice into a closure). But for now: keep it simple.
device.read_buffer(&image_buf, &mut img_data).unwrap();
// Write image as PNG file. // Write image as PNG file.
let path = Path::new("image.png"); let path = Path::new("image.png");
let file = File::create(path).unwrap(); let file = File::create(path).unwrap();
let ref mut w = BufWriter::new(file); let ref mut w = BufWriter::new(file);
let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32);
encoder.set_color(png::ColorType::RGBA); encoder.set_color(png::ColorType::RGBA);
encoder.set_depth(png::BitDepth::Eight); encoder.set_depth(png::BitDepth::Eight);
let mut writer = encoder.write_header().unwrap(); let mut writer = encoder.write_header().unwrap();
writer.write_image_data(&img_data).unwrap(); writer.write_image_data(&img_data).unwrap();
}
event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Wait;
match event {
Event::WindowEvent { event, window_id } if window_id == window.id() => {
match event {
WindowEvent::CloseRequested => {
*control_flow = ControlFlow::Exit;
}
_ => (),
}
}
Event::MainEventsCleared => {
window.request_redraw();
}
Event::RedrawRequested(window_id) if window_id == window.id() => {
let frame_idx = current_frame % NUM_FRAMES;
let query_pool = &query_pools[frame_idx];
if current_frame >= NUM_FRAMES {
device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap();
let timestamps = device.reap_query_pool(query_pool).unwrap();
window.set_title(&format!("k1: {:.3}ms, k2: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms",
timestamps[0] * 1e3,
(timestamps[1] - timestamps[0]) * 1e3,
(timestamps[2] - timestamps[1]) * 1e3,
(timestamps[3] - timestamps[2]) * 1e3,
));
}
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
let swap_image = swapchain.image(image_idx);
let cmd_buf = &mut cmd_buffers[frame_idx];
cmd_buf.begin();
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev);
cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier();
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&k1_pipeline,
&k1_ds,
((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&k2s_pipeline,
&k2s_ds,
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&k3_pipeline,
&k3_ds,
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(&image_dev, ImageLayout::BlitSrc, ImageLayout::General);
cmd_buf.dispatch(
&k4_pipeline,
&k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(
&swap_image,
ImageLayout::Undefined,
ImageLayout::BlitDst,
);
cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc);
cmd_buf.blit_image(&image_dev, &swap_image);
cmd_buf.image_barrier(
&swap_image,
ImageLayout::BlitDst,
ImageLayout::Present,
);
cmd_buf.finish();
device
.run_cmd_buf(
&cmd_buf,
&[acquisition_semaphore],
&[present_semaphores[frame_idx]],
Some(&frame_fences[frame_idx]),
)
.unwrap();
swapchain
.present(image_idx, &[present_semaphores[frame_idx]])
.unwrap();
current_frame += 1;
}
_ => (),
}
})
} }
} }