From b38e43f0c25b2ad2435cba7f22b3d9dee3abb1b2 Mon Sep 17 00:00:00 2001 From: msiglreith Date: Thu, 30 Apr 2020 15:02:48 +0200 Subject: [PATCH] Initial work for surface support surface: handle extensions Implement swapchain creation and blit image to screen --- piet-gpu-hal/Cargo.toml | 2 + piet-gpu-hal/examples/collatz.rs | 4 +- piet-gpu-hal/src/lib.rs | 49 ++- piet-gpu-hal/src/vulkan.rs | 591 ++++++++++++++++++++++++++----- piet-gpu/Cargo.toml | 41 +-- piet-gpu/shader/kernel4.comp | 19 +- piet-gpu/src/main.rs | 235 +++++++++--- 7 files changed, 776 insertions(+), 165 deletions(-) diff --git a/piet-gpu-hal/Cargo.toml b/piet-gpu-hal/Cargo.toml index 7019002..d2edbd7 100644 --- a/piet-gpu-hal/Cargo.toml +++ b/piet-gpu-hal/Cargo.toml @@ -9,3 +9,5 @@ edition = "2018" [dependencies] ash = "0.30" once_cell = "1.3.1" +ash-window = { git = "https://github.com/norse-rs/ash-window.git", branch = "dyn_trait" } +raw-window-handle = "0.3" diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs index a4777b4..fed6a1d 100644 --- a/piet-gpu-hal/examples/collatz.rs +++ b/piet-gpu-hal/examples/collatz.rs @@ -2,9 +2,9 @@ use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::{CmdBuf, Device, MemFlags}; fn main() { - let instance = VkInstance::new().unwrap(); + let (instance, _) = VkInstance::new(None).unwrap(); unsafe { - let device = instance.device().unwrap(); + let device = instance.device(None).unwrap(); let mem_flags = MemFlags::host_coherent(); let src = (0..256).map(|x| x + 1).collect::>(); let buffer = device diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index d215490..67fe18e 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -5,28 +5,49 @@ pub mod vulkan; /// This isn't great but is expedient. -type Error = Box; +pub type Error = Box; + +#[derive(Copy, Clone, Debug)] +pub enum ImageLayout { + Undefined, + Present, + BlitSrc, + BlitDst, + General, +} pub trait Device: Sized { type Buffer; + type Image; type MemFlags: MemFlags; type Pipeline; type DescriptorSet; type QueryPool; type CmdBuf: CmdBuf; + type Fence; + type Semaphore; fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result; + unsafe fn create_image2d( + &self, + width: u32, + height: u32, + mem_flags: Self::MemFlags, + ) -> Result; + unsafe fn create_simple_compute_pipeline( &self, code: &[u8], n_buffers: u32, + n_images: u32, ) -> Result; unsafe fn create_descriptor_set( &self, pipeline: &Self::Pipeline, bufs: &[&Self::Buffer], + images: &[&Self::Image], ) -> Result; fn create_cmd_buf(&self) -> Result; @@ -40,9 +61,15 @@ pub trait Device: Sized { /// /// # Safety /// All submitted commands that refer to this query pool must have completed. - unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result, Error>; + unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result, Error>; - unsafe fn run_cmd_buf(&self, cmd_buf: &Self::CmdBuf) -> Result<(), Error>; + unsafe fn run_cmd_buf( + &self, + cmd_buf: &Self::CmdBuf, + wait_semaphores: &[Self::Semaphore], + signal_semaphores: &[Self::Semaphore], + fence: Option<&Self::Fence>, + ) -> Result<(), Error>; unsafe fn read_buffer( &self, @@ -55,6 +82,10 @@ pub trait Device: Sized { buffer: &Self::Buffer, contents: &[T], ) -> Result<(), Error>; + + unsafe fn create_semaphore(&self) -> Result; + unsafe fn create_fence(&self, signaled: bool) -> Result; + unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>; } pub trait CmdBuf { @@ -71,6 +102,13 @@ pub trait CmdBuf { unsafe fn memory_barrier(&mut self); + unsafe fn image_barrier( + &mut self, + image: &D::Image, + src_layout: ImageLayout, + dst_layout: ImageLayout, + ); + /// Clear the buffer. /// /// This is readily supported in Vulkan, but for portability it is remarkably @@ -80,6 +118,11 @@ pub trait CmdBuf { unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); + unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer); + + // low portability, dx12 doesn't support it natively + unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image); + /// Reset the query pool. /// /// The query pool must be reset before each use, to avoid validation errors. diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 2a5abd9..cd44ec7 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -4,12 +4,12 @@ use std::borrow::Cow; use std::ffi::{CStr, CString}; use std::sync::Arc; -use ash::extensions::ext::DebugUtils; +use ash::extensions::{ext::DebugUtils, khr}; use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0}; use ash::{vk, Device, Entry, Instance}; use once_cell::sync::Lazy; -use crate::Error; +use crate::{Device as DeviceTrait, Error, ImageLayout}; pub struct VkInstance { /// Retain the dynamic lib. @@ -22,6 +22,7 @@ pub struct VkInstance { pub struct VkDevice { device: Arc, + physical_device: vk::PhysicalDevice, device_mem_props: vk::PhysicalDeviceMemoryProperties, queue: vk::Queue, qfi: u32, @@ -32,6 +33,23 @@ struct RawDevice { device: Device, } +pub struct VkSurface { + surface: vk::SurfaceKHR, + surface_fn: khr::Surface, +} + +pub struct VkSwapchain { + swapchain: vk::SwapchainKHR, + swapchain_fn: khr::Swapchain, + + present_queue: vk::Queue, + + acquisition_idx: usize, + acquisition_semaphores: Vec, // same length as `images` + images: Vec, + extent: vk::Extent2D, +} + /// A handle to a buffer. /// /// There is no lifetime tracking at this level; the caller is responsible @@ -42,6 +60,13 @@ pub struct Buffer { size: u64, } +pub struct Image { + image: vk::Image, + image_memory: vk::DeviceMemory, + image_view: vk::ImageView, + extent: vk::Extent3D, +} + pub struct Pipeline { pipeline: vk::Pipeline, descriptor_set_layout: vk::DescriptorSetLayout, @@ -88,11 +113,7 @@ unsafe extern "system" fn vulkan_debug_callback( println!( "{:?}:\n{:?} [{} ({})] : {}\n", - message_severity, - message_type, - message_id_name, - message_id_number, - message, + message_severity, message_type, message_id_name, message_id_number, message, ); vk::FALSE @@ -119,40 +140,59 @@ impl VkInstance { /// /// There's more to be done to make this suitable for integration with other /// systems, but for now the goal is to make things simple. - pub fn new() -> Result { + /// + /// The caller is responsible for making sure that window which owns the raw window handle + /// outlives the surface. + pub fn new( + window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>, + ) -> Result<(VkInstance, Option), Error> { unsafe { let app_name = CString::new("VkToy").unwrap(); let entry = Entry::new()?; - let exist_layers = entry - .enumerate_instance_layer_properties()?; - let layers = LAYERS.iter().filter_map(|&lyr| { - exist_layers - .iter() - .find(|x| - CStr::from_ptr(x.layer_name.as_ptr()) == lyr - ) - .map(|_| lyr.as_ptr()) - .or_else(|| { - println!("Unable to find layer: {}, have you installed the Vulkan SDK?", lyr.to_string_lossy()); - None - }) - }).collect::>(); + let exist_layers = entry.enumerate_instance_layer_properties()?; + let layers = LAYERS + .iter() + .filter_map(|&lyr| { + exist_layers + .iter() + .find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr) + .map(|_| lyr.as_ptr()) + .or_else(|| { + println!( + "Unable to find layer: {}, have you installed the Vulkan SDK?", + lyr.to_string_lossy() + ); + None + }) + }) + .collect::>(); - let exist_exts = entry - .enumerate_instance_extension_properties()?; - let exts = EXTS.iter().filter_map(|&ext| { - exist_exts - .iter() - .find(|x| - CStr::from_ptr(x.extension_name.as_ptr()) == ext - ) - .map(|_| ext.as_ptr()) - .or_else(|| { - println!("Unable to find extension: {}, have you installed the Vulkan SDK?", ext.to_string_lossy()); - None - }) - }).collect::>(); + let exist_exts = entry.enumerate_instance_extension_properties()?; + let mut exts = EXTS + .iter() + .filter_map(|&ext| { + exist_exts + .iter() + .find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext) + .map(|_| ext.as_ptr()) + .or_else(|| { + println!( + "Unable to find extension: {}, have you installed the Vulkan SDK?", + ext.to_string_lossy() + ); + None + }) + }) + .collect::>(); + + let surface_extensions = match window_handle { + Some(ref handle) => ash_window::enumerate_required_extensions(*handle)?, + None => vec![], + }; + for extension in surface_extensions { + exts.push(extension.as_ptr()); + } let instance = entry.create_instance( &vk::InstanceCreateInfo::builder() @@ -168,7 +208,7 @@ impl VkInstance { None, )?; - let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) { + let (_dbg_loader, _dbg_callbk) = if false { let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder() .message_severity( vk::DebugUtilsMessageSeverityFlagsEXT::ERROR @@ -185,37 +225,51 @@ impl VkInstance { (None, None) }; - Ok(VkInstance { + let vk_surface = match window_handle { + Some(handle) => Some(VkSurface { + surface: ash_window::create_surface(&entry, &instance, handle, None)?, + surface_fn: khr::Surface::new(&entry, &instance), + }), + None => None, + }; + + let vk_instance = VkInstance { entry, instance, _dbg_loader, _dbg_callbk, - }) + }; + + Ok((vk_instance, vk_surface)) } } - /// Create a device from the instance, suitable for compute. + /// Create a device from the instance, suitable for compute, with an optional surface. /// /// # Safety /// - /// The caller is responsible for making sure that the instance outlives the device. - /// We could enforce that, for example having an `Arc` of the raw instance, but for - /// now keep things simple. - pub unsafe fn device(&self) -> Result { + /// The caller is responsible for making sure that the instance outlives the device + /// and surface. We could enforce that, for example having an `Arc` of the raw instance, + /// but for now keep things simple. + pub unsafe fn device(&self, surface: Option<&VkSurface>) -> Result { let devices = self.instance.enumerate_physical_devices()?; let (pdevice, qfi) = - choose_compute_device(&self.instance, &devices).ok_or("no suitable device")?; + choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?; - let device = self.instance.create_device( - pdevice, - &vk::DeviceCreateInfo::builder().queue_create_infos(&[ - vk::DeviceQueueCreateInfo::builder() - .queue_family_index(qfi) - .queue_priorities(&[1.0]) - .build(), - ]), - None, - )?; + let queue_priorities = [1.0]; + let queue_create_infos = [vk::DeviceQueueCreateInfo::builder() + .queue_family_index(qfi) + .queue_priorities(&queue_priorities) + .build()]; + let extensions = match surface { + Some(_) => vec![khr::Swapchain::name().as_ptr()], + None => vec![], + }; + let create_info = vk::DeviceCreateInfo::builder() + .queue_create_infos(&queue_create_infos) + .enabled_extension_names(&extensions) + .build(); + let device = self.instance.create_device(pdevice, &create_info, None)?; let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice); @@ -229,21 +283,98 @@ impl VkInstance { Ok(VkDevice { device, + physical_device: pdevice, device_mem_props, qfi, queue, timestamp_period, }) } + + pub unsafe fn swapchain( + &self, + device: &VkDevice, + surface: &VkSurface, + ) -> Result { + let formats = surface + .surface_fn + .get_physical_device_surface_formats(device.physical_device, surface.surface)?; + let surface_format = formats + .iter() + .map(|surface_fmt| match surface_fmt.format { + vk::Format::UNDEFINED => { + vk::SurfaceFormatKHR { + format: vk::Format::B8G8R8A8_UNORM, // most common format on desktop + color_space: surface_fmt.color_space, + } + } + _ => *surface_fmt, + }) + .next() + .ok_or("no surface format found")?; + + let capabilities = surface + .surface_fn + .get_physical_device_surface_capabilities(device.physical_device, surface.surface)?; + + let present_modes = surface + .surface_fn + .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?; + + let present_mode = present_modes + .into_iter() + .find(|mode| mode == &vk::PresentModeKHR::MAILBOX) + .unwrap_or(vk::PresentModeKHR::FIFO); + + let image_count = 2; // TODO + let extent = capabilities.current_extent; // TODO: wayland for example will complain here .. + + let create_info = vk::SwapchainCreateInfoKHR::builder() + .surface(surface.surface) + .min_image_count(image_count) + .image_format(surface_format.format) + .image_color_space(surface_format.color_space) + .image_extent(extent) + .image_array_layers(1) + .image_usage(vk::ImageUsageFlags::TRANSFER_DST) + .image_sharing_mode(vk::SharingMode::EXCLUSIVE) + .pre_transform(vk::SurfaceTransformFlagsKHR::IDENTITY) + .composite_alpha(vk::CompositeAlphaFlagsKHR::OPAQUE) + .present_mode(present_mode) + .clipped(true); + + let swapchain_fn = khr::Swapchain::new(&self.instance, &device.device.device); + let swapchain = swapchain_fn.create_swapchain(&create_info, None)?; + + let images = swapchain_fn.get_swapchain_images(swapchain)?; + let acquisition_semaphores = (0..images.len()) + .map(|_| device.create_semaphore()) + .collect::, Error>>()?; + + Ok(VkSwapchain { + swapchain, + swapchain_fn, + + present_queue: device.queue, + + images, + acquisition_semaphores, + acquisition_idx: 0, + extent, + }) + } } impl crate::Device for VkDevice { type Buffer = Buffer; + type Image = Image; type CmdBuf = CmdBuf; type DescriptorSet = DescriptorSet; type Pipeline = Pipeline; type QueryPool = QueryPool; type MemFlags = MemFlags; + type Fence = vk::Fence; + type Semaphore = vk::Semaphore; fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result { unsafe { @@ -281,6 +412,96 @@ impl crate::Device for VkDevice { } } + unsafe fn create_image2d( + &self, + width: u32, + height: u32, + mem_flags: Self::MemFlags, + ) -> Result { + let device = &self.device.device; + let extent = vk::Extent3D { + width, + height, + depth: 1, + }; + let image = device.create_image( + &vk::ImageCreateInfo::builder() + .image_type(vk::ImageType::TYPE_2D) + .format(vk::Format::R8G8B8A8_UNORM) + .extent(extent) + .mip_levels(1) + .array_layers(1) + .samples(vk::SampleCountFlags::TYPE_1) + .tiling(vk::ImageTiling::OPTIMAL) + .initial_layout(vk::ImageLayout::UNDEFINED) + .usage(vk::ImageUsageFlags::STORAGE | vk::ImageUsageFlags::TRANSFER_SRC) // write in compute and blit src + .sharing_mode(vk::SharingMode::EXCLUSIVE), + None, + )?; + let mem_requirements = device.get_image_memory_requirements(image); + let mem_type = find_memory_type( + mem_requirements.memory_type_bits, + mem_flags.0, + &self.device_mem_props, + ) + .unwrap(); // TODO: proper error + let image_memory = device.allocate_memory( + &vk::MemoryAllocateInfo::builder() + .allocation_size(mem_requirements.size) + .memory_type_index(mem_type), + None, + )?; + device.bind_image_memory(image, image_memory, 0)?; + let image_view = device.create_image_view( + &vk::ImageViewCreateInfo::builder() + .view_type(vk::ImageViewType::TYPE_2D) + .image(image) + .format(vk::Format::R8G8B8A8_UNORM) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: 1, + base_array_layer: 0, + layer_count: 1, + }) + .components(vk::ComponentMapping { + r: vk::ComponentSwizzle::IDENTITY, + g: vk::ComponentSwizzle::IDENTITY, + b: vk::ComponentSwizzle::IDENTITY, + a: vk::ComponentSwizzle::IDENTITY, + }) + .build(), + None, + )?; + Ok(Image { + image, + image_memory, + image_view, + extent, + }) + } + + unsafe fn create_fence(&self, signaled: bool) -> Result { + let device = &self.device.device; + let mut flags = vk::FenceCreateFlags::empty(); + if signaled { + flags |= vk::FenceCreateFlags::SIGNALED; + } + Ok(device.create_fence(&vk::FenceCreateInfo::builder().flags(flags).build(), None)?) + } + + unsafe fn create_semaphore(&self) -> Result { + let device = &self.device.device; + Ok(device.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)?) + } + + unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error> { + let device = &self.device.device; + device.wait_for_fences(fences, true, !0)?; + device.reset_fences(fences)?; + Ok(()) + } + /// This creates a pipeline that runs over the buffer. /// /// The descriptor set layout is just some number of buffers (this will change). @@ -288,18 +509,30 @@ impl crate::Device for VkDevice { &self, code: &[u8], n_buffers: u32, + n_images: u32, ) -> Result { let device = &self.device.device; - let bindings = (0..n_buffers) - .map(|i| { + let mut bindings = Vec::new(); + for i in 0..n_buffers { + bindings.push( vk::DescriptorSetLayoutBinding::builder() .binding(i) .descriptor_type(vk::DescriptorType::STORAGE_BUFFER) .descriptor_count(1) .stage_flags(vk::ShaderStageFlags::COMPUTE) - .build() - }) - .collect::>(); + .build(), + ); + } + for i in n_buffers..n_buffers + n_images { + bindings.push( + vk::DescriptorSetLayoutBinding::builder() + .binding(i) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .descriptor_count(1) + .stage_flags(vk::ShaderStageFlags::COMPUTE) + .build(), + ); + } let descriptor_set_layout = device.create_descriptor_set_layout( &vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings), None, @@ -344,12 +577,26 @@ impl crate::Device for VkDevice { &self, pipeline: &Pipeline, bufs: &[&Buffer], + images: &[&Image], ) -> Result { let device = &self.device.device; - let descriptor_pool_sizes = [vk::DescriptorPoolSize::builder() - .ty(vk::DescriptorType::STORAGE_BUFFER) - .descriptor_count(bufs.len() as u32) - .build()]; + let mut descriptor_pool_sizes = Vec::new(); + if !bufs.is_empty() { + descriptor_pool_sizes.push( + vk::DescriptorPoolSize::builder() + .ty(vk::DescriptorType::STORAGE_BUFFER) + .descriptor_count(bufs.len() as u32) + .build(), + ); + } + if !images.is_empty() { + descriptor_pool_sizes.push( + vk::DescriptorPoolSize::builder() + .ty(vk::DescriptorType::STORAGE_IMAGE) + .descriptor_count(images.len() as u32) + .build(), + ); + } let descriptor_pool = device.create_descriptor_pool( &vk::DescriptorPoolCreateInfo::builder() .pool_sizes(&descriptor_pool_sizes) @@ -380,6 +627,23 @@ impl crate::Device for VkDevice { &[], ); } + for (i, image) in images.iter().enumerate() { + let binding = i + bufs.len(); + let image_info = vk::DescriptorImageInfo::builder() + .sampler(vk::Sampler::null()) + .image_view(image.image_view) + .image_layout(vk::ImageLayout::GENERAL) + .build(); + device.update_descriptor_sets( + &[vk::WriteDescriptorSet::builder() + .dst_set(descriptor_sets[0]) + .dst_binding(binding as u32) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) + .image_info(&[image_info]) + .build()], + &[], + ); + } Ok(DescriptorSet { descriptor_set: descriptor_sets[0], }) @@ -390,7 +654,7 @@ impl crate::Device for VkDevice { let device = &self.device.device; let command_pool = device.create_command_pool( &vk::CommandPoolCreateInfo::builder() - .flags(vk::CommandPoolCreateFlags::empty()) + .flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER) .queue_family_index(self.qfi), None, )?; @@ -421,7 +685,7 @@ impl crate::Device for VkDevice { } } - unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result, Error> { + unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result, Error> { let device = &self.device.device; let mut buf = vec![0u64; pool.n_queries as usize]; device.get_query_pool_results( @@ -431,7 +695,6 @@ impl crate::Device for VkDevice { &mut buf, vk::QueryResultFlags::TYPE_64, )?; - device.destroy_query_pool(pool.pool, None); let ts0 = buf[0]; let tsp = self.timestamp_period as f64 * 1e-9; let result = buf[1..] @@ -444,23 +707,33 @@ impl crate::Device for VkDevice { /// Run the command buffer. /// /// This version simply blocks until it's complete. - unsafe fn run_cmd_buf(&self, cmd_buf: &CmdBuf) -> Result<(), Error> { + unsafe fn run_cmd_buf( + &self, + cmd_buf: &CmdBuf, + wait_semaphores: &[Self::Semaphore], + signal_semaphores: &[Self::Semaphore], + fence: Option<&Self::Fence>, + ) -> Result<(), Error> { let device = &self.device.device; - // Run the command buffer. - let fence = device.create_fence( - &vk::FenceCreateInfo::builder().flags(vk::FenceCreateFlags::empty()), - None, - )?; + let fence = match fence { + Some(fence) => *fence, + None => vk::Fence::null(), + }; + let wait_stages = wait_semaphores + .iter() + .map(|_| vk::PipelineStageFlags::ALL_COMMANDS) + .collect::>(); device.queue_submit( self.queue, &[vk::SubmitInfo::builder() .command_buffers(&[cmd_buf.cmd_buf]) + .wait_semaphores(wait_semaphores) + .signal_semaphores(signal_semaphores) + .wait_dst_stage_mask(&wait_stages) .build()], fence, )?; - device.wait_for_fences(&[fence], true, 100_000_000)?; - // TODO: handle errors better (currently leaks fence and can lead to other problems) Ok(()) } @@ -556,6 +829,37 @@ impl crate::CmdBuf for CmdBuf { ); } + unsafe fn image_barrier( + &mut self, + image: &Image, + src_layout: ImageLayout, + dst_layout: ImageLayout, + ) { + let device = &self.device.device; + device.cmd_pipeline_barrier( + self.cmd_buf, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::PipelineStageFlags::ALL_COMMANDS, + vk::DependencyFlags::empty(), + &[], + &[], + &[vk::ImageMemoryBarrier::builder() + .image(image.image) + .src_access_mask(vk::AccessFlags::MEMORY_WRITE) + .dst_access_mask(vk::AccessFlags::MEMORY_READ) + .old_layout(map_image_layout(src_layout)) + .new_layout(map_image_layout(dst_layout)) + .subresource_range(vk::ImageSubresourceRange { + aspect_mask: vk::ImageAspectFlags::COLOR, + base_mip_level: 0, + level_count: vk::REMAINING_MIP_LEVELS, + base_array_layer: 0, + layer_count: vk::REMAINING_MIP_LEVELS, + }) + .build()], + ); + } + unsafe fn clear_buffer(&self, buffer: &Buffer) { let device = &self.device.device; device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0); @@ -572,14 +876,74 @@ impl crate::CmdBuf for CmdBuf { ); } + unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { + let device = &self.device.device; + device.cmd_copy_image_to_buffer( + self.cmd_buf, + src.image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + dst.buffer, + &[vk::BufferImageCopy { + buffer_offset: 0, + buffer_row_length: 0, // tight packing + buffer_image_height: 0, // tight packing + image_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + image_offset: vk::Offset3D { x: 0, y: 0, z: 0 }, + image_extent: src.extent, + }], + ); + } + + unsafe fn blit_image(&self, src: &Image, dst: &Image) { + let device = &self.device.device; + device.cmd_blit_image( + self.cmd_buf, + src.image, + vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + dst.image, + vk::ImageLayout::TRANSFER_DST_OPTIMAL, + &[vk::ImageBlit { + src_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + src_offsets: [ + vk::Offset3D { x: 0, y: 0, z: 0 }, + vk::Offset3D { + x: src.extent.width as i32, + y: src.extent.height as i32, + z: src.extent.depth as i32, + }, + ], + dst_subresource: vk::ImageSubresourceLayers { + aspect_mask: vk::ImageAspectFlags::COLOR, + mip_level: 0, + base_array_layer: 0, + layer_count: 1, + }, + dst_offsets: [ + vk::Offset3D { x: 0, y: 0, z: 0 }, + vk::Offset3D { + x: dst.extent.width as i32, + y: dst.extent.height as i32, + z: dst.extent.depth as i32, + }, + ], + }], + vk::Filter::LINEAR, + ); + } + unsafe fn reset_query_pool(&mut self, pool: &QueryPool) { let device = &self.device.device; - device.cmd_reset_query_pool( - self.cmd_buf, - pool.pool, - 0, - pool.n_queries, - ); + device.cmd_reset_query_pool(self.cmd_buf, pool.pool, 0, pool.n_queries); } unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { @@ -603,13 +967,68 @@ impl crate::MemFlags for MemFlags { } } +impl VkSwapchain { + pub unsafe fn next(&mut self) -> Result<(usize, vk::Semaphore), Error> { + let acquisition_semaphore = self.acquisition_semaphores[self.acquisition_idx]; + let (image_idx, _suboptimal) = self.swapchain_fn.acquire_next_image( + self.swapchain, + !0, + self.acquisition_semaphores[self.acquisition_idx], + vk::Fence::null(), + )?; + self.acquisition_idx = (self.acquisition_idx + 1) % self.acquisition_semaphores.len(); + + Ok((image_idx as usize, acquisition_semaphore)) + } + + pub unsafe fn image(&self, idx: usize) -> Image { + Image { + image: self.images[idx], + image_memory: vk::DeviceMemory::null(), + image_view: vk::ImageView::null(), + extent: vk::Extent3D { + width: self.extent.width, + height: self.extent.height, + depth: 1, + }, + } + } + + pub unsafe fn present( + &self, + image_idx: usize, + semaphores: &[vk::Semaphore], + ) -> Result { + Ok(self.swapchain_fn.queue_present( + self.present_queue, + &vk::PresentInfoKHR::builder() + .swapchains(&[self.swapchain]) + .image_indices(&[image_idx as u32]) + .wait_semaphores(semaphores) + .build(), + )?) + } +} + unsafe fn choose_compute_device( instance: &Instance, devices: &[vk::PhysicalDevice], + surface: Option<&VkSurface>, ) -> Option<(vk::PhysicalDevice, u32)> { for pdevice in devices { let props = instance.get_physical_device_queue_family_properties(*pdevice); for (ix, info) in props.iter().enumerate() { + // Check for surface presentation support + if let Some(surface) = surface { + if !surface + .surface_fn + .get_physical_device_surface_support(*pdevice, ix as u32, surface.surface) + .unwrap() + { + continue; + } + } + if info.queue_flags.contains(vk::QueueFlags::COMPUTE) { return Some((*pdevice, ix as u32)); } @@ -644,3 +1063,13 @@ fn convert_u32_vec(src: &[u8]) -> Vec { }) .collect() } + +fn map_image_layout(layout: ImageLayout) -> vk::ImageLayout { + match layout { + ImageLayout::Undefined => vk::ImageLayout::UNDEFINED, + ImageLayout::Present => vk::ImageLayout::PRESENT_SRC_KHR, + ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, + ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL, + ImageLayout::General => vk::ImageLayout::GENERAL, + } +} diff --git a/piet-gpu/Cargo.toml b/piet-gpu/Cargo.toml index 2555e62..d4522f0 100644 --- a/piet-gpu/Cargo.toml +++ b/piet-gpu/Cargo.toml @@ -1,20 +1,21 @@ -[package] -name = "piet-gpu" -version = "0.1.0" -authors = ["Raph Levien "] -description = "A compute-centric GPU 2D renderer." -license = "MIT/Apache-2.0" -edition = "2018" - -[dependencies.piet-gpu-hal] -path = "../piet-gpu-hal" - -[dependencies.piet-gpu-types] -path = "../piet-gpu-types" - -[dependencies] -kurbo = "0.5.11" -piet = "0.0.12" -png = "0.16.2" -rand = "0.7.3" -roxmltree = "0.11" +[package] +name = "piet-gpu" +version = "0.1.0" +authors = ["Raph Levien "] +description = "A compute-centric GPU 2D renderer." +license = "MIT/Apache-2.0" +edition = "2018" + +[dependencies.piet-gpu-hal] +path = "../piet-gpu-hal" + +[dependencies.piet-gpu-types] +path = "../piet-gpu-types" + +[dependencies] +kurbo = "0.5.11" +piet = "0.0.12" +png = "0.16.2" +rand = "0.7.3" +roxmltree = "0.11" +winit = "0.22" \ No newline at end of file diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 1754bba..f152caf 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -24,9 +24,7 @@ layout(set = 0, binding = 2) buffer FillSegBuf { uint[] fill_seg; }; -layout(set = 0, binding = 3) buffer ImageBuf { - uint[] image; -}; +layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image; #include "ptcl.h" #include "segment.h" @@ -65,11 +63,11 @@ void main() { SegChunk seg_chunk = SegChunk_read(seg_chunk_ref); for (int i = 0; i < seg_chunk.n; i++) { Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i)); - vec2 line_vec = seg.end - seg.start; - vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df = min(df, length(line_vec * t - dpos)); - } + vec2 line_vec = seg.end - seg.start; + vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; + float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); + df = min(df, length(line_vec * t - dpos)); + } seg_chunk_ref = seg_chunk.next; } while (seg_chunk_ref.offset != 0); fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; @@ -118,8 +116,5 @@ void main() { cmd_ref.offset += Cmd_size; } - // TODO: sRGB - uvec4 s = uvec4(round(vec4(rgb, 1.0) * 255.0)); - uint rgba_packed = s.r | (s.g << 8) | (s.b << 16) | (s.a << 24); - image[xy_uint.y * IMAGE_WIDTH + xy_uint.x] = rgba_packed; + imageStore(image, ivec2(xy_uint), vec4(rgb, 1.0)); } diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index c40b4d5..0b7a6fe 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -8,7 +8,7 @@ use piet::kurbo::{BezPath, Circle, Line, Point, Vec2}; use piet::{Color, RenderContext}; use piet_gpu_hal::vulkan::VkInstance; -use piet_gpu_hal::{CmdBuf, Device, MemFlags}; +use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags}; mod pico_svg; mod render_ctx; @@ -16,8 +16,14 @@ mod render_ctx; use render_ctx::PietGpuRenderContext; use pico_svg::PicoSvg; -const WIDTH: usize = 2048; -const HEIGHT: usize = 1536; +use winit::{ + event::{Event, WindowEvent}, + event_loop::{ControlFlow, EventLoop}, + window::WindowBuilder, +}; + +const WIDTH: usize = TILE_W * WIDTH_IN_TILES; +const HEIGHT: usize = TILE_H * HEIGHT_IN_TILES; const TILE_W: usize = 16; const TILE_H: usize = 16; @@ -34,6 +40,8 @@ const K2_PER_TILE_SIZE: usize = 8; const N_CIRCLES: usize = 1; +const NUM_FRAMES: usize = 2; + fn render_scene(rc: &mut impl RenderContext) { let mut rng = rand::thread_rng(); for _ in 0..N_CIRCLES { @@ -108,10 +116,35 @@ fn dump_k1_data(k1_buf: &[u32]) { } } -fn main() { - let instance = VkInstance::new().unwrap(); +fn main() -> Result<(), Error> { + let event_loop = EventLoop::new(); + let window = WindowBuilder::new() + .with_inner_size(winit::dpi::LogicalSize { + width: (WIDTH_IN_TILES * 8) as f64, + height: (HEIGHT_IN_TILES * 8) as f64, + }) + .with_resizable(false) // currently not supported + .build(&event_loop)?; + + let (instance, surface) = VkInstance::new(Some(&window))?; unsafe { - let device = instance.device().unwrap(); + let device = instance.device(surface.as_ref())?; + let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?; + + let mut current_frame = 0; + let present_semaphores = (0..NUM_FRAMES) + .map(|_| device.create_semaphore()) + .collect::, Error>>()?; + let frame_fences = (0..NUM_FRAMES) + .map(|_| device.create_fence(false)) + .collect::, Error>>()?; + let mut cmd_buffers = (0..NUM_FRAMES) + .map(|_| device.create_cmd_buf()) + .collect::, Error>>()?; + let query_pools = (0..NUM_FRAMES) + .map(|_| device.create_query_pool(6)) + .collect::, Error>>()?; + let host = MemFlags::host_coherent(); let dev = MemFlags::device_local(); let mut ctx = PietGpuRenderContext::new(); @@ -124,30 +157,27 @@ fn main() { let scene_dev = device .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .unwrap(); - device.write_buffer(&scene_buf, &scene).unwrap(); - let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); - let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap(); - let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); + device.write_buffer(&scene_buf, &scene)?; + let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?; + let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?; + let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); - let image_buf = device - .create_buffer((WIDTH * HEIGHT * 4) as u64, host) - .unwrap(); - let image_dev = device - .create_buffer((WIDTH * HEIGHT * 4) as u64, dev) - .unwrap(); + let image_buf = device.create_buffer((WIDTH * HEIGHT * 4) as u64, host)?; + let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; - let k1_alloc_buf_host = device.create_buffer(4, host).unwrap(); - let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k1_alloc_buf_host = device.create_buffer(4, host)?; + let k1_alloc_buf_dev = device.create_buffer(4, dev)?; let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE; - device - .write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32]) - .unwrap(); + device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?; let k1_code = include_bytes!("../shader/kernel1.spv"); - let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap(); + let k1_pipeline = device + .create_simple_compute_pipeline(k1_code, 3, 0) + .unwrap(); let k1_ds = device .create_descriptor_set( &k1_pipeline, &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], + &[], ) .unwrap(); @@ -158,11 +188,14 @@ fn main() { .write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32]) .unwrap(); let k2s_code = include_bytes!("../shader/kernel2s.spv"); - let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4).unwrap(); + let k2s_pipeline = device + .create_simple_compute_pipeline(k2s_code, 4, 0) + .unwrap(); let k2s_ds = device .create_descriptor_set( &k2s_pipeline, &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], + &[], ) .unwrap(); @@ -193,7 +226,7 @@ fn main() { .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) .unwrap(); let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6).unwrap(); + let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0).unwrap(); let k3_ds = device .create_descriptor_set( &k3_pipeline, @@ -205,19 +238,16 @@ fn main() { &ptcl_buf, &k3_alloc_buf_dev, ], + &[], ) .unwrap(); let k4_code = include_bytes!("../shader/kernel4.spv"); - let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 4).unwrap(); + let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1).unwrap(); let k4_ds = device - .create_descriptor_set( - &k4_pipeline, - &[&ptcl_buf, &segment_buf, &fill_seg_buf, &image_dev], - ) + .create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev]) .unwrap(); - - let query_pool = device.create_query_pool(6).unwrap(); + let query_pool = &query_pools[0]; let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); @@ -232,6 +262,7 @@ fn main() { cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.memory_barrier(); + cmd_buf.image_barrier(&image_dev, ImageLayout::Undefined, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.dispatch( @@ -272,10 +303,12 @@ fn main() { ); cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.memory_barrier(); - cmd_buf.copy_buffer(&image_dev, &image_buf); + cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc); + cmd_buf.copy_image_to_buffer(&image_dev, &image_buf); cmd_buf.finish(); - device.run_cmd_buf(&cmd_buf).unwrap(); - let timestamps = device.reap_query_pool(query_pool).unwrap(); + device.run_cmd_buf(&cmd_buf, &[], &[], Some(&frame_fences[0]))?; + device.wait_and_reset(&[frame_fences[0]])?; + let timestamps = device.reap_query_pool(&query_pool).unwrap(); println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); println!( "Kernel 2s time: {:.3}ms", @@ -300,21 +333,129 @@ fn main() { dump_k1_data(&k1_data); */ - let mut img_data: Vec = Default::default(); - // Note: because png can use a `&[u8]` slice, we could avoid an extra copy - // (probably passing a slice into a closure). But for now: keep it simple. - device.read_buffer(&image_buf, &mut img_data).unwrap(); + if false { + let mut img_data: Vec = Default::default(); + // Note: because png can use a `&[u8]` slice, we could avoid an extra copy + // (probably passing a slice into a closure). But for now: keep it simple. + device.read_buffer(&image_buf, &mut img_data).unwrap(); - // Write image as PNG file. - let path = Path::new("image.png"); - let file = File::create(path).unwrap(); - let ref mut w = BufWriter::new(file); + // Write image as PNG file. + let path = Path::new("image.png"); + let file = File::create(path).unwrap(); + let ref mut w = BufWriter::new(file); - let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); - encoder.set_color(png::ColorType::RGBA); - encoder.set_depth(png::BitDepth::Eight); - let mut writer = encoder.write_header().unwrap(); + let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); + encoder.set_color(png::ColorType::RGBA); + encoder.set_depth(png::BitDepth::Eight); + let mut writer = encoder.write_header().unwrap(); - writer.write_image_data(&img_data).unwrap(); + writer.write_image_data(&img_data).unwrap(); + } + + event_loop.run(move |event, _, control_flow| { + *control_flow = ControlFlow::Wait; + + match event { + Event::WindowEvent { event, window_id } if window_id == window.id() => { + match event { + WindowEvent::CloseRequested => { + *control_flow = ControlFlow::Exit; + } + _ => (), + } + } + Event::MainEventsCleared => { + window.request_redraw(); + } + Event::RedrawRequested(window_id) if window_id == window.id() => { + let frame_idx = current_frame % NUM_FRAMES; + let query_pool = &query_pools[frame_idx]; + + if current_frame >= NUM_FRAMES { + device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap(); + + let timestamps = device.reap_query_pool(query_pool).unwrap(); + window.set_title(&format!("k1: {:.3}ms, k2: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms", + timestamps[0] * 1e3, + (timestamps[1] - timestamps[0]) * 1e3, + (timestamps[2] - timestamps[1]) * 1e3, + (timestamps[3] - timestamps[2]) * 1e3, + )); + } + + let (image_idx, acquisition_semaphore) = swapchain.next().unwrap(); + let swap_image = swapchain.image(image_idx); + let cmd_buf = &mut cmd_buffers[frame_idx]; + cmd_buf.begin(); + cmd_buf.reset_query_pool(&query_pool); + cmd_buf.copy_buffer(&scene_buf, &scene_dev); + cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); + cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev); + cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); + cmd_buf.clear_buffer(&tilegroup_buf); + cmd_buf.clear_buffer(&ptcl_buf); + cmd_buf.memory_barrier(); + cmd_buf.write_timestamp(&query_pool, 0); + cmd_buf.dispatch( + &k1_pipeline, + &k1_ds, + ((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 1); + cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &k2s_pipeline, + &k2s_ds, + ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 2); + cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &k3_pipeline, + &k3_ds, + ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 3); + cmd_buf.memory_barrier(); + cmd_buf.image_barrier(&image_dev, ImageLayout::BlitSrc, ImageLayout::General); + cmd_buf.dispatch( + &k4_pipeline, + &k4_ds, + ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 4); + cmd_buf.memory_barrier(); + cmd_buf.image_barrier( + &swap_image, + ImageLayout::Undefined, + ImageLayout::BlitDst, + ); + cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc); + cmd_buf.blit_image(&image_dev, &swap_image); + cmd_buf.image_barrier( + &swap_image, + ImageLayout::BlitDst, + ImageLayout::Present, + ); + cmd_buf.finish(); + + device + .run_cmd_buf( + &cmd_buf, + &[acquisition_semaphore], + &[present_semaphores[frame_idx]], + Some(&frame_fences[frame_idx]), + ) + .unwrap(); + + swapchain + .present(image_idx, &[present_semaphores[frame_idx]]) + .unwrap(); + + current_frame += 1; + } + _ => (), + } + }) } }