commit 1b0248fbbf69899090b72dea01b6d8c6cbc4a9db
Author: Raph Levien
Date:   Sun Apr 5 15:17:26 2020 -0700

    Starting piet-gpu repo

    This brings in a bunch of code from vk-toy but doesn't yet do anything.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6853bbc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/target
+**/*.rs.bk
+.ninja_deps
+.ninja_log
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..0456295
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,55 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "ash"
+version = "0.30.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69daec0742947f33a85931fa3cb0ce5f07929159dcbd1f0cbb5b2912e2978509"
+dependencies = [
+ "libloading",
+]
+
+[[package]]
+name = "cc"
+version = "1.0.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
+
+[[package]]
+name = "libloading"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753"
+dependencies = [
+ "cc",
+ "winapi",
+]
+
+[[package]]
+name = "piet-gpu-hal"
+version = "0.1.0"
+dependencies = [
+ "ash",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d580bf5
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,5 @@
+[workspace]
+
+members = [
+    "piet-gpu-hal"
+]
diff --git a/piet-gpu-hal/Cargo.toml b/piet-gpu-hal/Cargo.toml
new file mode 100644
index 0000000..c70dfff
--- /dev/null
+++ b/piet-gpu-hal/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "piet-gpu-hal"
+version = "0.1.0"
+authors = ["Raph Levien"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ash = "0.30"
diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs
new file mode 100644
index 0000000..79c1e01
--- /dev/null
+++ b/piet-gpu-hal/src/lib.rs
@@ -0,0 +1,60 @@
+/// The cross-platform abstraction for a GPU device.
+///
+/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
+/// In time, it may go away and be replaced by either gfx-hal or wgpu.
+
+#[macro_use]
+extern crate ash;
+
+pub mod vulkan;
+
+/// This isn't great but is expedient.
+type Error = Box<dyn std::error::Error>;
+
+pub trait Device: Sized {
+    type Buffer;
+    type MemFlags;
+    type Pipeline;
+    type DescriptorSet;
+    type CmdBuf: CmdBuf<Self>;
+
+    fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
+
+    unsafe fn create_simple_compute_pipeline(
+        &self,
+        code: &[u8],
+        n_buffers: u32,
+    ) -> Result<Self::Pipeline, Error>;
+
+    unsafe fn create_descriptor_set(
+        &self,
+        pipeline: &Self::Pipeline,
+        bufs: &[&Self::Buffer],
+    ) -> Result<Self::DescriptorSet, Error>;
+
+    fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
+
+    unsafe fn run_cmd_buf(&self, cmd_buf: &Self::CmdBuf) -> Result<(), Error>;
+
+    unsafe fn read_buffer<T: Sized>(
+        &self,
+        buffer: &Self::Buffer,
+        result: &mut Vec<T>,
+    ) -> Result<(), Error>;
+
+    unsafe fn write_buffer<T: Sized>(
+        &self,
+        buffer: &Self::Buffer,
+        contents: &[T],
+    ) -> Result<(), Error>;
+}
+
+pub trait CmdBuf<D: Device> {
+    unsafe fn begin(&mut self);
+
+    unsafe fn finish(&mut self);
+
+    unsafe fn dispatch(&mut self, pipeline: &D::Pipeline, descriptor_set: &D::DescriptorSet);
+
+    unsafe fn memory_barrier(&mut self);
+}
diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs
new file mode 100644
index 0000000..a2b5631
--- /dev/null
+++ b/piet-gpu-hal/src/vulkan.rs
@@ -0,0 +1,428 @@
+//! Vulkan implementation of HAL trait.
+
+use std::ffi::CString;
+use std::sync::Arc;
+
+use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
+use ash::{vk, Device, Entry, Instance};
+
+use crate::Error;
+
+/// A base for allocating resources and dispatching work.
+///
+/// This is quite similar to "device" in most GPU APIs, but I didn't want to overload
+/// that term further.
+pub struct Base {
+    /// Retain the dynamic lib.
+    #[allow(unused)]
+    entry: Entry,
+
+    #[allow(unused)]
+    instance: Instance,
+
+    device: Arc<RawDevice>,
+    device_mem_props: vk::PhysicalDeviceMemoryProperties,
+    queue: vk::Queue,
+    qfi: u32,
+}
+
+struct RawDevice {
+    device: Device,
+}
+
+/// A handle to a buffer.
+///
+/// There is no lifetime tracking at this level; the caller is responsible
+/// for destroying the buffer at the appropriate time.
+pub struct Buffer {
+    buffer: vk::Buffer,
+    buffer_memory: vk::DeviceMemory,
+    size: u64,
+}
+
+pub struct Pipeline {
+    pipeline: vk::Pipeline,
+    descriptor_set_layout: vk::DescriptorSetLayout,
+    pipeline_layout: vk::PipelineLayout,
+}
+
+pub struct DescriptorSet {
+    descriptor_set: vk::DescriptorSet,
+}
+
+pub struct CmdBuf {
+    cmd_buf: vk::CommandBuffer,
+    device: Arc<RawDevice>,
+}
+
+impl Base {
+    /// Create a new instance.
+    ///
+    /// There's more to be done to make this suitable for integration with other
+    /// systems, but for now the goal is to make things simple.
+    pub fn new() -> Result<Base, Error> {
+        unsafe {
+            let app_name = CString::new("VkToy").unwrap();
+            let entry = Entry::new()?;
+            let instance = entry.create_instance(
+                &vk::InstanceCreateInfo::builder().application_info(
+                    &vk::ApplicationInfo::builder()
+                        .application_name(&app_name)
+                        .application_version(0)
+                        .engine_name(&app_name)
+                        .api_version(vk::make_version(1, 0, 0)),
+                ),
+                None,
+            )?;
+
+            let devices = instance.enumerate_physical_devices()?;
+            let (pdevice, qfi) =
+                choose_compute_device(&instance, &devices).ok_or("no suitable device")?;
+
+            let device = instance.create_device(
+                pdevice,
+                &vk::DeviceCreateInfo::builder().queue_create_infos(&[
+                    vk::DeviceQueueCreateInfo::builder()
+                        .queue_family_index(qfi)
+                        .queue_priorities(&[1.0])
+                        .build(),
+                ]),
+                None,
+            )?;
+
+            let device_mem_props = instance.get_physical_device_memory_properties(pdevice);
+
+            let queue_index = 0;
+            let queue = device.get_device_queue(qfi, queue_index);
+
+            let device = Arc::new(RawDevice { device });
+
+            Ok(Base {
+                entry,
+                instance,
+                device,
+                device_mem_props,
+                qfi,
+                queue,
+            })
+        }
+    }
+
+    pub fn create_buffer(
+        &self,
+        size: u64,
+        mem_flags: vk::MemoryPropertyFlags,
+    ) -> Result<Buffer, Error> {
+        unsafe {
+            let device = &self.device.device;
+            let buffer = device.create_buffer(
+                &vk::BufferCreateInfo::builder()
+                    .size(size)
+                    .usage(vk::BufferUsageFlags::STORAGE_BUFFER)
+                    .sharing_mode(vk::SharingMode::EXCLUSIVE),
+                None,
+            )?;
+            let mem_requirements = device.get_buffer_memory_requirements(buffer);
+            let mem_type = find_memory_type(
+                mem_requirements.memory_type_bits,
+                mem_flags,
+                &self.device_mem_props,
+            )
+            .unwrap(); // TODO: proper error
+            let buffer_memory = device.allocate_memory(
+                &vk::MemoryAllocateInfo::builder()
+                    .allocation_size(mem_requirements.size)
+                    .memory_type_index(mem_type),
+                None,
+            )?;
+            device.bind_buffer_memory(buffer, buffer_memory, 0)?;
+            Ok(Buffer {
+                buffer,
+                buffer_memory,
+                size,
+            })
+        }
+    }
+
+    /// This creates a pipeline that runs over the buffer.
+    ///
+    /// The code is included from "../comp.spv", and the descriptor set layout is just some
+    /// number of buffers.
+    pub unsafe fn create_simple_compute_pipeline(
+        &self,
+        code: &[u8],
+        n_buffers: u32,
+    ) -> Result<Pipeline, Error> {
+        let device = &self.device.device;
+        let descriptor_set_layout = device.create_descriptor_set_layout(
+            &vk::DescriptorSetLayoutCreateInfo::builder().bindings(&[
+                vk::DescriptorSetLayoutBinding::builder()
+                    .binding(0)
+                    .descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
+                    .descriptor_count(n_buffers)
+                    .stage_flags(vk::ShaderStageFlags::COMPUTE)
+                    .build(),
+            ]),
+            None,
+        )?;
+
+        let descriptor_set_layouts = [descriptor_set_layout];
+
+        // Create compute pipeline.
+        let code_u32 = convert_u32_vec(code);
+        let compute_shader_module = device
+            .create_shader_module(&vk::ShaderModuleCreateInfo::builder().code(&code_u32), None)?;
+        let entry_name = CString::new("main").unwrap();
+        let pipeline_layout = device.create_pipeline_layout(
+            &vk::PipelineLayoutCreateInfo::builder().set_layouts(&descriptor_set_layouts),
+            None,
+        )?;
+
+        let pipeline = device.create_compute_pipelines(
+            vk::PipelineCache::null(),
+            &[vk::ComputePipelineCreateInfo::builder()
+                .stage(
+                    vk::PipelineShaderStageCreateInfo::builder()
+                        .stage(vk::ShaderStageFlags::COMPUTE)
+                        .module(compute_shader_module)
+                        .name(&entry_name)
+                        .build(),
+                )
+                .layout(pipeline_layout)
+                .build()],
+            None,
+        ).map_err(|(_pipeline, err)| err)?[0];
+        Ok(Pipeline {
+            pipeline,
+            pipeline_layout,
+            descriptor_set_layout,
+        })
+    }
+
+    pub unsafe fn create_descriptor_set(
+        &self,
+        pipeline: &Pipeline,
+        bufs: &[&Buffer],
+    ) -> Result<DescriptorSet, Error> {
+        let device = &self.device.device;
+        let descriptor_pool_sizes = [vk::DescriptorPoolSize::builder()
+            .ty(vk::DescriptorType::STORAGE_BUFFER)
+            .descriptor_count(bufs.len() as u32)
+            .build()];
+        let descriptor_pool = device.create_descriptor_pool(
+            &vk::DescriptorPoolCreateInfo::builder()
+                .pool_sizes(&descriptor_pool_sizes)
+                .max_sets(1),
+            None,
+        )?;
+        let descriptor_set_layouts = [pipeline.descriptor_set_layout];
+        let descriptor_sets = device
+            .allocate_descriptor_sets(
+                &vk::DescriptorSetAllocateInfo::builder()
+                    .descriptor_pool(descriptor_pool)
+                    .set_layouts(&descriptor_set_layouts),
+            )
+            .unwrap();
+        let buf_infos = bufs
+            .iter()
+            .map(|buf| {
+                vk::DescriptorBufferInfo::builder()
+                    .buffer(buf.buffer)
+                    .offset(0)
+                    .range(vk::WHOLE_SIZE)
+                    .build()
+            })
+            .collect::<Vec<_>>();
+        device.update_descriptor_sets(
+            &[vk::WriteDescriptorSet::builder()
+                .dst_set(descriptor_sets[0])
+                .dst_binding(0)
+                .descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
+                .buffer_info(&buf_infos)
+                .build()],
+            &[],
+        );
+        Ok(DescriptorSet {
+            descriptor_set: descriptor_sets[0],
+        })
+    }
+
+    pub fn create_cmd_buf(&self) -> Result<CmdBuf, Error> {
+        unsafe {
+            let device = &self.device.device;
+            let command_pool = device.create_command_pool(
+                &vk::CommandPoolCreateInfo::builder()
+                    .flags(vk::CommandPoolCreateFlags::empty())
+                    .queue_family_index(self.qfi),
+                None,
+            )?;
+            let cmd_buf = device.allocate_command_buffers(
+                &vk::CommandBufferAllocateInfo::builder()
+                    .command_pool(command_pool)
+                    .level(vk::CommandBufferLevel::PRIMARY)
+                    .command_buffer_count(1),
+            )?[0];
+            Ok(CmdBuf {
+                cmd_buf,
+                device: self.device.clone(),
+            })
+        }
+    }
+
+    /// Run the command buffer.
+    ///
+    /// This version simply blocks until it's complete.
+    pub unsafe fn run_cmd_buf(&self, cmd_buf: &CmdBuf) -> Result<(), Error> {
+        let device = &self.device.device;
+
+        // Run the command buffer.
+        let fence = device.create_fence(
+            &vk::FenceCreateInfo::builder().flags(vk::FenceCreateFlags::empty()),
+            None,
+        )?;
+        device.queue_submit(
+            self.queue,
+            &[vk::SubmitInfo::builder()
+                .command_buffers(&[cmd_buf.cmd_buf])
+                .build()],
+            fence,
+        )?;
+        device.wait_for_fences(&[fence], true, 1_000_000)?;
+        device.destroy_fence(fence, None);
+        Ok(())
+    }
+
+    pub unsafe fn read_buffer<T: Sized>(
+        &self,
+        buffer: &Buffer,
+        result: &mut Vec<T>,
+    ) -> Result<(), Error> {
+        let device = &self.device.device;
+        let size = buffer.size as usize;
+        let buf = device.map_memory(
+            buffer.buffer_memory,
+            0,
+            size as u64,
+            vk::MemoryMapFlags::empty(),
+        )?;
+        if size > result.len() {
+            result.reserve(size - result.len());
+        }
+        std::ptr::copy_nonoverlapping(buf as *const T, result.as_mut_ptr(), size);
+        result.set_len(size);
+        device.unmap_memory(buffer.buffer_memory);
+        Ok(())
+    }
+
+    pub unsafe fn write_buffer<T: Sized>(
+        &self,
+        buffer: &Buffer,
+        contents: &[T],
+    ) -> Result<(), Error> {
+        let device = &self.device.device;
+        let buf = device.map_memory(
+            buffer.buffer_memory,
+            0,
+            std::mem::size_of_val(contents) as u64,
+            vk::MemoryMapFlags::empty(),
+        )?;
+        std::ptr::copy_nonoverlapping(contents.as_ptr(), buf as *mut T, contents.len());
+        device.unmap_memory(buffer.buffer_memory);
+        Ok(())
+    }
+}
+
+impl CmdBuf {
+    pub unsafe fn begin(&mut self) {
+        self.device
+            .device
+            .begin_command_buffer(
+                self.cmd_buf,
+                &vk::CommandBufferBeginInfo::builder()
+                    .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT),
+            )
+            .unwrap();
+    }
+
+    pub unsafe fn finish(&mut self) {
+        self.device.device.end_command_buffer(self.cmd_buf).unwrap();
+    }
+
+    pub unsafe fn dispatch(&mut self, pipeline: &Pipeline, descriptor_set: &DescriptorSet) {
+        let device = &self.device.device;
+        device.cmd_bind_pipeline(
+            self.cmd_buf,
+            vk::PipelineBindPoint::COMPUTE,
+            pipeline.pipeline,
+        );
+        device.cmd_bind_descriptor_sets(
+            self.cmd_buf,
+            vk::PipelineBindPoint::COMPUTE,
+            pipeline.pipeline_layout,
+            0,
+            &[descriptor_set.descriptor_set],
+            &[],
+        );
+        device.cmd_dispatch(self.cmd_buf, 256, 1, 1);
+    }
+
+    /// Insert a pipeline barrier for all memory accesses.
+    #[allow(unused)]
+    pub unsafe fn memory_barrier(&mut self) {
+        let device = &self.device.device;
+        device.cmd_pipeline_barrier(
+            self.cmd_buf,
+            vk::PipelineStageFlags::ALL_COMMANDS,
+            vk::PipelineStageFlags::ALL_COMMANDS,
+            vk::DependencyFlags::empty(),
+            &[vk::MemoryBarrier::builder()
+                .src_access_mask(vk::AccessFlags::MEMORY_WRITE)
+                .dst_access_mask(vk::AccessFlags::MEMORY_READ)
+                .build()],
+            &[],
+            &[],
+        );
+    }
+}
+
+unsafe fn choose_compute_device(
+    instance: &Instance,
+    devices: &[vk::PhysicalDevice],
+) -> Option<(vk::PhysicalDevice, u32)> {
+    for pdevice in devices {
+        let props = instance.get_physical_device_queue_family_properties(*pdevice);
+        for (ix, info) in props.iter().enumerate() {
+            if info.queue_flags.contains(vk::QueueFlags::COMPUTE) {
+                return Some((*pdevice, ix as u32));
+            }
+        }
+    }
+    None
+}
+
+fn find_memory_type(
+    memory_type_bits: u32,
+    property_flags: vk::MemoryPropertyFlags,
+    props: &vk::PhysicalDeviceMemoryProperties,
+) -> Option<u32> {
+    for i in 0..props.memory_type_count {
+        if (memory_type_bits & (1 << i)) != 0
+            && props.memory_types[i as usize]
+                .property_flags
+                .contains(property_flags)
+        {
+            return Some(i);
+        }
+    }
+    None
+}
+
+fn convert_u32_vec(src: &[u8]) -> Vec<u32> {
+    src.chunks(4)
+        .map(|chunk| {
+            let mut buf = [0; 4];
+            buf.copy_from_slice(chunk);
+            u32::from_le_bytes(buf)
+        })
+        .collect()
+}
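
Note (not part of the commit): a minimal sketch of how this HAL is meant to be driven, based only on the API introduced above. It calls the Vulkan back-end directly; `run_example` and `comp_spv` are illustrative names for a caller-supplied function and SPIR-V compute shader blob, and the caller pulls in the same `ash` crate for the `vk::MemoryPropertyFlags` type. In this commit `dispatch` hardcodes 256 workgroups, and the read-back below uses `u8` so the byte count matches the element count `read_buffer` expects.

    use ash::vk;
    use piet_gpu_hal::vulkan::Base;

    fn run_example(comp_spv: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
        let base = Base::new()?;
        // Host-visible, host-coherent memory so map_memory works without explicit flushes.
        let mem_flags =
            vk::MemoryPropertyFlags::HOST_VISIBLE | vk::MemoryPropertyFlags::HOST_COHERENT;
        let src: Vec<u32> = (0..256).collect();
        let buffer =
            base.create_buffer((src.len() * std::mem::size_of::<u32>()) as u64, mem_flags)?;
        unsafe {
            base.write_buffer(&buffer, &src)?;
            // One storage buffer at binding 0, matching the shader's descriptor set layout.
            let pipeline = base.create_simple_compute_pipeline(comp_spv, 1)?;
            let descriptor_set = base.create_descriptor_set(&pipeline, &[&buffer])?;
            let mut cmd_buf = base.create_cmd_buf()?;
            cmd_buf.begin();
            cmd_buf.dispatch(&pipeline, &descriptor_set);
            cmd_buf.finish();
            base.run_cmd_buf(&cmd_buf)?;
            // Read the result back as raw bytes.
            let mut result: Vec<u8> = Vec::new();
            base.read_buffer(&buffer, &mut result)?;
            println!("read {} bytes back", result.len());
        }
        Ok(())
    }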