Initial work for surface support

surface: handle extensions

Implement swapchain creation and blit image to screen
This commit is contained in:
msiglreith 2020-04-30 15:02:48 +02:00
parent 4db4b3b87d
commit b38e43f0c2
7 changed files with 776 additions and 165 deletions

View file

@ -9,3 +9,5 @@ edition = "2018"
[dependencies]
ash = "0.30"
once_cell = "1.3.1"
ash-window = { git = "https://github.com/norse-rs/ash-window.git", branch = "dyn_trait" }
raw-window-handle = "0.3"

View file

@ -2,9 +2,9 @@ use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags};
fn main() {
let instance = VkInstance::new().unwrap();
let (instance, _) = VkInstance::new(None).unwrap();
unsafe {
let device = instance.device().unwrap();
let device = instance.device(None).unwrap();
let mem_flags = MemFlags::host_coherent();
let src = (0..256).map(|x| x + 1).collect::<Vec<u32>>();
let buffer = device

View file

@ -5,28 +5,49 @@
pub mod vulkan;
/// This isn't great but is expedient.
type Error = Box<dyn std::error::Error>;
pub type Error = Box<dyn std::error::Error>;
/// Image layouts understood by the HAL.
///
/// These are passed as the source and destination layout of
/// `CmdBuf::image_barrier`. The Vulkan backend translates each variant to the
/// `vk::ImageLayout` named in the variant's documentation.
///
/// The type is a plain value enum, so it is comparable and hashable in addition
/// to being `Copy`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum ImageLayout {
    /// No defined layout (Vulkan `UNDEFINED`).
    Undefined,
    /// Layout used for presenting to a surface (Vulkan `PRESENT_SRC_KHR`).
    Present,
    /// Source of a blit or image copy (Vulkan `TRANSFER_SRC_OPTIMAL`).
    BlitSrc,
    /// Destination of a blit (Vulkan `TRANSFER_DST_OPTIMAL`).
    BlitDst,
    /// General-purpose layout (Vulkan `GENERAL`); storage images are bound in this layout.
    General,
}
pub trait Device: Sized {
type Buffer;
type Image;
type MemFlags: MemFlags;
type Pipeline;
type DescriptorSet;
type QueryPool;
type CmdBuf: CmdBuf<Self>;
type Fence;
type Semaphore;
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
unsafe fn create_image2d(
&self,
width: u32,
height: u32,
mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error>;
unsafe fn create_simple_compute_pipeline(
&self,
code: &[u8],
n_buffers: u32,
n_images: u32,
) -> Result<Self::Pipeline, Error>;
unsafe fn create_descriptor_set(
&self,
pipeline: &Self::Pipeline,
bufs: &[&Self::Buffer],
images: &[&Self::Image],
) -> Result<Self::DescriptorSet, Error>;
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error>;
@ -40,9 +61,15 @@ pub trait Device: Sized {
///
/// # Safety
/// All submitted commands that refer to this query pool must have completed.
unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error>;
unsafe fn run_cmd_buf(&self, cmd_buf: &Self::CmdBuf) -> Result<(), Error>;
unsafe fn run_cmd_buf(
&self,
cmd_buf: &Self::CmdBuf,
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error>;
unsafe fn read_buffer<T: Sized>(
&self,
@ -55,6 +82,10 @@ pub trait Device: Sized {
buffer: &Self::Buffer,
contents: &[T],
) -> Result<(), Error>;
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error>;
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
}
pub trait CmdBuf<D: Device> {
@ -71,6 +102,13 @@ pub trait CmdBuf<D: Device> {
unsafe fn memory_barrier(&mut self);
unsafe fn image_barrier(
&mut self,
image: &D::Image,
src_layout: ImageLayout,
dst_layout: ImageLayout,
);
/// Clear the buffer.
///
/// This is readily supported in Vulkan, but for portability it is remarkably
@ -80,6 +118,11 @@ pub trait CmdBuf<D: Device> {
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer);
// low portability, dx12 doesn't support it natively
unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image);
/// Reset the query pool.
///
/// The query pool must be reset before each use, to avoid validation errors.

View file

@ -4,12 +4,12 @@ use std::borrow::Cow;
use std::ffi::{CStr, CString};
use std::sync::Arc;
use ash::extensions::ext::DebugUtils;
use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::{vk, Device, Entry, Instance};
use once_cell::sync::Lazy;
use crate::Error;
use crate::{Device as DeviceTrait, Error, ImageLayout};
pub struct VkInstance {
/// Retain the dynamic lib.
@ -22,6 +22,7 @@ pub struct VkInstance {
pub struct VkDevice {
device: Arc<RawDevice>,
physical_device: vk::PhysicalDevice,
device_mem_props: vk::PhysicalDeviceMemoryProperties,
queue: vk::Queue,
qfi: u32,
@ -32,6 +33,23 @@ struct RawDevice {
device: Device,
}
/// A window surface plus the loaded surface-extension functions used to query it.
///
/// `VkInstance::new` creates one of these when it is given a window handle.
pub struct VkSurface {
/// Raw surface handle created through `ash_window::create_surface`.
surface: vk::SurfaceKHR,
/// Function loader for the surface extension (format, capability and
/// presentation-support queries go through this).
surface_fn: khr::Surface,
}
/// A swapchain together with the state needed to acquire and present its images.
pub struct VkSwapchain {
/// Raw swapchain handle.
swapchain: vk::SwapchainKHR,
/// Function loader for the swapchain extension.
swapchain_fn: khr::Swapchain,
/// Queue that present requests are submitted to.
present_queue: vk::Queue,

/// Index into `acquisition_semaphores` of the semaphore handed to the next acquire;
/// advanced round-robin after each successful acquire.
acquisition_idx: usize,
acquisition_semaphores: Vec<vk::Semaphore>, // same length as `images`
/// Images owned by the swapchain, as returned by `get_swapchain_images`.
images: Vec<vk::Image>,
/// Extent the swapchain images were created with.
extent: vk::Extent2D,
}
/// A handle to a buffer.
///
/// There is no lifetime tracking at this level; the caller is responsible
@ -42,6 +60,13 @@ pub struct Buffer {
size: u64,
}
/// A handle to a 2D image.
///
/// Images made by `create_image2d` carry their own memory and view. Images handed out by
/// `VkSwapchain::image` wrap a swapchain image and leave `image_memory` and `image_view` null.
pub struct Image {
/// Raw image handle.
image: vk::Image,
/// Device memory bound to the image (null for swapchain images).
image_memory: vk::DeviceMemory,
/// View over the image, used when binding it as a storage image (null for swapchain images).
image_view: vk::ImageView,
/// Size of the image; `depth` is 1 for the 2D images created in this module.
extent: vk::Extent3D,
}
pub struct Pipeline {
pipeline: vk::Pipeline,
descriptor_set_layout: vk::DescriptorSetLayout,
@ -88,11 +113,7 @@ unsafe extern "system" fn vulkan_debug_callback(
println!(
"{:?}:\n{:?} [{} ({})] : {}\n",
message_severity,
message_type,
message_id_name,
message_id_number,
message,
message_severity, message_type, message_id_name, message_id_number, message,
);
vk::FALSE
@ -119,40 +140,59 @@ impl VkInstance {
///
/// There's more to be done to make this suitable for integration with other
/// systems, but for now the goal is to make things simple.
pub fn new() -> Result<VkInstance, Error> {
///
/// The caller is responsible for making sure that the window which owns the raw window
/// handle outlives the surface.
pub fn new(
window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>,
) -> Result<(VkInstance, Option<VkSurface>), Error> {
unsafe {
let app_name = CString::new("VkToy").unwrap();
let entry = Entry::new()?;
let exist_layers = entry
.enumerate_instance_layer_properties()?;
let layers = LAYERS.iter().filter_map(|&lyr| {
exist_layers
.iter()
.find(|x|
CStr::from_ptr(x.layer_name.as_ptr()) == lyr
)
.map(|_| lyr.as_ptr())
.or_else(|| {
println!("Unable to find layer: {}, have you installed the Vulkan SDK?", lyr.to_string_lossy());
None
})
}).collect::<Vec<_>>();
let exist_layers = entry.enumerate_instance_layer_properties()?;
let layers = LAYERS
.iter()
.filter_map(|&lyr| {
exist_layers
.iter()
.find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr)
.map(|_| lyr.as_ptr())
.or_else(|| {
println!(
"Unable to find layer: {}, have you installed the Vulkan SDK?",
lyr.to_string_lossy()
);
None
})
})
.collect::<Vec<_>>();
let exist_exts = entry
.enumerate_instance_extension_properties()?;
let exts = EXTS.iter().filter_map(|&ext| {
exist_exts
.iter()
.find(|x|
CStr::from_ptr(x.extension_name.as_ptr()) == ext
)
.map(|_| ext.as_ptr())
.or_else(|| {
println!("Unable to find extension: {}, have you installed the Vulkan SDK?", ext.to_string_lossy());
None
})
}).collect::<Vec<_>>();
let exist_exts = entry.enumerate_instance_extension_properties()?;
let mut exts = EXTS
.iter()
.filter_map(|&ext| {
exist_exts
.iter()
.find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
.map(|_| ext.as_ptr())
.or_else(|| {
println!(
"Unable to find extension: {}, have you installed the Vulkan SDK?",
ext.to_string_lossy()
);
None
})
})
.collect::<Vec<_>>();
let surface_extensions = match window_handle {
Some(ref handle) => ash_window::enumerate_required_extensions(*handle)?,
None => vec![],
};
for extension in surface_extensions {
exts.push(extension.as_ptr());
}
let instance = entry.create_instance(
&vk::InstanceCreateInfo::builder()
@ -168,7 +208,7 @@ impl VkInstance {
None,
)?;
let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) {
let (_dbg_loader, _dbg_callbk) = if false {
let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
.message_severity(
vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
@ -185,37 +225,51 @@ impl VkInstance {
(None, None)
};
Ok(VkInstance {
let vk_surface = match window_handle {
Some(handle) => Some(VkSurface {
surface: ash_window::create_surface(&entry, &instance, handle, None)?,
surface_fn: khr::Surface::new(&entry, &instance),
}),
None => None,
};
let vk_instance = VkInstance {
entry,
instance,
_dbg_loader,
_dbg_callbk,
})
};
Ok((vk_instance, vk_surface))
}
}
/// Create a device from the instance, suitable for compute.
/// Create a device from the instance, suitable for compute, with an optional surface.
///
/// # Safety
///
/// The caller is responsible for making sure that the instance outlives the device.
/// We could enforce that, for example having an `Arc` of the raw instance, but for
/// now keep things simple.
pub unsafe fn device(&self) -> Result<VkDevice, Error> {
/// The caller is responsible for making sure that the instance outlives the device
/// and surface. We could enforce that, for example having an `Arc` of the raw instance,
/// but for now keep things simple.
pub unsafe fn device(&self, surface: Option<&VkSurface>) -> Result<VkDevice, Error> {
let devices = self.instance.enumerate_physical_devices()?;
let (pdevice, qfi) =
choose_compute_device(&self.instance, &devices).ok_or("no suitable device")?;
choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?;
let device = self.instance.create_device(
pdevice,
&vk::DeviceCreateInfo::builder().queue_create_infos(&[
vk::DeviceQueueCreateInfo::builder()
.queue_family_index(qfi)
.queue_priorities(&[1.0])
.build(),
]),
None,
)?;
let queue_priorities = [1.0];
let queue_create_infos = [vk::DeviceQueueCreateInfo::builder()
.queue_family_index(qfi)
.queue_priorities(&queue_priorities)
.build()];
let extensions = match surface {
Some(_) => vec![khr::Swapchain::name().as_ptr()],
None => vec![],
};
let create_info = vk::DeviceCreateInfo::builder()
.queue_create_infos(&queue_create_infos)
.enabled_extension_names(&extensions)
.build();
let device = self.instance.create_device(pdevice, &create_info, None)?;
let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice);
@ -229,21 +283,98 @@ impl VkInstance {
Ok(VkDevice {
device,
physical_device: pdevice,
device_mem_props,
qfi,
queue,
timestamp_period,
})
}
/// Create a swapchain presenting to `surface` from `device`.
///
/// The first surface format reported by the driver is used (substituting
/// `B8G8R8A8_UNORM` when the driver reports `UNDEFINED`). `MAILBOX` presentation is
/// preferred, with `FIFO` as the fallback. The images are created with `TRANSFER_DST`
/// usage only, i.e. they are meant to be the target of a blit.
///
/// One acquisition semaphore is created per swapchain image.
///
/// # Errors
///
/// Returns an error if any surface query fails, if the surface reports no formats, or if
/// swapchain or semaphore creation fails.
///
/// # Safety
///
/// `device` and `surface` are expected to have been created from this instance, and both
/// must outlive the returned swapchain.
pub unsafe fn swapchain(
    &self,
    device: &VkDevice,
    surface: &VkSurface,
) -> Result<VkSwapchain, Error> {
    let formats = surface
        .surface_fn
        .get_physical_device_surface_formats(device.physical_device, surface.surface)?;
    let surface_format = formats
        .first()
        .map(|surface_fmt| match surface_fmt.format {
            vk::Format::UNDEFINED => {
                vk::SurfaceFormatKHR {
                    format: vk::Format::B8G8R8A8_UNORM, // most common format on desktop
                    color_space: surface_fmt.color_space,
                }
            }
            _ => *surface_fmt,
        })
        .ok_or("no surface format found")?;

    let capabilities = surface
        .surface_fn
        .get_physical_device_surface_capabilities(device.physical_device, surface.surface)?;

    let present_modes = surface
        .surface_fn
        .get_physical_device_surface_present_modes(device.physical_device, surface.surface)?;
    let present_mode = present_modes
        .into_iter()
        .find(|mode| mode == &vk::PresentModeKHR::MAILBOX)
        .unwrap_or(vk::PresentModeKHR::FIFO);

    // Ask for double buffering, but stay inside the range the surface supports: the
    // requested count must not be below `min_image_count`, nor above `max_image_count`
    // (where 0 means "no upper limit").
    let mut image_count = capabilities.min_image_count.max(2);
    if capabilities.max_image_count > 0 {
        image_count = image_count.min(capabilities.max_image_count);
    }

    // The pre-transform must be one the surface supports; prefer identity and otherwise
    // keep whatever transform the surface currently has.
    let pre_transform = if capabilities
        .supported_transforms
        .contains(vk::SurfaceTransformFlagsKHR::IDENTITY)
    {
        vk::SurfaceTransformFlagsKHR::IDENTITY
    } else {
        capabilities.current_transform
    };

    let extent = capabilities.current_extent; // TODO: wayland for example will complain here ..

    let create_info = vk::SwapchainCreateInfoKHR::builder()
        .surface(surface.surface)
        .min_image_count(image_count)
        .image_format(surface_format.format)
        .image_color_space(surface_format.color_space)
        .image_extent(extent)
        .image_array_layers(1)
        .image_usage(vk::ImageUsageFlags::TRANSFER_DST)
        .image_sharing_mode(vk::SharingMode::EXCLUSIVE)
        .pre_transform(pre_transform)
        .composite_alpha(vk::CompositeAlphaFlagsKHR::OPAQUE)
        .present_mode(present_mode)
        .clipped(true);

    let swapchain_fn = khr::Swapchain::new(&self.instance, &device.device.device);
    let swapchain = swapchain_fn.create_swapchain(&create_info, None)?;

    // The driver may create more images than requested, so size the semaphore pool from
    // the images actually returned.
    let images = swapchain_fn.get_swapchain_images(swapchain)?;
    let acquisition_semaphores = (0..images.len())
        .map(|_| device.create_semaphore())
        .collect::<Result<Vec<_>, Error>>()?;

    Ok(VkSwapchain {
        swapchain,
        swapchain_fn,
        present_queue: device.queue,
        images,
        acquisition_semaphores,
        acquisition_idx: 0,
        extent,
    })
}
}
impl crate::Device for VkDevice {
type Buffer = Buffer;
type Image = Image;
type CmdBuf = CmdBuf;
type DescriptorSet = DescriptorSet;
type Pipeline = Pipeline;
type QueryPool = QueryPool;
type MemFlags = MemFlags;
type Fence = vk::Fence;
type Semaphore = vk::Semaphore;
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
unsafe {
@ -281,6 +412,96 @@ impl crate::Device for VkDevice {
}
}
/// Create a 2D `R8G8B8A8_UNORM` image of `width` x `height` texels.
///
/// The image has a single mip level and array layer, optimal tiling, and is usable as a
/// storage image and as a transfer source. Memory matching `mem_flags` is allocated and
/// bound to it, and a color view over the whole image is created.
///
/// Panics if no memory type satisfies the image's requirements and `mem_flags`.
unsafe fn create_image2d(
    &self,
    width: u32,
    height: u32,
    mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error> {
    let device = &self.device.device;
    let format = vk::Format::R8G8B8A8_UNORM;
    let extent = vk::Extent3D {
        width,
        height,
        depth: 1,
    };
    // write in compute and blit src
    let usage = vk::ImageUsageFlags::STORAGE | vk::ImageUsageFlags::TRANSFER_SRC;

    let image_info = vk::ImageCreateInfo::builder()
        .image_type(vk::ImageType::TYPE_2D)
        .format(format)
        .extent(extent)
        .mip_levels(1)
        .array_layers(1)
        .samples(vk::SampleCountFlags::TYPE_1)
        .tiling(vk::ImageTiling::OPTIMAL)
        .initial_layout(vk::ImageLayout::UNDEFINED)
        .usage(usage)
        .sharing_mode(vk::SharingMode::EXCLUSIVE);
    let image = device.create_image(&image_info, None)?;

    // Back the image with memory of a type compatible with both the image and the
    // requested flags.
    let requirements = device.get_image_memory_requirements(image);
    let memory_type_index = find_memory_type(
        requirements.memory_type_bits,
        mem_flags.0,
        &self.device_mem_props,
    )
    .unwrap(); // TODO: proper error
    let alloc_info = vk::MemoryAllocateInfo::builder()
        .allocation_size(requirements.size)
        .memory_type_index(memory_type_index);
    let image_memory = device.allocate_memory(&alloc_info, None)?;
    device.bind_image_memory(image, image_memory, 0)?;

    let whole_color_image = vk::ImageSubresourceRange {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        base_mip_level: 0,
        level_count: 1,
        base_array_layer: 0,
        layer_count: 1,
    };
    let identity_swizzle = vk::ComponentMapping {
        r: vk::ComponentSwizzle::IDENTITY,
        g: vk::ComponentSwizzle::IDENTITY,
        b: vk::ComponentSwizzle::IDENTITY,
        a: vk::ComponentSwizzle::IDENTITY,
    };
    let view_info = vk::ImageViewCreateInfo::builder()
        .view_type(vk::ImageViewType::TYPE_2D)
        .image(image)
        .format(format)
        .subresource_range(whole_color_image)
        .components(identity_swizzle)
        .build();
    let image_view = device.create_image_view(&view_info, None)?;

    Ok(Image {
        image,
        image_memory,
        image_view,
        extent,
    })
}
/// Create a fence, optionally already in the signaled state.
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error> {
    let flags = if signaled {
        vk::FenceCreateFlags::SIGNALED
    } else {
        vk::FenceCreateFlags::empty()
    };
    let info = vk::FenceCreateInfo::builder().flags(flags).build();
    let fence = self.device.device.create_fence(&info, None)?;
    Ok(fence)
}
/// Create a semaphore with default creation parameters.
unsafe fn create_semaphore(&self) -> Result<Self::Semaphore, Error> {
    let info = vk::SemaphoreCreateInfo::default();
    let semaphore = self.device.device.create_semaphore(&info, None)?;
    Ok(semaphore)
}
/// Block until every fence in `fences` is signaled, then reset them all.
///
/// Waits without a timeout. An empty slice is a no-op: Vulkan requires a non-zero fence
/// count for both the wait and the reset call, so nothing is submitted in that case.
///
/// # Errors
///
/// Returns the error reported by the wait or the reset.
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error> {
    if fences.is_empty() {
        return Ok(());
    }
    let device = &self.device.device;
    // `!0` is the maximum timeout, i.e. wait indefinitely.
    device.wait_for_fences(fences, true, !0)?;
    device.reset_fences(fences)?;
    Ok(())
}
/// This creates a pipeline that runs over the buffer.
///
/// The descriptor set layout is just some number of buffers (this will change).
@ -288,18 +509,30 @@ impl crate::Device for VkDevice {
&self,
code: &[u8],
n_buffers: u32,
n_images: u32,
) -> Result<Pipeline, Error> {
let device = &self.device.device;
let bindings = (0..n_buffers)
.map(|i| {
let mut bindings = Vec::new();
for i in 0..n_buffers {
bindings.push(
vk::DescriptorSetLayoutBinding::builder()
.binding(i)
.descriptor_type(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(1)
.stage_flags(vk::ShaderStageFlags::COMPUTE)
.build()
})
.collect::<Vec<_>>();
.build(),
);
}
for i in n_buffers..n_buffers + n_images {
bindings.push(
vk::DescriptorSetLayoutBinding::builder()
.binding(i)
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
.descriptor_count(1)
.stage_flags(vk::ShaderStageFlags::COMPUTE)
.build(),
);
}
let descriptor_set_layout = device.create_descriptor_set_layout(
&vk::DescriptorSetLayoutCreateInfo::builder().bindings(&bindings),
None,
@ -344,12 +577,26 @@ impl crate::Device for VkDevice {
&self,
pipeline: &Pipeline,
bufs: &[&Buffer],
images: &[&Image],
) -> Result<DescriptorSet, Error> {
let device = &self.device.device;
let descriptor_pool_sizes = [vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(bufs.len() as u32)
.build()];
let mut descriptor_pool_sizes = Vec::new();
if !bufs.is_empty() {
descriptor_pool_sizes.push(
vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(bufs.len() as u32)
.build(),
);
}
if !images.is_empty() {
descriptor_pool_sizes.push(
vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_IMAGE)
.descriptor_count(images.len() as u32)
.build(),
);
}
let descriptor_pool = device.create_descriptor_pool(
&vk::DescriptorPoolCreateInfo::builder()
.pool_sizes(&descriptor_pool_sizes)
@ -380,6 +627,23 @@ impl crate::Device for VkDevice {
&[],
);
}
for (i, image) in images.iter().enumerate() {
let binding = i + bufs.len();
let image_info = vk::DescriptorImageInfo::builder()
.sampler(vk::Sampler::null())
.image_view(image.image_view)
.image_layout(vk::ImageLayout::GENERAL)
.build();
device.update_descriptor_sets(
&[vk::WriteDescriptorSet::builder()
.dst_set(descriptor_sets[0])
.dst_binding(binding as u32)
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE)
.image_info(&[image_info])
.build()],
&[],
);
}
Ok(DescriptorSet {
descriptor_set: descriptor_sets[0],
})
@ -390,7 +654,7 @@ impl crate::Device for VkDevice {
let device = &self.device.device;
let command_pool = device.create_command_pool(
&vk::CommandPoolCreateInfo::builder()
.flags(vk::CommandPoolCreateFlags::empty())
.flags(vk::CommandPoolCreateFlags::RESET_COMMAND_BUFFER)
.queue_family_index(self.qfi),
None,
)?;
@ -421,7 +685,7 @@ impl crate::Device for VkDevice {
}
}
unsafe fn reap_query_pool(&self, pool: Self::QueryPool) -> Result<Vec<f64>, Error> {
unsafe fn reap_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
let device = &self.device.device;
let mut buf = vec![0u64; pool.n_queries as usize];
device.get_query_pool_results(
@ -431,7 +695,6 @@ impl crate::Device for VkDevice {
&mut buf,
vk::QueryResultFlags::TYPE_64,
)?;
device.destroy_query_pool(pool.pool, None);
let ts0 = buf[0];
let tsp = self.timestamp_period as f64 * 1e-9;
let result = buf[1..]
@ -444,23 +707,33 @@ impl crate::Device for VkDevice {
/// Run the command buffer.
///
/// This version simply blocks until it's complete.
unsafe fn run_cmd_buf(&self, cmd_buf: &CmdBuf) -> Result<(), Error> {
unsafe fn run_cmd_buf(
&self,
cmd_buf: &CmdBuf,
wait_semaphores: &[Self::Semaphore],
signal_semaphores: &[Self::Semaphore],
fence: Option<&Self::Fence>,
) -> Result<(), Error> {
let device = &self.device.device;
// Run the command buffer.
let fence = device.create_fence(
&vk::FenceCreateInfo::builder().flags(vk::FenceCreateFlags::empty()),
None,
)?;
let fence = match fence {
Some(fence) => *fence,
None => vk::Fence::null(),
};
let wait_stages = wait_semaphores
.iter()
.map(|_| vk::PipelineStageFlags::ALL_COMMANDS)
.collect::<Vec<_>>();
device.queue_submit(
self.queue,
&[vk::SubmitInfo::builder()
.command_buffers(&[cmd_buf.cmd_buf])
.wait_semaphores(wait_semaphores)
.signal_semaphores(signal_semaphores)
.wait_dst_stage_mask(&wait_stages)
.build()],
fence,
)?;
device.wait_for_fences(&[fence], true, 100_000_000)?;
// TODO: handle errors better (currently leaks fence and can lead to other problems)
Ok(())
}
@ -556,6 +829,37 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
);
}
/// Record a layout transition for `image` from `src_layout` to `dst_layout`.
///
/// The barrier is deliberately coarse: it synchronizes all commands before it with all
/// commands after it and covers every mip level and array layer of the color aspect.
unsafe fn image_barrier(
    &mut self,
    image: &Image,
    src_layout: ImageLayout,
    dst_layout: ImageLayout,
) {
    let device = &self.device.device;
    let whole_color_image = vk::ImageSubresourceRange {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        base_mip_level: 0,
        level_count: vk::REMAINING_MIP_LEVELS,
        base_array_layer: 0,
        layer_count: vk::REMAINING_ARRAY_LAYERS,
    };
    let barrier = vk::ImageMemoryBarrier::builder()
        .image(image.image)
        .src_access_mask(vk::AccessFlags::MEMORY_WRITE)
        // The image may be written after the transition (compute stores in `General`,
        // blit destination in `BlitDst`), so later writes must be covered as well as reads.
        .dst_access_mask(vk::AccessFlags::MEMORY_READ | vk::AccessFlags::MEMORY_WRITE)
        .old_layout(map_image_layout(src_layout))
        .new_layout(map_image_layout(dst_layout))
        .subresource_range(whole_color_image)
        .build();
    device.cmd_pipeline_barrier(
        self.cmd_buf,
        vk::PipelineStageFlags::ALL_COMMANDS,
        vk::PipelineStageFlags::ALL_COMMANDS,
        vk::DependencyFlags::empty(),
        &[],
        &[],
        &[barrier],
    );
}
unsafe fn clear_buffer(&self, buffer: &Buffer) {
let device = &self.device.device;
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
@ -572,14 +876,74 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
);
}
/// Record a copy of the whole of `src` (mip level 0, first array layer) into `dst`.
///
/// The texels are written tightly packed starting at offset 0 of the buffer. The copy is
/// recorded with `src` in the `TRANSFER_SRC_OPTIMAL` layout.
unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) {
    let color_layer = vk::ImageSubresourceLayers {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        mip_level: 0,
        base_array_layer: 0,
        layer_count: 1,
    };
    let region = vk::BufferImageCopy {
        buffer_offset: 0,
        buffer_row_length: 0,   // tight packing
        buffer_image_height: 0, // tight packing
        image_subresource: color_layer,
        image_offset: vk::Offset3D { x: 0, y: 0, z: 0 },
        image_extent: src.extent,
    };
    self.device.device.cmd_copy_image_to_buffer(
        self.cmd_buf,
        src.image,
        vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
        dst.buffer,
        &[region],
    );
}
/// Record a blit of the whole of `src` onto the whole of `dst`.
///
/// The full extent of each image is used as the blit rectangle, with linear filtering.
/// The blit is recorded with `src` in `TRANSFER_SRC_OPTIMAL` and `dst` in
/// `TRANSFER_DST_OPTIMAL`.
unsafe fn blit_image(&self, src: &Image, dst: &Image) {
    // Mip level 0 of the first array layer, color aspect - used for both images.
    let color_layer = vk::ImageSubresourceLayers {
        aspect_mask: vk::ImageAspectFlags::COLOR,
        mip_level: 0,
        base_array_layer: 0,
        layer_count: 1,
    };
    let origin = vk::Offset3D { x: 0, y: 0, z: 0 };
    // Corner opposite the origin for an image of the given extent.
    let far_corner = |extent: &vk::Extent3D| vk::Offset3D {
        x: extent.width as i32,
        y: extent.height as i32,
        z: extent.depth as i32,
    };
    let region = vk::ImageBlit {
        src_subresource: color_layer,
        src_offsets: [origin, far_corner(&src.extent)],
        dst_subresource: color_layer,
        dst_offsets: [origin, far_corner(&dst.extent)],
    };
    self.device.device.cmd_blit_image(
        self.cmd_buf,
        src.image,
        vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
        dst.image,
        vk::ImageLayout::TRANSFER_DST_OPTIMAL,
        &[region],
        vk::Filter::LINEAR,
    );
}
unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {
let device = &self.device.device;
device.cmd_reset_query_pool(
self.cmd_buf,
pool.pool,
0,
pool.n_queries,
);
device.cmd_reset_query_pool(self.cmd_buf, pool.pool, 0, pool.n_queries);
}
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
@ -603,13 +967,68 @@ impl crate::MemFlags for MemFlags {
}
}
impl VkSwapchain {
    /// Acquire the next swapchain image, waiting without a timeout.
    ///
    /// Returns the index of the acquired image and the semaphore that was passed to the
    /// acquire call. Semaphores are handed out round-robin; the rotation only advances
    /// when the acquire succeeds.
    pub unsafe fn next(&mut self) -> Result<(usize, vk::Semaphore), Error> {
        let slot = self.acquisition_idx;
        let semaphore = self.acquisition_semaphores[slot];
        let (image_idx, _suboptimal) = self.swapchain_fn.acquire_next_image(
            self.swapchain,
            std::u64::MAX,
            semaphore,
            vk::Fence::null(),
        )?;
        self.acquisition_idx = (slot + 1) % self.acquisition_semaphores.len();

        Ok((image_idx as usize, semaphore))
    }

    /// Wrap swapchain image `idx` in an `Image` handle.
    ///
    /// The handle has no memory or view attached (both are null) and reports the
    /// swapchain extent with a depth of 1. Panics if `idx` is out of range.
    pub unsafe fn image(&self, idx: usize) -> Image {
        let vk::Extent2D { width, height } = self.extent;
        Image {
            image: self.images[idx],
            image_memory: vk::DeviceMemory::null(),
            image_view: vk::ImageView::null(),
            extent: vk::Extent3D {
                width,
                height,
                depth: 1,
            },
        }
    }

    /// Queue image `image_idx` for presentation once `semaphores` have been signaled.
    ///
    /// Returns the flag reported by the present call on success.
    pub unsafe fn present(
        &self,
        image_idx: usize,
        semaphores: &[vk::Semaphore],
    ) -> Result<bool, Error> {
        let swapchains = [self.swapchain];
        let image_indices = [image_idx as u32];
        let present_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(semaphores)
            .build();
        let flag = self
            .swapchain_fn
            .queue_present(self.present_queue, &present_info)?;
        Ok(flag)
    }
}
unsafe fn choose_compute_device(
instance: &Instance,
devices: &[vk::PhysicalDevice],
surface: Option<&VkSurface>,
) -> Option<(vk::PhysicalDevice, u32)> {
for pdevice in devices {
let props = instance.get_physical_device_queue_family_properties(*pdevice);
for (ix, info) in props.iter().enumerate() {
// Check for surface presentation support
if let Some(surface) = surface {
if !surface
.surface_fn
.get_physical_device_surface_support(*pdevice, ix as u32, surface.surface)
.unwrap()
{
continue;
}
}
if info.queue_flags.contains(vk::QueueFlags::COMPUTE) {
return Some((*pdevice, ix as u32));
}
@ -644,3 +1063,13 @@ fn convert_u32_vec(src: &[u8]) -> Vec<u32> {
})
.collect()
}
/// Translate a HAL `ImageLayout` into the corresponding Vulkan image layout.
fn map_image_layout(layout: ImageLayout) -> vk::ImageLayout {
    let vk_layout = match layout {
        // General-purpose and "don't care" layouts.
        ImageLayout::General => vk::ImageLayout::GENERAL,
        ImageLayout::Undefined => vk::ImageLayout::UNDEFINED,
        // Transfer layouts used around blits and copies.
        ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
        ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL,
        // Presentation to a surface.
        ImageLayout::Present => vk::ImageLayout::PRESENT_SRC_KHR,
    };
    vk_layout
}

View file

@ -1,20 +1,21 @@
[package]
name = "piet-gpu"
version = "0.1.0"
authors = ["Raph Levien <raph.levien@gmail.com>"]
description = "A compute-centric GPU 2D renderer."
license = "MIT/Apache-2.0"
edition = "2018"
[dependencies.piet-gpu-hal]
path = "../piet-gpu-hal"
[dependencies.piet-gpu-types]
path = "../piet-gpu-types"
[dependencies]
kurbo = "0.5.11"
piet = "0.0.12"
png = "0.16.2"
rand = "0.7.3"
roxmltree = "0.11"
[package]
name = "piet-gpu"
version = "0.1.0"
authors = ["Raph Levien <raph.levien@gmail.com>"]
description = "A compute-centric GPU 2D renderer."
license = "MIT/Apache-2.0"
edition = "2018"
[dependencies.piet-gpu-hal]
path = "../piet-gpu-hal"
[dependencies.piet-gpu-types]
path = "../piet-gpu-types"
[dependencies]
kurbo = "0.5.11"
piet = "0.0.12"
png = "0.16.2"
rand = "0.7.3"
roxmltree = "0.11"
winit = "0.22"

View file

@ -24,9 +24,7 @@ layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 3) buffer ImageBuf {
uint[] image;
};
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
#include "ptcl.h"
#include "segment.h"
@ -65,11 +63,11 @@ void main() {
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
for (int i = 0; i < seg_chunk.n; i++) {
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos));
}
vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos));
}
seg_chunk_ref = seg_chunk.next;
} while (seg_chunk_ref.offset != 0);
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
@ -118,8 +116,5 @@ void main() {
cmd_ref.offset += Cmd_size;
}
// TODO: sRGB
uvec4 s = uvec4(round(vec4(rgb, 1.0) * 255.0));
uint rgba_packed = s.r | (s.g << 8) | (s.b << 16) | (s.a << 24);
image[xy_uint.y * IMAGE_WIDTH + xy_uint.x] = rgba_packed;
imageStore(image, ivec2(xy_uint), vec4(rgb, 1.0));
}

View file

@ -8,7 +8,7 @@ use piet::kurbo::{BezPath, Circle, Line, Point, Vec2};
use piet::{Color, RenderContext};
use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags};
use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags};
mod pico_svg;
mod render_ctx;
@ -16,8 +16,14 @@ mod render_ctx;
use render_ctx::PietGpuRenderContext;
use pico_svg::PicoSvg;
const WIDTH: usize = 2048;
const HEIGHT: usize = 1536;
use winit::{
event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::WindowBuilder,
};
const WIDTH: usize = TILE_W * WIDTH_IN_TILES;
const HEIGHT: usize = TILE_H * HEIGHT_IN_TILES;
const TILE_W: usize = 16;
const TILE_H: usize = 16;
@ -34,6 +40,8 @@ const K2_PER_TILE_SIZE: usize = 8;
const N_CIRCLES: usize = 1;
const NUM_FRAMES: usize = 2;
fn render_scene(rc: &mut impl RenderContext) {
let mut rng = rand::thread_rng();
for _ in 0..N_CIRCLES {
@ -108,10 +116,35 @@ fn dump_k1_data(k1_buf: &[u32]) {
}
}
fn main() {
let instance = VkInstance::new().unwrap();
fn main() -> Result<(), Error> {
let event_loop = EventLoop::new();
let window = WindowBuilder::new()
.with_inner_size(winit::dpi::LogicalSize {
width: (WIDTH_IN_TILES * 8) as f64,
height: (HEIGHT_IN_TILES * 8) as f64,
})
.with_resizable(false) // currently not supported
.build(&event_loop)?;
let (instance, surface) = VkInstance::new(Some(&window))?;
unsafe {
let device = instance.device().unwrap();
let device = instance.device(surface.as_ref())?;
let mut swapchain = instance.swapchain(&device, surface.as_ref().unwrap())?;
let mut current_frame = 0;
let present_semaphores = (0..NUM_FRAMES)
.map(|_| device.create_semaphore())
.collect::<Result<Vec<_>, Error>>()?;
let frame_fences = (0..NUM_FRAMES)
.map(|_| device.create_fence(false))
.collect::<Result<Vec<_>, Error>>()?;
let mut cmd_buffers = (0..NUM_FRAMES)
.map(|_| device.create_cmd_buf())
.collect::<Result<Vec<_>, Error>>()?;
let query_pools = (0..NUM_FRAMES)
.map(|_| device.create_query_pool(6))
.collect::<Result<Vec<_>, Error>>()?;
let host = MemFlags::host_coherent();
let dev = MemFlags::device_local();
let mut ctx = PietGpuRenderContext::new();
@ -124,30 +157,27 @@ fn main() {
let scene_dev = device
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap();
device.write_buffer(&scene_buf, &scene).unwrap();
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap();
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap();
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
device.write_buffer(&scene_buf, &scene)?;
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?;
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
let image_buf = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
.unwrap();
let image_dev = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
.unwrap();
let image_buf = device.create_buffer((WIDTH * HEIGHT * 4) as u64, host)?;
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
let k1_alloc_buf_host = device.create_buffer(4, host).unwrap();
let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
let k1_alloc_buf_host = device.create_buffer(4, host)?;
let k1_alloc_buf_dev = device.create_buffer(4, dev)?;
let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE;
device
.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])
.unwrap();
device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?;
let k1_code = include_bytes!("../shader/kernel1.spv");
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap();
let k1_pipeline = device
.create_simple_compute_pipeline(k1_code, 3, 0)
.unwrap();
let k1_ds = device
.create_descriptor_set(
&k1_pipeline,
&[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
&[],
)
.unwrap();
@ -158,11 +188,14 @@ fn main() {
.write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])
.unwrap();
let k2s_code = include_bytes!("../shader/kernel2s.spv");
let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4).unwrap();
let k2s_pipeline = device
.create_simple_compute_pipeline(k2s_code, 4, 0)
.unwrap();
let k2s_ds = device
.create_descriptor_set(
&k2s_pipeline,
&[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev],
&[],
)
.unwrap();
@ -193,7 +226,7 @@ fn main() {
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
.unwrap();
let k3_code = include_bytes!("../shader/kernel3.spv");
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6).unwrap();
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0).unwrap();
let k3_ds = device
.create_descriptor_set(
&k3_pipeline,
@ -205,19 +238,16 @@ fn main() {
&ptcl_buf,
&k3_alloc_buf_dev,
],
&[],
)
.unwrap();
let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 4).unwrap();
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1).unwrap();
let k4_ds = device
.create_descriptor_set(
&k4_pipeline,
&[&ptcl_buf, &segment_buf, &fill_seg_buf, &image_dev],
)
.create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev])
.unwrap();
let query_pool = device.create_query_pool(6).unwrap();
let query_pool = &query_pools[0];
let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
@ -232,6 +262,7 @@ fn main() {
cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(&image_dev, ImageLayout::Undefined, ImageLayout::General);
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
@ -272,10 +303,12 @@ fn main() {
);
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&image_dev, &image_buf);
cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc);
cmd_buf.copy_image_to_buffer(&image_dev, &image_buf);
cmd_buf.finish();
device.run_cmd_buf(&cmd_buf).unwrap();
let timestamps = device.reap_query_pool(query_pool).unwrap();
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&frame_fences[0]))?;
device.wait_and_reset(&[frame_fences[0]])?;
let timestamps = device.reap_query_pool(&query_pool).unwrap();
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
println!(
"Kernel 2s time: {:.3}ms",
@ -300,21 +333,129 @@ fn main() {
dump_k1_data(&k1_data);
*/
let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
// (probably passing a slice into a closure). But for now: keep it simple.
device.read_buffer(&image_buf, &mut img_data).unwrap();
if false {
let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
// (probably passing a slice into a closure). But for now: keep it simple.
device.read_buffer(&image_buf, &mut img_data).unwrap();
// Write image as PNG file.
let path = Path::new("image.png");
let file = File::create(path).unwrap();
let ref mut w = BufWriter::new(file);
// Write image as PNG file.
let path = Path::new("image.png");
let file = File::create(path).unwrap();
let ref mut w = BufWriter::new(file);
let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32);
encoder.set_color(png::ColorType::RGBA);
encoder.set_depth(png::BitDepth::Eight);
let mut writer = encoder.write_header().unwrap();
let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32);
encoder.set_color(png::ColorType::RGBA);
encoder.set_depth(png::BitDepth::Eight);
let mut writer = encoder.write_header().unwrap();
writer.write_image_data(&img_data).unwrap();
writer.write_image_data(&img_data).unwrap();
}
event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Wait;
match event {
Event::WindowEvent { event, window_id } if window_id == window.id() => {
match event {
WindowEvent::CloseRequested => {
*control_flow = ControlFlow::Exit;
}
_ => (),
}
}
Event::MainEventsCleared => {
window.request_redraw();
}
Event::RedrawRequested(window_id) if window_id == window.id() => {
let frame_idx = current_frame % NUM_FRAMES;
let query_pool = &query_pools[frame_idx];
if current_frame >= NUM_FRAMES {
device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap();
let timestamps = device.reap_query_pool(query_pool).unwrap();
window.set_title(&format!("k1: {:.3}ms, k2: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms",
timestamps[0] * 1e3,
(timestamps[1] - timestamps[0]) * 1e3,
(timestamps[2] - timestamps[1]) * 1e3,
(timestamps[3] - timestamps[2]) * 1e3,
));
}
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
let swap_image = swapchain.image(image_idx);
let cmd_buf = &mut cmd_buffers[frame_idx];
cmd_buf.begin();
cmd_buf.reset_query_pool(&query_pool);
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev);
cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier();
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&k1_pipeline,
&k1_ds,
((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&k2s_pipeline,
&k2s_ds,
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&k3_pipeline,
&k3_ds,
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(&image_dev, ImageLayout::BlitSrc, ImageLayout::General);
cmd_buf.dispatch(
&k4_pipeline,
&k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(
&swap_image,
ImageLayout::Undefined,
ImageLayout::BlitDst,
);
cmd_buf.image_barrier(&image_dev, ImageLayout::General, ImageLayout::BlitSrc);
cmd_buf.blit_image(&image_dev, &swap_image);
cmd_buf.image_barrier(
&swap_image,
ImageLayout::BlitDst,
ImageLayout::Present,
);
cmd_buf.finish();
device
.run_cmd_buf(
&cmd_buf,
&[acquisition_semaphore],
&[present_semaphores[frame_idx]],
Some(&frame_fences[frame_idx]),
)
.unwrap();
swapchain
.present(image_idx, &[present_semaphores[frame_idx]])
.unwrap();
current_frame += 1;
}
_ => (),
}
})
}
}