diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs index cc9e77b..0194e1e 100644 --- a/piet-gpu-hal/src/dx12.rs +++ b/piet-gpu-hal/src/dx12.rs @@ -3,35 +3,40 @@ mod error; mod wrappers; +use std::sync::{Arc, Mutex, Weak}; use std::{cell::Cell, convert::TryInto, mem, ptr}; -use winapi::shared::dxgi1_3; use winapi::shared::minwindef::TRUE; +use winapi::shared::{dxgi, dxgi1_2, dxgi1_3, dxgitype}; use winapi::um::d3d12; +use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; + use smallvec::SmallVec; -use crate::{BufferUsage, Error, ImageLayout}; +use crate::{BufferUsage, Error, GpuInfo, ImageLayout}; -use self::wrappers::{ - CommandAllocator, CommandQueue, Device, Factory4, GraphicsCommandList, Resource, ShaderByteCode, -}; +use self::wrappers::{CommandAllocator, CommandQueue, Device, Factory4, Resource, ShaderByteCode}; pub struct Dx12Instance { factory: Factory4, } -// TODO -pub struct Dx12Surface; +pub struct Dx12Surface { + hwnd: winapi::shared::windef::HWND, +} -// TODO -pub struct Dx12Swapchain; +pub struct Dx12Swapchain { + swapchain: wrappers::SwapChain3, + size: (u32, u32), +} pub struct Dx12Device { device: Device, - command_allocator: CommandAllocator, + free_allocators: Arc>>, command_queue: CommandQueue, ts_freq: u64, + gpu_info: GpuInfo, memory_arch: MemoryArchitecture, } @@ -47,7 +52,13 @@ pub struct Image { size: (u32, u32), } -pub struct CmdBuf(GraphicsCommandList); +pub struct CmdBuf { + c: wrappers::GraphicsCommandList, + allocator: Option, + // One for resetting, one to put back into the allocator pool + allocator_clone: CommandAllocator, + free_allocators: Weak>>, +} pub struct Pipeline { pipeline_state: wrappers::PipelineState, @@ -72,6 +83,8 @@ pub struct Fence { val: Cell, } +/// This will probably be renamed "PresentSem" or similar. I believe no +/// semaphore is needed for presentation on DX12. pub struct Semaphore; #[derive(Default)] @@ -103,7 +116,9 @@ impl Dx12Instance { /// /// TODO: take a raw window handle. /// TODO: can probably be a trait. - pub fn new(window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>) -> Result<(Dx12Instance, Option), Error> { + pub fn new( + window_handle: Option<&dyn HasRawWindowHandle>, + ) -> Result<(Dx12Instance, Option), Error> { unsafe { #[cfg(debug_assertions)] if let Err(e) = wrappers::enable_debug_layer() { @@ -118,7 +133,16 @@ impl Dx12Instance { let factory_flags: u32 = 0; let factory = Factory4::create(factory_flags)?; - Ok((Dx12Instance { factory }, None)) + + let mut surface = None; + if let Some(window_handle) = window_handle { + let window_handle = window_handle.raw_window_handle(); + if let RawWindowHandle::Windows(w) = window_handle { + let hwnd = w.hwnd as *mut _; + surface = Some(Dx12Surface { hwnd }); + } + } + Ok((Dx12Instance { factory }, surface)) } } @@ -136,7 +160,7 @@ impl Dx12Instance { d3d12::D3D12_COMMAND_QUEUE_FLAG_NONE, 0, )?; - let command_allocator = device.create_command_allocator(list_type)?; + let ts_freq = command_queue.get_timestamp_frequency()?; let features_architecture = device.get_features_architecture()?; let uma = features_architecture.UMA == TRUE; @@ -146,15 +170,58 @@ impl Dx12Instance { (true, false) => MemoryArchitecture::UMA, _ => MemoryArchitecture::NUMA, }; + let use_staging_buffers = memory_arch == MemoryArchitecture::NUMA; + // These values are appropriate for Shader Model 5. When we open up + // DXIL, fix this with proper dynamic queries. + let gpu_info = GpuInfo { + has_descriptor_indexing: false, + has_subgroups: false, + subgroup_size: None, + has_memory_model: false, + use_staging_buffers, + }; + let free_allocators = Default::default(); Ok(Dx12Device { device, command_queue, - command_allocator, + free_allocators, ts_freq, memory_arch, + gpu_info, }) } } + + pub unsafe fn swapchain( + &self, + width: usize, + height: usize, + device: &Dx12Device, + surface: &Dx12Surface, + ) -> Result { + const FRAME_COUNT: u32 = 2; + let desc = dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { + Width: width as u32, + Height: height as u32, + AlphaMode: dxgi1_2::DXGI_ALPHA_MODE_IGNORE, + BufferCount: FRAME_COUNT, + Format: winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + Flags: 0, + BufferUsage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Scaling: dxgi1_2::DXGI_SCALING_STRETCH, + Stereo: winapi::shared::minwindef::FALSE, + SwapEffect: dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, + }; + let swapchain = + self.factory + .create_swapchain_for_hwnd(&device.command_queue, surface.hwnd, desc)?; + let size = (width as u32, height as u32); + Ok(Dx12Swapchain { swapchain, size }) + } } impl crate::Device for Dx12Device { @@ -224,15 +291,24 @@ impl crate::Device for Dx12Device { fn create_cmd_buf(&self) -> Result { let list_type = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT; + let allocator = self.free_allocators.lock().unwrap().pop(); + let allocator = if let Some(allocator) = allocator { + allocator + } else { + unsafe { self.device.create_command_allocator(list_type)? } + }; let node_mask = 0; unsafe { - let cmd_buf = self.device.create_graphics_command_list( - list_type, - &self.command_allocator, - None, - node_mask, - )?; - Ok(CmdBuf(cmd_buf)) + let c = self + .device + .create_graphics_command_list(list_type, &allocator, None, node_mask)?; + let free_allocators = Arc::downgrade(&self.free_allocators); + Ok(CmdBuf { + c, + allocator: Some(allocator.clone()), + allocator_clone: allocator, + free_allocators, + }) } } @@ -270,16 +346,19 @@ impl crate::Device for Dx12Device { unsafe fn run_cmd_bufs( &self, cmd_bufs: &[&Self::CmdBuf], - wait_semaphores: &[&Self::Semaphore], - signal_semaphores: &[&Self::Semaphore], + _wait_semaphores: &[&Self::Semaphore], + _signal_semaphores: &[&Self::Semaphore], fence: Option<&Self::Fence>, ) -> Result<(), Error> { // TODO: handle semaphores let lists = cmd_bufs .iter() - .map(|c| c.0.as_raw_command_list()) + .map(|c| c.c.as_raw_command_list()) .collect::>(); self.command_queue.execute_command_lists(&lists); + for c in cmd_bufs { + c.c.reset(&c.allocator_clone, None); + } if let Some(fence) = fence { let val = fence.val.get() + 1; fence.val.set(val); @@ -316,7 +395,7 @@ impl crate::Device for Dx12Device { } unsafe fn create_semaphore(&self) -> Result { - todo!() + Ok(Semaphore) } unsafe fn create_fence(&self, signaled: bool) -> Result { @@ -340,7 +419,7 @@ impl crate::Device for Dx12Device { } fn query_gpu_info(&self) -> crate::GpuInfo { - todo!() + self.gpu_info.clone() } unsafe fn pipeline_builder(&self) -> Self::PipelineBuilder { @@ -376,7 +455,13 @@ impl crate::CmdBuf for CmdBuf { unsafe fn begin(&mut self) {} unsafe fn finish(&mut self) { - let _ = self.0.close(); + let _ = self.c.close(); + if let Some(free_allocators) = self.free_allocators.upgrade() { + free_allocators + .lock() + .unwrap() + .push(self.allocator.take().unwrap()); + } } unsafe fn dispatch( @@ -385,15 +470,15 @@ impl crate::CmdBuf for CmdBuf { descriptor_set: &DescriptorSet, size: (u32, u32, u32), ) { - self.0.set_pipeline_state(&pipeline.pipeline_state); - self.0 + self.c.set_pipeline_state(&pipeline.pipeline_state); + self.c .set_compute_pipeline_root_signature(&pipeline.root_signature); - self.0.set_descriptor_heaps(&[&descriptor_set.0]); - self.0.set_compute_root_descriptor_table( + self.c.set_descriptor_heaps(&[&descriptor_set.0]); + self.c.set_compute_root_descriptor_table( 0, descriptor_set.0.get_gpu_descriptor_handle_at_offset(0), ); - self.0.dispatch(size.0, size.1, size.2); + self.c.dispatch(size.0, size.1, size.2); } unsafe fn memory_barrier(&mut self) { @@ -402,7 +487,7 @@ impl crate::CmdBuf for CmdBuf { // in the barrier. But it seems like this is a reasonable way to create a // global barrier. let bar = wrappers::create_uav_resource_barrier(ptr::null_mut()); - self.0.resource_barrier(&[bar]); + self.c.resource_barrier(&[bar]); } unsafe fn host_barrier(&mut self) { @@ -427,7 +512,7 @@ impl crate::CmdBuf for CmdBuf { src_state, dst_state, ); - self.0.resource_barrier(&[bar]); + self.c.resource_barrier(&[bar]); } } @@ -440,33 +525,31 @@ impl crate::CmdBuf for CmdBuf { unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { // TODO: consider using copy_resource here (if sizes match) let size = src.size.min(dst.size); - self.0.copy_buffer(&dst.resource, 0, &src.resource, 0, size); + self.c.copy_buffer(&dst.resource, 0, &src.resource, 0, size); } unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { - self.0 + self.c .copy_texture_to_buffer(&src.resource, &dst.resource, src.size.0, src.size.1); } unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { - self.0 + self.c .copy_buffer_to_texture(&src.resource, &dst.resource, dst.size.0, dst.size.1); } unsafe fn blit_image(&self, src: &Image, dst: &Image) { - self.0.copy_resource(&src.resource, &dst.resource); + self.c.copy_resource(&src.resource, &dst.resource); } - unsafe fn reset_query_pool(&mut self, pool: &QueryPool) { - todo!() - } + unsafe fn reset_query_pool(&mut self, _pool: &QueryPool) {} unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { - self.0.end_timing_query(&pool.heap, query); + self.c.end_timing_query(&pool.heap, query); } unsafe fn finish_timestamps(&mut self, pool: &QueryPool) { - self.0 + self.c .resolve_timing_query_data(&pool.heap, 0, pool.n_queries, &pool.buf.resource, 0); } } @@ -637,3 +720,27 @@ fn resource_state_for_image_layout(layout: ImageLayout) -> d3d12::D3D12_RESOURCE ImageLayout::ShaderRead => d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, } } + +impl Dx12Swapchain { + pub unsafe fn next(&mut self) -> Result<(usize, Semaphore), Error> { + let idx = self.swapchain.get_current_back_buffer_index(); + Ok((idx as usize, Semaphore)) + } + + pub unsafe fn image(&self, idx: usize) -> Image { + let buffer = self.swapchain.get_buffer(idx as u32); + Image { + resource: buffer, + size: self.size, + } + } + + pub unsafe fn present( + &self, + _image_idx: usize, + _semaphores: &[&Semaphore], + ) -> Result { + self.swapchain.present(1, 0)?; + Ok(false) + } +} diff --git a/piet-gpu-hal/src/dx12/error.rs b/piet-gpu-hal/src/dx12/error.rs index d954ac1..6208fde 100644 --- a/piet-gpu-hal/src/dx12/error.rs +++ b/piet-gpu-hal/src/dx12/error.rs @@ -41,6 +41,7 @@ impl std::error::Error for Error {} /// See https://docs.microsoft.com/en-us/windows/win32/direct3ddxgi/dxgi-error fn err_str_for_hr(hr: winerror::HRESULT) -> Option<&'static str> { Some(match hr as u32 { + 0x80004005 => "E_FAIL", 0x80070057 => "E_INVALIDARG", 0x887a0001 => "DXGI_ERROR_INVALID_CALL", 0x887a0002 => "DXGI_ERROR_NOT_FOUND", diff --git a/piet-gpu-hal/src/dx12/wrappers.rs b/piet-gpu-hal/src/dx12/wrappers.rs index f542549..e132441 100644 --- a/piet-gpu-hal/src/dx12/wrappers.rs +++ b/piet-gpu-hal/src/dx12/wrappers.rs @@ -40,10 +40,6 @@ pub struct Factory2(pub ComPtr); #[derive(Clone)] pub struct Factory4(pub ComPtr); #[derive(Clone)] -pub struct SwapChain(pub ComPtr); -#[derive(Clone)] -pub struct SwapChain1(pub ComPtr); -#[derive(Clone)] pub struct SwapChain3(pub ComPtr); #[derive(Clone)] @@ -218,22 +214,24 @@ impl Factory4 { pub unsafe fn create_swapchain_for_hwnd( &self, - command_queue: CommandQueue, + command_queue: &CommandQueue, hwnd: windef::HWND, desc: dxgi1_2::DXGI_SWAP_CHAIN_DESC1, - ) -> SwapChain3 { + ) -> Result { let mut swap_chain = ptr::null_mut(); - error::error_if_failed_else_unit(self.0.CreateSwapChainForHwnd( - command_queue.0.as_raw() as *mut _, - hwnd, - &desc, - ptr::null(), - ptr::null_mut(), - &mut swap_chain as *mut _ as *mut _, - )) - .expect("could not creation swapchain for hwnd"); + explain_error( + self.0.CreateSwapChainForHwnd( + command_queue.0.as_raw() as *mut _, + hwnd, + &desc, + ptr::null(), + ptr::null_mut(), + &mut swap_chain as *mut _ as *mut _, + ), + "could not creation swapchain for hwnd", + )?; - SwapChain3(ComPtr::from_raw(swap_chain)) + Ok(SwapChain3(ComPtr::from_raw(swap_chain))) } } @@ -263,47 +261,6 @@ impl CommandQueue { } } -impl SwapChain { - pub unsafe fn get_buffer(&self, id: u32) -> Resource { - let mut resource = ptr::null_mut(); - error::error_if_failed_else_unit(self.0.GetBuffer( - id, - &d3d12::ID3D12Resource::uuidof(), - &mut resource as *mut _ as *mut _, - )) - .expect("SwapChain could not get buffer"); - - Resource::new(resource) - } - - // TODO: present flags - pub unsafe fn present(&self, interval: u32, flags: u32) -> winerror::HRESULT { - self.0.Present(interval, flags) - } -} - -impl SwapChain1 { - pub unsafe fn cast_into_swap_chain3(&self) -> SwapChain3 { - SwapChain3( - self.0 - .cast::() - .expect("could not cast into SwapChain3"), - ) - } - - pub unsafe fn get_buffer(&self, id: u32) -> Resource { - let mut resource = ptr::null_mut(); - error::error_if_failed_else_unit(self.0.GetBuffer( - id, - &d3d12::ID3D12Resource::uuidof(), - &mut resource as *mut _ as *mut _, - )) - .expect("SwapChain1 could not get buffer"); - - Resource::new(resource) - } -} - impl SwapChain3 { pub unsafe fn get_buffer(&self, id: u32) -> Resource { let mut resource = ptr::null_mut(); diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 9a16686..26147ed 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -9,9 +9,12 @@ pub mod hub; #[macro_use] mod macros; -// TODO make this not pub +// TODO: Don't make the module pub, but do figure out which types to +// export at the root level. pub mod mux; +// TODO: because these are conditionally included, "cargo fmt" does not +// see them. Figure that out, possibly including running rustfmt manually. mux_cfg! { #[cfg(vk)] pub mod vulkan; diff --git a/piet-gpu-hal/src/mux.rs b/piet-gpu-hal/src/mux.rs index 70525c4..0971416 100644 --- a/piet-gpu-hal/src/mux.rs +++ b/piet-gpu-hal/src/mux.rs @@ -20,7 +20,7 @@ use smallvec::SmallVec; mux_cfg! { #[cfg(vk)] - use crate::vulkan; + use crate::vulkan; } mux_cfg! { #[cfg(dx12)] @@ -83,6 +83,13 @@ pub enum ShaderCode<'a> { } impl Instance { + /// Create a new GPU instance appropriate for the surface. + /// + /// When multiple back-end GPU APIs are available (for example, Vulkan + /// and DX12), this function selects one at runtime. + /// + /// When no surface is given, the instance is suitable for compute-only + /// work. pub fn new( window_handle: Option<&dyn raw_window_handle::HasRawWindowHandle>, ) -> Result<(Instance, Option), Error> { @@ -108,6 +115,11 @@ impl Instance { Err("No suitable instances found".into()) } + /// Create a device appropriate for the surface. + /// + /// The "device" is the low-level GPU abstraction for creating resources + /// and submitting work. Most users of this library will want to wrap it in + /// a "session" which is similar but provides many conveniences. pub unsafe fn device(&self, surface: Option<&Surface>) -> Result { mux_match! { self; Instance::Vk(i) => i.device(surface.map(Surface::vk)).map(Device::Vk), @@ -115,6 +127,12 @@ impl Instance { } } + /// Create a swapchain. + /// + /// A swapchain is a small vector of images shared with the platform's + /// presentation logic. To actually display pixels, the application writes + /// into the swapchain images, then calls the present method to display + /// them. pub unsafe fn swapchain( &self, width: usize, @@ -126,7 +144,9 @@ impl Instance { Instance::Vk(i) => i .swapchain(width, height, device.vk(), surface.vk()) .map(Swapchain::Vk), - Instance::Dx12(_i) => todo!(), + Instance::Dx12(i) => i + .swapchain(width, height, device.dx12(), surface.dx12()) + .map(Swapchain::Dx12), } } } @@ -568,8 +588,9 @@ impl Swapchain { let (idx, sem) = s.next()?; Ok((idx, Semaphore::Vk(sem))) } - Swapchain::Dx12(_s) => { - todo!() + Swapchain::Dx12(s) => { + let (idx, sem) = s.next()?; + Ok((idx, Semaphore::Dx12(sem))) } } } @@ -577,7 +598,7 @@ impl Swapchain { pub unsafe fn image(&self, idx: usize) -> Image { mux_match! { self; Swapchain::Vk(s) => Image::Vk(s.image(idx)), - Swapchain::Dx12(_s) => todo!(), + Swapchain::Dx12(s) => Image::Dx12(s.image(idx)), } } @@ -595,7 +616,14 @@ impl Swapchain { .map(Semaphore::vk) .collect::>(), ), - Swapchain::Dx12(_s) => todo!(), + Swapchain::Dx12(s) => s.present( + image_idx, + &semaphores + .iter() + .copied() + .map(Semaphore::dx12) + .collect::>(), + ), } } }