Merge pull request #117 from linebender/cleanup

Reuse command buffers
This commit is contained in:
Raph Levien 2021-10-27 07:23:29 -07:00 committed by GitHub
commit c648038967
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 155 additions and 542 deletions

View file

@ -237,10 +237,10 @@ fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
let mut setup = String::new();
let mut extract = glsl_type(ty);
&extract.push_str("(");
extract.push_str("(");
for i in 0..*size {
if i != 0 {
&extract.push_str(", ");
extract.push_str(", ");
}
if is_f16 && i % 2 == 0 {
@ -250,9 +250,9 @@ fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
};
let el_offset = offset + i * scalar.size();
&extract.push_str(&gen_extract_scalar(el_offset, scalar));
extract.push_str(&gen_extract_scalar(el_offset, scalar));
}
&extract.push_str(")");
extract.push_str(")");
(setup, extract)
}
GpuType::InlineStruct(name) => (

View file

@ -173,6 +173,9 @@ pub trait CmdBuf<D: Device> {
unsafe fn finish(&mut self);
/// Return true if the command buffer is suitable for reuse.
unsafe fn reset(&mut self) -> bool;
unsafe fn dispatch(
&mut self,
pipeline: &D::Pipeline,

View file

@ -3,7 +3,6 @@
mod error;
mod wrappers;
use std::sync::{Arc, Mutex, Weak};
use std::{cell::Cell, convert::TryInto, mem, ptr};
use winapi::shared::minwindef::TRUE;
@ -33,7 +32,6 @@ pub struct Dx12Swapchain {
pub struct Dx12Device {
device: Device,
free_allocators: Arc<Mutex<Vec<CommandAllocator>>>,
command_queue: CommandQueue,
ts_freq: u64,
gpu_info: GpuInfo,
@ -54,10 +52,8 @@ pub struct Image {
pub struct CmdBuf {
c: wrappers::GraphicsCommandList,
allocator: Option<CommandAllocator>,
// One for resetting, one to put back into the allocator pool
allocator_clone: CommandAllocator,
free_allocators: Weak<Mutex<Vec<CommandAllocator>>>,
allocator: CommandAllocator,
needs_reset: bool,
}
pub struct Pipeline {
@ -150,7 +146,7 @@ impl Dx12Instance {
///
/// TODO: handle window.
/// TODO: probably can also be trait'ified.
pub fn device(&self, surface: Option<&Dx12Surface>) -> Result<Dx12Device, Error> {
pub fn device(&self, _surface: Option<&Dx12Surface>) -> Result<Dx12Device, Error> {
unsafe {
let device = Device::create_device(&self.factory)?;
let list_type = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT;
@ -184,11 +180,9 @@ impl Dx12Instance {
has_memory_model: false,
use_staging_buffers,
};
let free_allocators = Default::default();
Ok(Dx12Device {
device,
command_queue,
free_allocators,
ts_freq,
memory_arch,
gpu_info,
@ -295,23 +289,18 @@ impl crate::backend::Device for Dx12Device {
fn create_cmd_buf(&self) -> Result<Self::CmdBuf, Error> {
let list_type = d3d12::D3D12_COMMAND_LIST_TYPE_DIRECT;
let allocator = self.free_allocators.lock().unwrap().pop();
let allocator = if let Some(allocator) = allocator {
allocator
} else {
let allocator =
unsafe { self.device.create_command_allocator(list_type)? }
};
;
let node_mask = 0;
unsafe {
let c = self
.device
.create_graphics_command_list(list_type, &allocator, None, node_mask)?;
let free_allocators = Arc::downgrade(&self.free_allocators);
Ok(CmdBuf {
c,
allocator: Some(allocator.clone()),
allocator_clone: allocator,
free_allocators,
allocator,
needs_reset: false,
})
}
}
@ -364,9 +353,6 @@ impl crate::backend::Device for Dx12Device {
.map(|c| c.c.as_raw_command_list())
.collect::<SmallVec<[_; 4]>>();
self.command_queue.execute_command_lists(&lists);
for c in cmd_bufs {
c.c.reset(&c.allocator_clone, None);
}
if let Some(fence) = fence {
let val = fence.val.get() + 1;
fence.val.set(val);
@ -442,7 +428,7 @@ impl crate::backend::Device for Dx12Device {
DescriptorSetBuilder::default()
}
unsafe fn create_sampler(&self, params: crate::SamplerParams) -> Result<Self::Sampler, Error> {
unsafe fn create_sampler(&self, _params: crate::SamplerParams) -> Result<Self::Sampler, Error> {
todo!()
}
}
@ -464,19 +450,18 @@ impl Dx12Device {
}
impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
unsafe fn begin(&mut self) {}
unsafe fn begin(&mut self) {
if self.needs_reset {
}
}
unsafe fn finish(&mut self) {
let _ = self.c.close();
// This is a bit of a mess. Returning the allocator to the free pool
// makes sense if the command list will be dropped, but not if it will
// be reused. Probably need to implement some logic on drop.
if let Some(free_allocators) = self.free_allocators.upgrade() {
free_allocators
.lock()
.unwrap()
.push(self.allocator.take().unwrap());
self.needs_reset = true;
}
unsafe fn reset(&mut self) -> bool {
self.allocator.reset().is_ok() && self.c.reset(&self.allocator, None).is_ok()
}
unsafe fn dispatch(
@ -536,7 +521,7 @@ impl crate::backend::CmdBuf<Dx12Device> for CmdBuf {
self.memory_barrier();
}
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
unsafe fn clear_buffer(&self, _buffer: &Buffer, _size: Option<u64>) {
// Open question: do we call ClearUnorderedAccessViewUint or dispatch a
// compute shader? Either way we will need descriptors here.
todo!()
@ -597,7 +582,7 @@ impl crate::backend::PipelineBuilder<Dx12Device> for PipelineBuilder {
self.add_buffers(n_images);
}
fn add_textures(&mut self, max_textures: u32) {
fn add_textures(&mut self, _max_textures: u32) {
todo!()
}
@ -666,7 +651,7 @@ impl crate::backend::DescriptorSetBuilder<Dx12Device> for DescriptorSetBuilder {
self.images.extend(images.iter().copied().cloned());
}
fn add_textures(&mut self, images: &[&Image]) {
fn add_textures(&mut self, _images: &[&Image]) {
todo!()
}

View file

@ -9,9 +9,9 @@
use crate::dx12::error::{self, error_if_failed_else_unit, explain_error, Error};
use std::convert::{TryFrom, TryInto};
use std::sync::atomic::{AtomicPtr, Ordering};
use std::{ffi, mem, path::Path, ptr};
use std::{ffi, mem, ptr};
use winapi::shared::{
dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgiformat, dxgitype, minwindef, windef, winerror,
dxgi, dxgi1_2, dxgi1_3, dxgi1_4, dxgiformat, dxgitype, minwindef, windef,
};
use winapi::um::d3dcommon::ID3DBlob;
use winapi::um::{
@ -31,8 +31,6 @@ pub struct Resource {
ptr: AtomicPtr<d3d12::ID3D12Resource>,
}
pub struct VertexBufferView(pub ComPtr<d3d12::D3D12_VERTEX_BUFFER_VIEW>);
#[derive(Clone)]
pub struct Adapter1(pub ComPtr<dxgi::IDXGIAdapter1>);
#[derive(Clone)]
@ -61,8 +59,6 @@ pub struct DescriptorHeap {
pub heap: ComPtr<d3d12::ID3D12DescriptorHeap>,
}
pub type TextureAddressMode = [d3d12::D3D12_TEXTURE_ADDRESS_MODE; 3];
#[derive(Clone)]
pub struct RootSignature(pub ComPtr<d3d12::ID3D12RootSignature>);
@ -90,8 +86,6 @@ pub struct ShaderByteCode {
blob: Option<Blob>,
}
pub struct DebugController(pub d3d12sdklayers::ID3D12Debug);
#[derive(Clone)]
pub struct QueryHeap(pub ComPtr<d3d12::ID3D12QueryHeap>);
@ -158,10 +152,6 @@ impl Resource {
(*self.get()).Unmap(0, &zero_range);
Ok(())
}
pub unsafe fn get_gpu_virtual_address(&self) -> d3d12::D3D12_GPU_VIRTUAL_ADDRESS {
(*self.get()).GetGPUVirtualAddress()
}
}
impl Drop for Resource {
@ -406,22 +396,6 @@ impl Device {
self.0.GetDescriptorHandleIncrementSize(heap_type)
}
pub unsafe fn create_graphics_pipeline_state(
&self,
graphics_pipeline_desc: &d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC,
) -> PipelineState {
let mut pipeline_state = ptr::null_mut();
error::error_if_failed_else_unit(self.0.CreateGraphicsPipelineState(
graphics_pipeline_desc as *const _,
&d3d12::ID3D12PipelineState::uuidof(),
&mut pipeline_state as *mut _ as *mut _,
))
.expect("device could not create graphics pipeline state");
PipelineState(ComPtr::from_raw(pipeline_state))
}
pub unsafe fn create_compute_pipeline_state(
&self,
compute_pipeline_desc: &d3d12::D3D12_COMPUTE_PIPELINE_STATE_DESC,
@ -460,33 +434,6 @@ impl Device {
Ok(RootSignature(ComPtr::from_raw(signature)))
}
// This is for indirect command submission and we probably won't use it.
pub unsafe fn create_command_signature(
&self,
root_signature: RootSignature,
arguments: &[d3d12::D3D12_INDIRECT_ARGUMENT_DESC],
stride: u32,
node_mask: minwindef::UINT,
) -> CommandSignature {
let mut signature = ptr::null_mut();
let desc = d3d12::D3D12_COMMAND_SIGNATURE_DESC {
ByteStride: stride,
NumArgumentDescs: arguments.len() as _,
pArgumentDescs: arguments.as_ptr() as *const _,
NodeMask: node_mask,
};
error::error_if_failed_else_unit(self.0.CreateCommandSignature(
&desc,
root_signature.0.as_raw(),
&d3d12::ID3D12CommandSignature::uuidof(),
&mut signature as *mut _ as *mut _,
))
.expect("device could not create command signature");
CommandSignature(ComPtr::from_raw(signature))
}
pub unsafe fn create_graphics_command_list(
&self,
list_type: d3d12::D3D12_COMMAND_LIST_TYPE,
@ -550,102 +497,6 @@ impl Device {
)
}
pub unsafe fn create_constant_buffer_view(
&self,
resource: &Resource,
descriptor: CpuDescriptor,
size_in_bytes: u32,
) {
let cbv_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC {
BufferLocation: resource.get_gpu_virtual_address(),
SizeInBytes: size_in_bytes,
};
self.0
.CreateConstantBufferView(&cbv_desc as *const _, descriptor);
}
pub unsafe fn create_byte_addressed_buffer_shader_resource_view(
&self,
resource: &Resource,
descriptor: CpuDescriptor,
first_element: u64,
num_elements: u32,
) {
let mut srv_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC {
// shouldn't flags be dxgiformat::DXGI_FORMAT_R32_TYPELESS?
Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS,
ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER,
Shader4ComponentMapping: 0x1688,
..mem::zeroed()
};
*srv_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV {
FirstElement: first_element,
NumElements: num_elements,
// shouldn't StructureByteStride be 0?
StructureByteStride: 0,
// shouldn't flags be d3d12::D3D12_BUFFER_SRV_FLAG_RAW?
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW,
};
self.0
.CreateShaderResourceView(resource.get_mut(), &srv_desc as *const _, descriptor);
}
pub unsafe fn create_structured_buffer_shader_resource_view(
&self,
resource: &Resource,
descriptor: CpuDescriptor,
first_element: u64,
num_elements: u32,
structure_byte_stride: u32,
) {
let mut srv_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC {
Format: dxgiformat::DXGI_FORMAT_UNKNOWN,
ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER,
Shader4ComponentMapping: 0x1688,
..mem::zeroed()
};
*srv_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV {
FirstElement: first_element,
NumElements: num_elements,
StructureByteStride: structure_byte_stride,
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_NONE,
};
self.0
.CreateShaderResourceView(resource.get_mut(), &srv_desc as *const _, descriptor);
}
pub unsafe fn create_texture2d_shader_resource_view(
&self,
resource: &Resource,
format: dxgiformat::DXGI_FORMAT,
descriptor: CpuDescriptor,
) {
let mut srv_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC {
Format: format,
ViewDimension: d3d12::D3D12_SRV_DIMENSION_TEXTURE2D,
Shader4ComponentMapping: 0x1688,
..mem::zeroed()
};
*srv_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_SRV {
MostDetailedMip: 0,
MipLevels: 1,
PlaneSlice: 0,
ResourceMinLODClamp: 0.0,
};
self.0
.CreateShaderResourceView(resource.get_mut(), &srv_desc as *const _, descriptor);
}
pub unsafe fn create_render_target_view(
&self,
resource: &Resource,
desc: *const d3d12::D3D12_RENDER_TARGET_VIEW_DESC,
descriptor: CpuDescriptor,
) {
self.0
.CreateRenderTargetView(resource.get_mut(), desc, descriptor);
}
pub unsafe fn create_fence(&self, initial: u64) -> Result<Fence, Error> {
let mut fence = ptr::null_mut();
explain_error(
@ -661,10 +512,6 @@ impl Device {
Ok(Fence(ComPtr::from_raw(fence)))
}
pub unsafe fn destroy_fence(&self, fence: &Fence) -> Result<(), Error> {
Ok(())
}
pub unsafe fn create_committed_resource(
&self,
heap_properties: &d3d12::D3D12_HEAP_PROPERTIES,
@ -716,71 +563,6 @@ impl Device {
Ok(QueryHeap(ComPtr::from_raw(query_heap)))
}
// based on: https://github.com/microsoft/DirectX-Graphics-Samples/blob/682051ddbe4be820195fffed0bfbdbbde8611a90/Libraries/D3DX12/d3dx12.h#L1875
pub unsafe fn get_required_intermediate_buffer_size(
&self,
dest_resource: Resource,
first_subresource: u32,
num_subresources: u32,
) -> u64 {
let desc: d3d12::D3D12_RESOURCE_DESC = (*dest_resource.get()).GetDesc();
let mut required_size: *mut u64 = ptr::null_mut();
self.0.GetCopyableFootprints(
&desc as *const _,
first_subresource,
num_subresources,
0,
ptr::null_mut(),
ptr::null_mut(),
ptr::null_mut(),
&mut required_size as *mut _ as *mut _,
);
*required_size
}
pub unsafe fn get_copyable_footprint(
&self,
first_subresource: u32,
num_subresources: usize,
base_offset: u64,
dest_resource: &Resource,
) -> (
Vec<d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT>,
Vec<u32>,
Vec<u64>,
u64,
) {
let desc: d3d12::D3D12_RESOURCE_DESC = (*dest_resource.get()).GetDesc();
let mut layouts: Vec<d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT> =
Vec::with_capacity(num_subresources);
let mut num_rows: Vec<u32> = Vec::with_capacity(num_subresources);
let mut row_size_in_bytes: Vec<u64> = Vec::with_capacity(num_subresources);
let mut total_size: u64 = 0;
self.0.GetCopyableFootprints(
&desc as *const _,
first_subresource,
u32::try_from(num_subresources)
.expect("could not safely convert num_subresources into u32"),
base_offset,
layouts.as_mut_ptr(),
num_rows.as_mut_ptr(),
row_size_in_bytes.as_mut_ptr(),
&mut total_size as *mut _,
);
layouts.set_len(num_subresources);
num_rows.set_len(num_subresources);
row_size_in_bytes.set_len(num_subresources);
(layouts, num_rows, row_size_in_bytes, total_size)
}
pub unsafe fn create_buffer(
&self,
@ -898,38 +680,6 @@ impl Device {
)?;
Ok(features_architecture)
}
pub unsafe fn get_removal_reason(&self) -> Error {
Error::Hresult(self.0.GetDeviceRemovedReason())
}
}
pub struct SubresourceData {
pub data: Vec<u8>,
pub row_size: isize,
pub column_size: isize,
}
impl SubresourceData {
pub fn size(&self) -> usize {
self.data.len()
}
pub fn as_d3d12_subresource_data(&self) -> d3d12::D3D12_SUBRESOURCE_DATA {
assert_eq!(self.row_size % 256, 0);
d3d12::D3D12_SUBRESOURCE_DATA {
pData: self.data.as_ptr() as *const _,
RowPitch: self.row_size,
SlicePitch: self.column_size,
}
}
}
impl CommandAllocator {
pub unsafe fn reset(&self) -> Result<(), Error> {
explain_error(self.0.Reset(), "error resetting command allocator")
}
}
impl DescriptorHeap {
@ -956,10 +706,6 @@ impl DescriptorHeap {
}
}
#[repr(transparent)]
pub struct DescriptorRange(d3d12::D3D12_DESCRIPTOR_RANGE);
impl DescriptorRange {}
impl RootSignature {
pub unsafe fn serialize_description(
desc: &d3d12::D3D12_ROOT_SIGNATURE_DESC,
@ -990,17 +736,6 @@ impl RootSignature {
}
impl ShaderByteCode {
// empty byte code
pub unsafe fn empty() -> ShaderByteCode {
ShaderByteCode {
bytecode: d3d12::D3D12_SHADER_BYTECODE {
BytecodeLength: 0,
pShaderBytecode: ptr::null(),
},
blob: None,
}
}
// `blob` may not be null.
// TODO: this is not super elegant, maybe want to move the get
// operations closer to where they're used.
@ -1063,18 +798,6 @@ impl ShaderByteCode {
Ok(Blob(ComPtr::from_raw(shader_blob_ptr)))
}
pub unsafe fn compile_from_file(
file_path: &Path,
target: &str,
entry: &str,
flags: minwindef::DWORD,
) -> Result<Blob, Error> {
let file_open_error = format!("could not open shader source file for entry: {}", &entry);
let source = std::fs::read_to_string(file_path).expect(&file_open_error);
ShaderByteCode::compile(&source, target, entry, flags)
}
}
impl Fence {
@ -1088,10 +811,6 @@ impl Fence {
pub unsafe fn get_value(&self) -> u64 {
self.0.GetCompletedValue()
}
pub unsafe fn signal(&self, value: u64) -> winerror::HRESULT {
self.0.Signal(value)
}
}
impl Event {
@ -1120,11 +839,6 @@ impl Event {
pub unsafe fn wait(&self, timeout_ms: u32) -> u32 {
synchapi::WaitForSingleObject(self.0, timeout_ms)
}
// TODO: probably remove, yagni
pub unsafe fn wait_ex(&self, timeout_ms: u32, alertable: bool) -> u32 {
synchapi::WaitForSingleObjectEx(self.0, timeout_ms, alertable as _)
}
}
impl Drop for Event {
@ -1135,6 +849,12 @@ impl Drop for Event {
}
}
impl CommandAllocator {
pub unsafe fn reset(&self) -> Result<(), Error> {
error::error_if_failed_else_unit(self.0.Reset())
}
}
impl GraphicsCommandList {
pub unsafe fn as_raw_command_list(&self) -> *mut d3d12::ID3D12CommandList {
self.0.as_raw() as *mut d3d12::ID3D12CommandList
@ -1144,20 +864,15 @@ impl GraphicsCommandList {
explain_error(self.0.Close(), "error closing command list")
}
pub unsafe fn reset(&self, allocator: &CommandAllocator, initial_pso: Option<&PipelineState>) {
pub unsafe fn reset(&self, allocator: &CommandAllocator, initial_pso: Option<&PipelineState>) -> Result<(), Error> {
let p_initial_state = initial_pso.map(|p| p.0.as_raw()).unwrap_or(ptr::null_mut());
error::error_if_failed_else_unit(self.0.Reset(allocator.0.as_raw(), p_initial_state))
.expect("could not reset command list");
}
pub unsafe fn set_compute_pipeline_root_signature(&self, signature: &RootSignature) {
self.0.SetComputeRootSignature(signature.0.as_raw());
}
pub unsafe fn set_graphics_pipeline_root_signature(&self, signature: &RootSignature) {
self.0.SetGraphicsRootSignature(signature.0.as_raw());
}
pub unsafe fn resource_barrier(&self, resource_barriers: &[d3d12::D3D12_RESOURCE_BARRIER]) {
self.0.ResourceBarrier(
resource_barriers
@ -1168,42 +883,14 @@ impl GraphicsCommandList {
);
}
pub unsafe fn set_viewport(&self, viewport: &d3d12::D3D12_VIEWPORT) {
self.0.RSSetViewports(1, viewport as *const _);
}
pub unsafe fn set_scissor_rect(&self, scissor_rect: &d3d12::D3D12_RECT) {
self.0.RSSetScissorRects(1, scissor_rect as *const _);
}
pub unsafe fn dispatch(&self, count_x: u32, count_y: u32, count_z: u32) {
self.0.Dispatch(count_x, count_y, count_z);
}
pub unsafe fn draw_instanced(
&self,
num_vertices: u32,
num_instances: u32,
start_vertex: u32,
start_instance: u32,
) {
self.0
.DrawInstanced(num_vertices, num_instances, start_vertex, start_instance);
}
pub unsafe fn set_pipeline_state(&self, pipeline_state: &PipelineState) {
self.0.SetPipelineState(pipeline_state.0.as_raw());
}
pub unsafe fn set_compute_root_unordered_access_view(
&self,
root_parameter_index: u32,
buffer_location: d3d12::D3D12_GPU_VIRTUAL_ADDRESS,
) {
self.0
.SetComputeRootUnorderedAccessView(root_parameter_index, buffer_location);
}
pub unsafe fn set_compute_root_descriptor_table(
&self,
root_parameter_index: u32,
@ -1213,66 +900,6 @@ impl GraphicsCommandList {
.SetComputeRootDescriptorTable(root_parameter_index, base_descriptor);
}
pub unsafe fn set_graphics_root_shader_resource_view(
&self,
root_parameter_index: u32,
buffer_location: d3d12::D3D12_GPU_VIRTUAL_ADDRESS,
) {
self.0
.SetGraphicsRootShaderResourceView(root_parameter_index, buffer_location);
}
pub unsafe fn set_graphics_root_descriptor_table(
&self,
root_parameter_index: u32,
base_descriptor: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE,
) {
self.0
.SetGraphicsRootDescriptorTable(root_parameter_index, base_descriptor);
}
pub unsafe fn set_render_target(
&self,
render_target_descriptor: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
) {
self.0.OMSetRenderTargets(
1,
&render_target_descriptor as *const _,
false as _,
ptr::null(),
);
}
pub unsafe fn clear_render_target_view(
&self,
render_target_descriptor: d3d12::D3D12_CPU_DESCRIPTOR_HANDLE,
clear_color: &[f32; 4],
) {
self.0.ClearRenderTargetView(
render_target_descriptor,
clear_color as *const _,
0,
ptr::null(),
);
}
pub unsafe fn set_primitive_topology(
&self,
primitive_topology: d3dcommon::D3D_PRIMITIVE_TOPOLOGY,
) {
self.0.IASetPrimitiveTopology(primitive_topology);
}
pub unsafe fn set_vertex_buffer(
&self,
start_slot: u32,
num_views: u32,
vertex_buffer_view: &d3d12::D3D12_VERTEX_BUFFER_VIEW,
) {
self.0
.IASetVertexBuffers(start_slot, num_views, vertex_buffer_view as *const _);
}
pub unsafe fn set_descriptor_heaps(&self, descriptor_heaps: &[&DescriptorHeap]) {
let mut descriptor_heap_pointers: Vec<_> =
descriptor_heaps.iter().map(|dh| dh.heap.as_raw()).collect();
@ -1410,41 +1037,6 @@ impl GraphicsCommandList {
}
}
pub fn default_render_target_blend_desc() -> d3d12::D3D12_RENDER_TARGET_BLEND_DESC {
d3d12::D3D12_RENDER_TARGET_BLEND_DESC {
BlendEnable: minwindef::FALSE,
LogicOpEnable: minwindef::FALSE,
SrcBlend: d3d12::D3D12_BLEND_ONE,
DestBlend: d3d12::D3D12_BLEND_ZERO,
// enum variant 0
BlendOp: d3d12::D3D12_BLEND_OP_ADD,
SrcBlendAlpha: d3d12::D3D12_BLEND_ONE,
DestBlendAlpha: d3d12::D3D12_BLEND_ZERO,
BlendOpAlpha: d3d12::D3D12_BLEND_OP_ADD,
// enum variant 0
LogicOp: d3d12::D3D12_LOGIC_OP_NOOP,
RenderTargetWriteMask: d3d12::D3D12_COLOR_WRITE_ENABLE_ALL as u8,
}
}
pub fn default_blend_desc() -> d3d12::D3D12_BLEND_DESC {
// see default description here: https://docs.microsoft.com/en-us/windows/win32/direct3d12/cd3dx12-blend-desc
d3d12::D3D12_BLEND_DESC {
AlphaToCoverageEnable: minwindef::FALSE,
IndependentBlendEnable: minwindef::FALSE,
RenderTarget: [
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
default_render_target_blend_desc(),
],
}
}
pub unsafe fn create_uav_resource_barrier(
resource: *mut d3d12::ID3D12Resource,
) -> d3d12::D3D12_RESOURCE_BARRIER {
@ -1507,29 +1099,3 @@ pub unsafe fn enable_debug_layer() -> Result<(), Error> {
debug_controller.SetEnableGPUBasedValidation(minwindef::TRUE);
Ok(())
}
pub struct InputElementDesc {
pub semantic_name: String,
pub semantic_index: u32,
pub format: dxgiformat::DXGI_FORMAT,
pub input_slot: u32,
pub aligned_byte_offset: u32,
pub input_slot_class: d3d12::D3D12_INPUT_CLASSIFICATION,
pub instance_data_step_rate: u32,
}
impl InputElementDesc {
pub fn as_winapi_struct(&self) -> d3d12::D3D12_INPUT_ELEMENT_DESC {
d3d12::D3D12_INPUT_ELEMENT_DESC {
SemanticName: std::ffi::CString::new(self.semantic_name.as_str())
.unwrap()
.into_raw() as *const _,
SemanticIndex: self.semantic_index,
Format: self.format,
InputSlot: self.input_slot,
AlignedByteOffset: self.aligned_byte_offset,
InputSlotClass: self.input_slot_class,
InstanceDataStepRate: self.instance_data_step_rate,
}
}
}

View file

@ -48,8 +48,12 @@ struct SessionInner {
/// Actual work done by the GPU is encoded into a command buffer and then
/// submitted to the session in a batch.
pub struct CmdBuf {
cmd_buf: mux::CmdBuf,
fence: Fence,
// The invariant is that these options are always populated except
// when the struct is being destroyed. It would be possible to get
// rid of them by using this unsafe trick:
// https://phaazon.net/blog/blog/rust-no-drop
cmd_buf: Option<mux::CmdBuf>,
fence: Option<Fence>,
resources: Vec<RetainResource>,
session: Weak<SessionInner>,
}
@ -158,8 +162,8 @@ impl Session {
(cmd_buf, fence)
};
Ok(CmdBuf {
cmd_buf,
fence,
cmd_buf: Some(cmd_buf),
fence: Some(fence),
resources: Vec::new(),
session: Arc::downgrade(&self.0),
})
@ -202,23 +206,23 @@ impl Session {
// some cases.
staging.memory_barrier();
staging.finish();
cmd_bufs.push(&staging.cmd_buf);
cmd_bufs.push(staging.cmd_buf.as_ref().unwrap());
}
cmd_bufs.push(&cmd_buf.cmd_buf);
cmd_bufs.push(cmd_buf.cmd_buf.as_ref().unwrap());
self.0.device.run_cmd_bufs(
&cmd_bufs,
wait_semaphores,
signal_semaphores,
Some(&mut cmd_buf.fence),
Some(cmd_buf.fence.as_mut().unwrap()),
)?;
Ok(SubmittedCmdBuf(
Some(SubmittedCmdBufInner {
cmd_buf: cmd_buf.cmd_buf,
fence: cmd_buf.fence,
resources: cmd_buf.resources,
cmd_buf: cmd_buf.cmd_buf.take().unwrap(),
fence: cmd_buf.fence.take().unwrap(),
resources: std::mem::take(&mut cmd_buf.resources),
staging_cmd_buf,
}),
cmd_buf.session,
std::mem::replace(&mut cmd_buf.session, Weak::new()),
))
}
@ -369,8 +373,8 @@ impl Session {
#[doc(hidden)]
/// Create a sampler.
///
/// Noy yet implemented.
pub unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Sampler, Error> {
/// Not yet implemented.
pub unsafe fn create_sampler(&self, _params: SamplerParams) -> Result<Sampler, Error> {
todo!()
//self.0.device.create_sampler(params)
}
@ -397,22 +401,24 @@ impl SessionInner {
let _should_handle_err = self.device.destroy_fence(item.fence);
std::mem::drop(item.resources);
if let Some(staging_cmd_buf) = item.staging_cmd_buf {
let _should_handle_err = self.device.destroy_cmd_buf(staging_cmd_buf.cmd_buf);
let _should_handle_err = self.device.destroy_fence(staging_cmd_buf.fence);
std::mem::drop(staging_cmd_buf.resources);
if let Some(mut staging_cmd_buf) = item.staging_cmd_buf {
staging_cmd_buf.destroy(self);
}
}
}
impl CmdBuf {
fn cmd_buf(&mut self) -> &mut mux::CmdBuf {
self.cmd_buf.as_mut().unwrap()
}
/// Begin recording into a command buffer.
///
/// Always call this before encoding any actual work.
///
/// Discussion question: can this be subsumed?
pub unsafe fn begin(&mut self) {
self.cmd_buf.begin();
self.cmd_buf().begin();
}
/// Finish recording into a command buffer.
@ -420,7 +426,7 @@ impl CmdBuf {
/// Always call this as the last method before submitting the command
/// buffer.
pub unsafe fn finish(&mut self) {
self.cmd_buf.finish();
self.cmd_buf().finish();
}
/// Dispatch a compute shader.
@ -438,7 +444,7 @@ impl CmdBuf {
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
) {
self.cmd_buf
self.cmd_buf()
.dispatch(pipeline, descriptor_set, workgroup_count, workgroup_size);
}
@ -447,7 +453,7 @@ impl CmdBuf {
/// Compute kernels (and other actions) after this barrier may read from buffers
/// that were written before this barrier.
pub unsafe fn memory_barrier(&mut self) {
self.cmd_buf.memory_barrier();
self.cmd_buf().memory_barrier();
}
/// Insert a barrier for host access to buffers.
@ -458,7 +464,7 @@ impl CmdBuf {
/// See http://themaister.net/blog/2019/08/14/yet-another-blog-explaining-vulkan-synchronization/
/// ("Host memory reads") for an explanation of this barrier.
pub unsafe fn host_barrier(&mut self) {
self.cmd_buf.memory_barrier();
self.cmd_buf().memory_barrier();
}
/// Insert an image barrier, transitioning image layout.
@ -475,7 +481,7 @@ impl CmdBuf {
src_layout: ImageLayout,
dst_layout: ImageLayout,
) {
self.cmd_buf
self.cmd_buf()
.image_barrier(image.mux_image(), src_layout, dst_layout);
}
@ -483,21 +489,22 @@ impl CmdBuf {
///
/// When the size is not specified, it clears the whole buffer.
pub unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option<u64>) {
self.cmd_buf.clear_buffer(buffer.mux_buffer(), size);
self.cmd_buf().clear_buffer(buffer.mux_buffer(), size);
}
/// Copy one buffer to another.
///
/// When the buffers differ in size, the minimum of the sizes is used.
pub unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) {
self.cmd_buf.copy_buffer(src.mux_buffer(), dst.mux_buffer());
self.cmd_buf()
.copy_buffer(src.mux_buffer(), dst.mux_buffer());
}
/// Copy an image to a buffer.
///
/// The size of the image and buffer must match.
pub unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) {
self.cmd_buf
self.cmd_buf()
.copy_image_to_buffer(src.mux_image(), dst.mux_buffer());
// TODO: change the backend signature to allow failure, as in "not
// implemented" or "unaligned", and fall back to compute shader
@ -508,7 +515,7 @@ impl CmdBuf {
///
/// The size of the image and buffer must match.
pub unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) {
self.cmd_buf
self.cmd_buf()
.copy_buffer_to_image(src.mux_buffer(), dst.mux_image());
// See above.
}
@ -521,7 +528,7 @@ impl CmdBuf {
/// Discussion question: we might have a specialized version of this
/// function for copying to the swapchain image, and a separate type.
pub unsafe fn blit_image(&mut self, src: &Image, dst: &Image) {
self.cmd_buf.blit_image(src.mux_image(), dst.mux_image());
self.cmd_buf().blit_image(src.mux_image(), dst.mux_image());
}
/// Reset the query pool.
@ -530,14 +537,14 @@ impl CmdBuf {
/// This is annoying, and we could tweak the API to make it implicit, doing
/// the reset before the first timestamp write.
pub unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {
self.cmd_buf.reset_query_pool(pool);
self.cmd_buf().reset_query_pool(pool);
}
/// Write a timestamp.
///
/// The query index must be less than the size of the query pool on creation.
pub unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
self.cmd_buf.write_timestamp(pool, query);
self.cmd_buf().write_timestamp(pool, query);
}
/// Prepare the timestamps for reading. This isn't required on Vulkan but
@ -546,7 +553,7 @@ impl CmdBuf {
/// It's possible we'll make this go away, by implicitly including it
/// on command buffer submission when a query pool has been written.
pub unsafe fn finish_timestamps(&mut self, pool: &QueryPool) {
self.cmd_buf.finish_timestamps(pool);
self.cmd_buf().finish_timestamps(pool);
}
/// Make sure the resource lives until the command buffer completes.
@ -574,16 +581,52 @@ impl SubmittedCmdBuf {
///
/// Resources for which destruction was deferred through
/// [`add_resource`][`CmdBuf::add_resource`] will actually be dropped here.
pub fn wait(mut self) -> Result<(), Error> {
///
/// If the command buffer is still available for reuse, it is returned.
pub fn wait(mut self) -> Result<Option<CmdBuf>, Error> {
let mut item = self.0.take().unwrap();
if let Some(session) = Weak::upgrade(&self.1) {
unsafe {
session.device.wait_and_reset(vec![&mut item.fence])?;
session.cleanup_submitted_cmd_buf(item);
if let Some(mut staging_cmd_buf) = item.staging_cmd_buf {
staging_cmd_buf.destroy(&session);
}
if item.cmd_buf.reset() {
return Ok(Some(CmdBuf {
cmd_buf: Some(item.cmd_buf),
fence: Some(item.fence),
resources: Vec::new(),
session: std::mem::take(&mut self.1),
}));
} else {
return Ok(None);
}
}
}
// else session dropped error?
Ok(())
Ok(None)
}
}
impl Drop for CmdBuf {
fn drop(&mut self) {
if let Some(session) = Weak::upgrade(&self.session) {
unsafe {
self.destroy(&session);
}
}
}
}
impl CmdBuf {
unsafe fn destroy(&mut self, session: &SessionInner) {
if let Some(cmd_buf) = self.cmd_buf.take() {
let _ = session.device.destroy_cmd_buf(cmd_buf);
}
if let Some(fence) = self.fence.take() {
let _ = session.device.destroy_fence(fence);
}
self.resources.clear();
}
}

View file

@ -407,6 +407,10 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
unsafe fn finish(&mut self) {}
unsafe fn reset(&mut self) -> bool {
false
}
unsafe fn dispatch(
&mut self,
pipeline: &Pipeline,

View file

@ -622,6 +622,14 @@ impl CmdBuf {
}
}
pub unsafe fn reset(&mut self) -> bool {
mux_match! { self;
CmdBuf::Vk(c) => c.reset(),
CmdBuf::Dx12(c) => c.reset(),
CmdBuf::Mtl(c) => c.reset(),
}
}
/// Dispatch a compute shader.
///
/// Note that both the number of workgroups (`workgroup_count`) and the number of

View file

@ -80,7 +80,6 @@ pub struct Pipeline {
pipeline: vk::Pipeline,
descriptor_set_layout: vk::DescriptorSetLayout,
pipeline_layout: vk::PipelineLayout,
max_textures: u32,
}
pub struct DescriptorSet {
@ -111,7 +110,7 @@ pub struct DescriptorSetBuilder {
buffers: Vec<vk::Buffer>,
images: Vec<vk::ImageView>,
textures: Vec<vk::ImageView>,
sampler: vk::Sampler,
// TODO: we had a sampler here, might need it again
}
struct Extensions {
@ -667,7 +666,6 @@ impl crate::backend::Device for VkDevice {
buffers: Vec::new(),
images: Vec::new(),
textures: Vec::new(),
sampler: vk::Sampler::null(),
}
}
@ -868,6 +866,10 @@ impl crate::backend::CmdBuf<VkDevice> for CmdBuf {
self.device.device.end_command_buffer(self.cmd_buf).unwrap();
}
unsafe fn reset(&mut self) -> bool {
true
}
unsafe fn dispatch(
&mut self,
pipeline: &Pipeline,
@ -1181,7 +1183,6 @@ impl crate::backend::PipelineBuilder<VkDevice> for PipelineBuilder {
pipeline,
pipeline_layout,
descriptor_set_layout,
max_textures: self.max_textures,
})
}
}

View file

@ -39,3 +39,6 @@ ndk = "0.3"
ndk-sys = "0.2.0"
ndk-glue = "0.3"
raw-window-handle = "0.3"
[package.metadata.android.application]
debuggable = true

View file

@ -12,7 +12,7 @@ use ndk::native_window::NativeWindow;
use ndk_glue::Event;
use piet_gpu_hal::{
Error, ImageLayout, Instance, QueryPool, Semaphore, Session, SubmittedCmdBuf, Surface,
CmdBuf, Error, ImageLayout, Instance, QueryPool, Semaphore, Session, SubmittedCmdBuf, Surface,
Swapchain,
};
@ -37,6 +37,7 @@ struct GfxState {
swapchain: Swapchain,
current_frame: usize,
submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES],
cmd_bufs: [Option<CmdBuf>; NUM_FRAMES],
query_pools: Vec<QueryPool>,
present_semaphores: Vec<Semaphore>,
}
@ -112,6 +113,7 @@ impl GfxState {
.map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?;
let submitted = Default::default();
let cmd_bufs = Default::default();
let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?;
@ -121,6 +123,7 @@ impl GfxState {
swapchain,
current_frame,
submitted,
cmd_bufs,
query_pools,
present_semaphores,
})
@ -134,7 +137,7 @@ impl GfxState {
let mut info_string = String::new();
if let Some(submitted) = self.submitted[frame_idx].take() {
submitted.wait().unwrap();
self.cmd_bufs[frame_idx] = submitted.wait().unwrap();
let ts = self
.session
.fetch_query_pool(&self.query_pools[frame_idx])
@ -152,7 +155,9 @@ impl GfxState {
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
let swap_image = self.swapchain.image(image_idx);
let query_pool = &self.query_pools[frame_idx];
let mut cmd_buf = self.session.cmd_buf().unwrap();
let mut cmd_buf = self.cmd_bufs[frame_idx]
.take()
.unwrap_or_else(|| self.session.cmd_buf().unwrap());
cmd_buf.begin();
self.renderer.record(&mut cmd_buf, &query_pool, frame_idx);

View file

@ -1,6 +1,6 @@
use piet::kurbo::Point;
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
use piet_gpu_hal::{Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
@ -53,6 +53,7 @@ fn main() -> Result<(), Error> {
let query_pools = (0..NUM_FRAMES)
.map(|_| session.create_query_pool(8))
.collect::<Result<Vec<_>, Error>>()?;
let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
@ -76,7 +77,7 @@ fn main() -> Result<(), Error> {
let frame_idx = current_frame % NUM_FRAMES;
if let Some(submitted) = submitted[frame_idx].take() {
submitted.wait().unwrap();
cmd_bufs[frame_idx] = submitted.wait().unwrap();
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
info_string = format!(
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
@ -112,7 +113,7 @@ fn main() -> Result<(), Error> {
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
let swap_image = swapchain.image(image_idx);
let query_pool = &query_pools[frame_idx];
let mut cmd_buf = session.cmd_buf().unwrap();
let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap());
cmd_buf.begin();
renderer.record(&mut cmd_buf, &query_pool, frame_idx);

View file

@ -6,16 +6,10 @@ mod text;
use std::convert::TryInto;
use piet_gpu_types::scene;
pub use render_ctx::PietGpuRenderContext;
use rand::{Rng, RngCore};
use piet::kurbo::{BezPath, Circle, Point, Rect, Shape, Vec2};
use piet::{
Color, FixedGradient, FixedLinearGradient, GradientStop, ImageFormat, RenderContext, Text,
TextAttribute, TextLayoutBuilder,
};
use piet::kurbo::Vec2;
use piet::{ImageFormat, RenderContext};
use piet_gpu_types::encoder::Encode;
@ -313,7 +307,10 @@ impl Renderer {
// Upload gradient data.
let ramp_data = render_ctx.get_ramp_data();
if !ramp_data.is_empty() {
assert!(self.gradient_bufs[buf_ix].size() as usize >= std::mem::size_of_val(&*ramp_data));
assert!(
self.gradient_bufs[buf_ix].size() as usize
>= std::mem::size_of_val(&*ramp_data)
);
self.gradient_bufs[buf_ix].write(&ramp_data)?;
}
}

View file

@ -1,13 +1,10 @@
use std::{borrow::Cow, ops::RangeBounds};
use std::borrow::Cow;
use crate::MAX_BLEND_STACK;
use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape};
use piet::{
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size},
HitTestPosition, TextAttribute, TextStorage,
};
use piet::{
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
IntoBrush, LineMetric, RenderContext, StrokeStyle, Text, TextLayout, TextLayoutBuilder,
Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
StrokeStyle,
};
use piet_gpu_types::encoder::{Encode, Encoder};