From 9fb2ae91ebb141ad54e753cfd7a0989db86406f2 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 25 Nov 2021 13:12:25 -0800 Subject: [PATCH 1/5] Access buffer data through mapping This patch includes a number of changes to encourage reading and writing buffers through mapping rather than copying data as before. This includes a new `BufWrite` abstraction which is designed for filling buffers. It behaves much like a Vec, but with fixed capacity. --- piet-gpu-hal/src/backend.rs | 29 ++-- piet-gpu-hal/src/bufwrite.rs | 117 +++++++++++++ piet-gpu-hal/src/dx12.rs | 32 ++-- piet-gpu-hal/src/dx12/wrappers.rs | 55 +++--- piet-gpu-hal/src/hub.rs | 279 +++++++++++++++++++++++++----- piet-gpu-hal/src/lib.rs | 13 +- piet-gpu-hal/src/metal.rs | 24 +++ piet-gpu-hal/src/mux.rs | 23 +-- piet-gpu-hal/src/vulkan.rs | 34 ++-- tests/src/clear.rs | 5 +- tests/src/linkedlist.rs | 5 +- tests/src/prefix.rs | 16 +- tests/src/prefix_tree.rs | 16 +- tests/src/runner.rs | 9 +- 14 files changed, 499 insertions(+), 158 deletions(-) create mode 100644 piet-gpu-hal/src/bufwrite.rs diff --git a/piet-gpu-hal/src/backend.rs b/piet-gpu-hal/src/backend.rs index 8df7354..a4422b9 100644 --- a/piet-gpu-hal/src/backend.rs +++ b/piet-gpu-hal/src/backend.rs @@ -16,7 +16,7 @@ //! The generic trait for backends to implement. -use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams}; +use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, SamplerParams}; pub trait Device: Sized { type Buffer: 'static; @@ -114,36 +114,33 @@ pub trait Device: Sized { fence: Option<&mut Self::Fence>, ) -> Result<(), Error>; - /// Copy data from the buffer to memory. - /// - /// Discussion question: add offset? + /// Map the buffer into addressable memory. /// /// # Safety /// - /// The buffer must be valid to access. The destination memory must be valid to - /// write to. The ranges must not overlap. The offset + size must be within - /// the buffer's allocation, and size within the destination. - unsafe fn read_buffer( + /// The buffer must be valid to access. The offset + size much be within the + /// buffer's allocation. The buffer must not already be mapped. Of course, + /// the usual safety rules apply to the returned pointer. + unsafe fn map_buffer( &self, buffer: &Self::Buffer, - dst: *mut u8, offset: u64, size: u64, - ) -> Result<(), Error>; + mode: MapMode, + ) -> Result<*mut u8, Error>; - /// Copy data from memory to the buffer. + /// Map the buffer into addressable memory. /// /// # Safety /// - /// The buffer must be valid to access. The source memory must be valid to - /// read from. The ranges must not overlap. The offset + size must be within - /// the buffer's allocation, and size within the source. - unsafe fn write_buffer( + /// The buffer must be mapped. The parameters must be the same as the map + /// call. + unsafe fn unmap_buffer( &self, buffer: &Self::Buffer, - contents: *const u8, offset: u64, size: u64, + mode: MapMode, ) -> Result<(), Error>; unsafe fn create_semaphore(&self) -> Result; diff --git a/piet-gpu-hal/src/bufwrite.rs b/piet-gpu-hal/src/bufwrite.rs new file mode 100644 index 0000000..eefe66f --- /dev/null +++ b/piet-gpu-hal/src/bufwrite.rs @@ -0,0 +1,117 @@ +// Copyright © 2021 piet-gpu developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those + +//! An abstraction for writing to GPU buffers. + +use bytemuck::Pod; + +/// A GPU buffer to be filled. +pub struct BufWrite { + ptr: *mut u8, + len: usize, + capacity: usize, +} + +impl BufWrite { + pub(crate) fn new(ptr: *mut u8, len: usize, capacity: usize) -> BufWrite { + BufWrite { ptr, len, capacity } + } + + /// Append a plain data object to the buffer. + /// + /// Panics if capacity is inadequate. + #[inline] + pub fn push(&mut self, item: &impl Pod) { + self.push_bytes(bytemuck::bytes_of(item)); + } + + /// Extend with a slice of plain data objects. + /// + /// Panics if capacity is inadequate. + #[inline] + pub fn extend_slice(&mut self, slice: &[impl Pod]) { + self.push_bytes(bytemuck::cast_slice(slice)); + } + + /// Extend with a byte slice. + /// + /// Panics if capacity is inadequate. + #[inline] + pub fn push_bytes(&mut self, bytes: &[u8]) { + let len = bytes.len(); + assert!(self.capacity - self.len >= len); + unsafe { + std::ptr::copy_nonoverlapping(bytes.as_ptr(), self.ptr.add(self.len), len); + } + self.len += len; + } + + /// Extend with zeros. + /// + /// Panics if capacity is inadequate. + #[inline] + pub fn fill_zero(&mut self, len: usize) { + assert!(self.capacity - self.len >= len); + unsafe { + let slice = std::slice::from_raw_parts_mut(self.ptr.add(self.len), len); + slice.fill(0); + } + self.len += len; + } + + /// The total capacity of the buffer, in bytes. + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Extend with an iterator over plain data objects. + /// + /// Currently, this doesn't panic, just truncates. That may change. + pub fn extend<'a, I, T: Pod + 'a>(&mut self, iter: I) + where + I: IntoIterator, + { + let item_size = std::mem::size_of::(); + if item_size == 0 { + return; + } + let mut iter = iter.into_iter(); + let n_remaining = (self.capacity - self.len) / item_size; + unsafe { + let mut dst = self.ptr.add(self.len); + for _ in 0..n_remaining { + if let Some(item) = iter.next() { + std::ptr::copy_nonoverlapping( + bytemuck::bytes_of(item).as_ptr(), + dst, + item_size, + ); + self.len += item_size; + dst = dst.add(item_size); + } else { + break; + } + } + } + // TODO: should we test the iter and panic on overflow? + } +} + +impl std::ops::Deref for BufWrite { + type Target = [u8]; + fn deref(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } +} + +impl std::ops::DerefMut for BufWrite { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } + } +} diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs index 5172cc3..6c8464d 100644 --- a/piet-gpu-hal/src/dx12.rs +++ b/piet-gpu-hal/src/dx12.rs @@ -21,7 +21,7 @@ use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; use smallvec::SmallVec; -use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, WorkgroupLimits}; +use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits}; use self::{ descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned}, @@ -381,12 +381,10 @@ impl crate::backend::Device for Dx12Device { unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result, Error> { let mut buf = vec![0u64; pool.n_queries as usize]; - self.read_buffer( - &pool.buf, - buf.as_mut_ptr() as *mut u8, - 0, - mem::size_of_val(buf.as_slice()) as u64, - )?; + let size = mem::size_of_val(buf.as_slice()); + let mapped = self.map_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; + std::ptr::copy_nonoverlapping(mapped, buf.as_mut_ptr() as *mut u8, size); + self.unmap_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; let ts0 = buf[0]; let tsp = (self.ts_freq as f64).recip(); let result = buf[1..] @@ -418,29 +416,25 @@ impl crate::backend::Device for Dx12Device { Ok(()) } - unsafe fn read_buffer( + unsafe fn map_buffer( &self, buffer: &Self::Buffer, - dst: *mut u8, offset: u64, size: u64, - ) -> Result<(), Error> { - buffer - .resource - .read_resource(dst, offset as usize, size as usize)?; - Ok(()) + mode: MapMode, + ) -> Result<*mut u8, Error> { + let mapped = buffer.resource.map_buffer(offset, size, mode)?; + Ok(mapped) } - unsafe fn write_buffer( + unsafe fn unmap_buffer( &self, buffer: &Self::Buffer, - contents: *const u8, offset: u64, size: u64, + mode: MapMode, ) -> Result<(), Error> { - buffer - .resource - .write_resource(contents, offset as usize, size as usize)?; + buffer.resource.unmap_buffer(offset, size, mode)?; Ok(()) } diff --git a/piet-gpu-hal/src/dx12/wrappers.rs b/piet-gpu-hal/src/dx12/wrappers.rs index a8eade7..4bbb86c 100644 --- a/piet-gpu-hal/src/dx12/wrappers.rs +++ b/piet-gpu-hal/src/dx12/wrappers.rs @@ -7,6 +7,7 @@ // except according to those terms. use crate::dx12::error::{self, error_if_failed_else_unit, explain_error, Error}; +use crate::MapMode; use smallvec::SmallVec; use std::convert::{TryFrom, TryInto}; use std::sync::atomic::{AtomicPtr, Ordering}; @@ -105,46 +106,38 @@ impl Resource { self.ptr.store(ptr::null_mut(), Ordering::Relaxed); } - pub unsafe fn write_resource( + pub unsafe fn map_buffer( &self, - data: *const u8, - offset: usize, - size: usize, - ) -> Result<(), Error> { + offset: u64, + size: u64, + mode: MapMode, + ) -> Result<*mut u8, Error> { let mut mapped_memory: *mut u8 = ptr::null_mut(); - let zero_range = d3d12::D3D12_RANGE { ..mem::zeroed() }; - let range = d3d12::D3D12_RANGE { - Begin: offset, - End: offset + size, + let (begin, end) = match mode { + MapMode::Read => (offset as usize, (offset + size) as usize), + MapMode::Write => (0, 0), }; - explain_error( - (*self.get()).Map(0, &zero_range, &mut mapped_memory as *mut _ as *mut _), - "could not map GPU mem to CPU mem", - )?; - - ptr::copy_nonoverlapping(data, mapped_memory.add(offset), size); - (*self.get()).Unmap(0, &range); - Ok(()) - } - - pub unsafe fn read_resource( - &self, - dst: *mut u8, - offset: usize, - size: usize, - ) -> Result<(), Error> { - let mut mapped_memory: *mut u8 = ptr::null_mut(); let range = d3d12::D3D12_RANGE { - Begin: offset, - End: offset + size, + Begin: begin, + End: end, }; - let zero_range = d3d12::D3D12_RANGE { ..mem::zeroed() }; explain_error( (*self.get()).Map(0, &range, &mut mapped_memory as *mut _ as *mut _), "could not map GPU mem to CPU mem", )?; - ptr::copy_nonoverlapping(mapped_memory.add(offset), dst, size); - (*self.get()).Unmap(0, &zero_range); + Ok(mapped_memory.add(offset as usize)) + } + + pub unsafe fn unmap_buffer(&self, offset: u64, size: u64, mode: MapMode) -> Result<(), Error> { + let (begin, end) = match mode { + MapMode::Read => (0, 0), + MapMode::Write => (offset as usize, (offset + size) as usize), + }; + let range = d3d12::D3D12_RANGE { + Begin: begin, + End: end, + }; + (*self.get()).Unmap(0, &range); Ok(()) } } diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index cdffade..edf6535 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -7,12 +7,13 @@ //! even more in time. use std::convert::TryInto; +use std::ops::{Bound, RangeBounds}; use std::sync::{Arc, Mutex, Weak}; use bytemuck::Pod; use smallvec::SmallVec; -use crate::{mux, BackendType}; +use crate::{mux, BackendType, BufWrite, MapMode}; use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams}; @@ -112,6 +113,28 @@ pub enum RetainResource { Image(Image), } +/// A buffer mapped for writing. +/// +/// When this structure is dropped, the buffer will be unmapped. +pub struct BufWriteGuard<'a> { + buf_write: BufWrite, + session: Arc, + buffer: &'a mux::Buffer, + offset: u64, + size: u64, +} + +/// A buffer mapped for reading. +/// +/// When this structure is dropped, the buffer will be unmapped. +pub struct BufReadGuard<'a> { + bytes: &'a [u8], + session: Arc, + buffer: &'a mux::Buffer, + offset: u64, + size: u64, +} + impl Session { /// Create a new session, choosing the best backend. pub fn new(device: mux::Device) -> Session { @@ -232,45 +255,56 @@ impl Session { contents: &[impl Pod], usage: BufferUsage, ) -> Result { - unsafe { - let bytes = bytemuck::cast_slice(contents); - self.create_buffer_init_raw(bytes.as_ptr(), bytes.len().try_into()?, usage) - } + let size = std::mem::size_of_val(contents); + let bytes = bytemuck::cast_slice(contents); + self.create_buffer_with(size as u64, |b| b.push_bytes(bytes), usage) } - /// Create a buffer with initialized data, from a raw pointer memory region. - pub unsafe fn create_buffer_init_raw( + /// Create a buffer with initialized data. + /// + /// The buffer is filled by the provided function. The same details about + /// staging buffers apply as [`create_buffer_init`]. + pub fn create_buffer_with( &self, - contents: *const u8, size: u64, + f: impl Fn(&mut BufWrite), usage: BufferUsage, ) -> Result { - let use_staging_buffer = !usage.intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE) - && self.gpu_info().use_staging_buffers; - let create_usage = if use_staging_buffer { - BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC - } else { - usage | BufferUsage::MAP_WRITE - }; - let create_buf = self.create_buffer(size, create_usage)?; - self.0 - .device - .write_buffer(&create_buf.mux_buffer(), contents, 0, size)?; - if use_staging_buffer { - let buf = self.create_buffer(size, usage | BufferUsage::COPY_DST)?; - let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap(); - if staging_cmd_buf.is_none() { - let mut cmd_buf = self.cmd_buf()?; - cmd_buf.begin(); - *staging_cmd_buf = Some(cmd_buf); + unsafe { + let use_staging_buffer = !usage + .intersects(BufferUsage::MAP_READ | BufferUsage::MAP_WRITE) + && self.gpu_info().use_staging_buffers; + let create_usage = if use_staging_buffer { + BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC + } else { + usage | BufferUsage::MAP_WRITE + }; + let create_buf = self.create_buffer(size, create_usage)?; + let mapped = + self.0 + .device + .map_buffer(&create_buf.mux_buffer(), 0, size, MapMode::Write)?; + let mut buf_write = BufWrite::new(mapped, 0, size as usize); + f(&mut buf_write); + self.0 + .device + .unmap_buffer(&create_buf.mux_buffer(), 0, size, MapMode::Write)?; + if use_staging_buffer { + let buf = self.create_buffer(size, usage | BufferUsage::COPY_DST)?; + let mut staging_cmd_buf = self.0.staging_cmd_buf.lock().unwrap(); + if staging_cmd_buf.is_none() { + let mut cmd_buf = self.cmd_buf()?; + cmd_buf.begin(); + *staging_cmd_buf = Some(cmd_buf); + } + let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap(); + // This will ensure the staging buffer is deallocated. + staging_cmd_buf.copy_buffer(&create_buf, &buf); + staging_cmd_buf.add_resource(create_buf); + Ok(buf) + } else { + Ok(create_buf) } - let staging_cmd_buf = staging_cmd_buf.as_mut().unwrap(); - // This will ensure the staging buffer is deallocated. - staging_cmd_buf.copy_buffer(&create_buf, &buf); - staging_cmd_buf.add_resource(create_buf); - Ok(buf) - } else { - Ok(create_buf) } } @@ -669,12 +703,22 @@ impl Buffer { pub unsafe fn write(&mut self, contents: &[impl Pod]) -> Result<(), Error> { let bytes = bytemuck::cast_slice(contents); if let Some(session) = Weak::upgrade(&self.0.session) { - session.device.write_buffer( - &self.0.buffer, - bytes.as_ptr(), - 0, - bytes.len().try_into()?, - )?; + let size = bytes.len().try_into()?; + let buf_size = self.0.buffer.size(); + if size > buf_size { + return Err(format!( + "Trying to write {} bytes into buffer of size {}", + size, buf_size + ) + .into()); + } + let mapped = session + .device + .map_buffer(&self.0.buffer, 0, size, MapMode::Write)?; + std::ptr::copy_nonoverlapping(bytes.as_ptr(), mapped, bytes.len()); + session + .device + .unmap_buffer(&self.0.buffer, 0, size, MapMode::Write)?; } // else session lost error? Ok(()) @@ -694,15 +738,115 @@ impl Buffer { result.reserve(len - result.len()); } if let Some(session) = Weak::upgrade(&self.0.session) { + let mapped = session + .device + .map_buffer(&self.0.buffer, 0, size, MapMode::Read)?; + std::ptr::copy_nonoverlapping(mapped, result.as_mut_ptr() as *mut u8, size as usize); session .device - .read_buffer(&self.0.buffer, result.as_mut_ptr() as *mut u8, 0, size)?; + .unmap_buffer(&self.0.buffer, 0, size, MapMode::Read)?; result.set_len(len); } // else session lost error? Ok(()) } + /// Map a buffer for writing. + /// + /// The mapped buffer is represented by a "guard" structure, which will unmap + /// the buffer when it's dropped. That also has a number of methods for pushing + /// bytes and [`bytemuck::Pod`] objects. + /// + /// The buffer must have been created with `MAP_WRITE` usage. + pub unsafe fn map_write<'a>( + &'a mut self, + range: impl RangeBounds, + ) -> Result, Error> { + let offset = match range.start_bound() { + Bound::Unbounded => 0, + Bound::Included(&s) => s.try_into()?, + Bound::Excluded(_) => unreachable!(), + }; + let end = match range.end_bound() { + Bound::Unbounded => self.size(), + Bound::Included(&s) => s.try_into()?, + Bound::Excluded(&s) => s.checked_add(1).unwrap().try_into()?, + }; + self.map_write_impl(offset, end - offset) + } + + unsafe fn map_write_impl<'a>( + &'a self, + offset: u64, + size: u64, + ) -> Result, Error> { + if let Some(session) = Weak::upgrade(&self.0.session) { + let ptr = session + .device + .map_buffer(&self.0.buffer, offset, size, MapMode::Write)?; + let buf_write = BufWrite::new(ptr, 0, size as usize); + let guard = BufWriteGuard { + buf_write, + session, + buffer: &self.0.buffer, + offset, + size, + }; + Ok(guard) + } else { + Err("session lost".into()) + } + } + + /// Map a buffer for reading. + /// + /// The mapped buffer is represented by a "guard" structure, which will unmap + /// the buffer when it's dropped, and derefs to a plain byte slice. + /// + /// The buffer must have been created with `MAP_READ` usage. The caller + /// is also responsible for ensuring that this does not read uninitialized + /// memory. + pub unsafe fn map_read<'a>( + // Discussion: should be &mut? Buffer is Clone, but maybe that should change. + &'a self, + range: impl RangeBounds, + ) -> Result, Error> { + let offset = match range.start_bound() { + Bound::Unbounded => 0, + Bound::Excluded(&s) => s.try_into()?, + Bound::Included(_) => unreachable!(), + }; + let end = match range.end_bound() { + Bound::Unbounded => self.size(), + Bound::Excluded(&s) => s.try_into()?, + Bound::Included(&s) => s.checked_add(1).unwrap().try_into()?, + }; + self.map_read_impl(offset, end - offset) + } + + unsafe fn map_read_impl<'a>( + &'a self, + offset: u64, + size: u64, + ) -> Result, Error> { + if let Some(session) = Weak::upgrade(&self.0.session) { + let ptr = session + .device + .map_buffer(&self.0.buffer, offset, size, MapMode::Read)?; + let bytes = std::slice::from_raw_parts(ptr, size as usize); + let guard = BufReadGuard { + bytes, + session, + buffer: &self.0.buffer, + offset, + size, + }; + Ok(guard) + } else { + Err("session lost".into()) + } + } + /// The size of the buffer. /// /// This is at least as large as the value provided on creation. @@ -801,3 +945,58 @@ impl<'a, T: Clone + Into> From<&'a T> for RetainResource { resource.clone().into() } } + +impl<'a> Drop for BufWriteGuard<'a> { + fn drop(&mut self) { + unsafe { + let _ = self.session.device.unmap_buffer( + self.buffer, + self.offset, + self.size, + MapMode::Write, + ); + } + } +} + +impl<'a> std::ops::Deref for BufWriteGuard<'a> { + type Target = BufWrite; + + fn deref(&self) -> &Self::Target { + &self.buf_write + } +} + +impl<'a> std::ops::DerefMut for BufWriteGuard<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.buf_write + } +} + +impl<'a> Drop for BufReadGuard<'a> { + fn drop(&mut self) { + unsafe { + let _ = self.session.device.unmap_buffer( + self.buffer, + self.offset, + self.size, + MapMode::Read, + ); + } + } +} + +impl<'a> std::ops::Deref for BufReadGuard<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.bytes + } +} + +impl<'a> BufReadGuard<'a> { + /// Interpret the buffer as a slice of a plain data type. + pub fn cast_slice(&self) -> &[T] { + bytemuck::cast_slice(self.bytes) + } +} diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index dff607f..a0a4da3 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -7,6 +7,7 @@ use bitflags::bitflags; mod backend; mod bestfit; +mod bufwrite; mod hub; #[macro_use] @@ -18,8 +19,10 @@ pub use crate::mux::{ DescriptorSet, Fence, Instance, Pipeline, QueryPool, Sampler, Semaphore, ShaderCode, Surface, Swapchain, }; +pub use bufwrite::BufWrite; pub use hub::{ - Buffer, CmdBuf, DescriptorSetBuilder, Image, RetainResource, Session, SubmittedCmdBuf, + BufReadGuard, BufWriteGuard, Buffer, CmdBuf, DescriptorSetBuilder, Image, RetainResource, + Session, SubmittedCmdBuf, }; // TODO: because these are conditionally included, "cargo fmt" does not @@ -128,6 +131,14 @@ pub enum BindType { // TODO: Uniform, Sampler, maybe others } +/// Whether to map a buffer in read or write mode. +pub enum MapMode { + /// Map for reading. + Read, + /// Map for writing. + Write, +} + #[derive(Clone, Debug)] /// Information about the GPU. pub struct GpuInfo { diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index 4b8acb8..7c8cc94 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -377,6 +377,30 @@ impl crate::backend::Device for MtlDevice { Ok(()) } + unsafe fn map_buffer( + &self, + buffer: &Self::Buffer, + offset: u64, + size: u64, + mode: MapMode, + ) -> Result<*mut u8, Error> { + let contents_ptr = buffer.buffer.contents(); + if contents_ptr.is_null() { + return Err("probably trying to map private buffer".into()); + } + Ok((contents_ptr as *mut u8).add(offset as usize)) + } + + unsafe fn unmap_buffer( + &self, + buffer: &Self::Buffer, + _offset: u64, + _size: u64, + _mode: MapMode, + ) -> Result<(), Error> { + Ok(()) + } + unsafe fn create_semaphore(&self) -> Result { Ok(Semaphore) } diff --git a/piet-gpu-hal/src/mux.rs b/piet-gpu-hal/src/mux.rs index a0ea28a..24fef5c 100644 --- a/piet-gpu-hal/src/mux.rs +++ b/piet-gpu-hal/src/mux.rs @@ -35,6 +35,7 @@ use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::backend::Device as DeviceTrait; use crate::BackendType; use crate::BindType; +use crate::MapMode; use crate::{BufferUsage, Error, GpuInfo, ImageLayout, InstanceFlags}; mux_enum! { @@ -445,31 +446,31 @@ impl Device { } } - pub unsafe fn read_buffer( + pub unsafe fn map_buffer( &self, buffer: &Buffer, - dst: *mut u8, offset: u64, size: u64, - ) -> Result<(), Error> { + mode: MapMode, + ) -> Result<*mut u8, Error> { mux_match! { self; - Device::Vk(d) => d.read_buffer(buffer.vk(), dst, offset, size), - Device::Dx12(d) => d.read_buffer(buffer.dx12(), dst, offset, size), - Device::Mtl(d) => d.read_buffer(buffer.mtl(), dst, offset, size), + Device::Vk(d) => d.map_buffer(buffer.vk(), offset, size, mode), + Device::Dx12(d) => d.map_buffer(buffer.dx12(), offset, size, mode), + Device::Mtl(d) => d.map_buffer(buffer.mtl(), offset, size, mode), } } - pub unsafe fn write_buffer( + pub unsafe fn unmap_buffer( &self, buffer: &Buffer, - contents: *const u8, offset: u64, size: u64, + mode: MapMode, ) -> Result<(), Error> { mux_match! { self; - Device::Vk(d) => d.write_buffer(buffer.vk(), contents, offset, size), - Device::Dx12(d) => d.write_buffer(buffer.dx12(), contents, offset, size), - Device::Mtl(d) => d.write_buffer(buffer.mtl(), contents, offset, size), + Device::Vk(d) => d.unmap_buffer(buffer.vk(), offset, size, mode), + Device::Dx12(d) => d.unmap_buffer(buffer.dx12(), offset, size, mode), + Device::Mtl(d) => d.unmap_buffer(buffer.mtl(), offset, size, mode), } } diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 34b6109..d5b31cb 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -13,7 +13,7 @@ use smallvec::SmallVec; use crate::backend::Device as DeviceTrait; use crate::{ - BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize, + BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, SamplerParams, SubgroupSize, WorkgroupLimits, }; @@ -821,14 +821,13 @@ impl crate::backend::Device for VkDevice { Ok(()) } - unsafe fn read_buffer( + unsafe fn map_buffer( &self, buffer: &Self::Buffer, - dst: *mut u8, offset: u64, size: u64, - ) -> Result<(), Error> { - let copy_size = size.try_into()?; + _mode: MapMode, + ) -> Result<*mut u8, Error> { let device = &self.device.device; let buf = device.map_memory( buffer.buffer_memory, @@ -836,28 +835,17 @@ impl crate::backend::Device for VkDevice { size, vk::MemoryMapFlags::empty(), )?; - std::ptr::copy_nonoverlapping(buf as *const u8, dst, copy_size); - device.unmap_memory(buffer.buffer_memory); - Ok(()) + Ok(buf as *mut u8) } - unsafe fn write_buffer( + unsafe fn unmap_buffer( &self, - buffer: &Buffer, - contents: *const u8, - offset: u64, - size: u64, + buffer: &Self::Buffer, + _offset: u64, + _size: u64, + _mode: MapMode, ) -> Result<(), Error> { - let copy_size = size.try_into()?; - let device = &self.device.device; - let buf = device.map_memory( - buffer.buffer_memory, - offset, - size, - vk::MemoryMapFlags::empty(), - )?; - std::ptr::copy_nonoverlapping(contents, buf as *mut u8, copy_size); - device.unmap_memory(buffer.buffer_memory); + self.device.device.unmap_memory(buffer.buffer_memory); Ok(()) } diff --git a/tests/src/clear.rs b/tests/src/clear.rs index 7d8bee0..c490cd3 100644 --- a/tests/src/clear.rs +++ b/tests/src/clear.rs @@ -61,9 +61,8 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult } total_elapsed += runner.submit(commands); if i == 0 { - let mut dst: Vec = Default::default(); - out_buf.read(&mut dst); - if let Some(failure) = verify(&dst) { + let dst = out_buf.map_read(..); + if let Some(failure) = verify(dst.cast_slice()) { result.fail(format!("failure at {}", failure)); } } diff --git a/tests/src/linkedlist.rs b/tests/src/linkedlist.rs index b3d03ed..34d1cc3 100644 --- a/tests/src/linkedlist.rs +++ b/tests/src/linkedlist.rs @@ -54,9 +54,8 @@ pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestR } total_elapsed += runner.submit(commands); if i == 0 { - let mut dst: Vec = Default::default(); - mem_buf.read(&mut dst); - if !verify(&dst) { + let dst = mem_buf.map_read(..); + if !verify(dst.cast_slice()) { result.fail("incorrect data"); } } diff --git a/tests/src/prefix.rs b/tests/src/prefix.rs index 71be865..eb50761 100644 --- a/tests/src/prefix.rs +++ b/tests/src/prefix.rs @@ -69,10 +69,17 @@ pub unsafe fn run_prefix_test( } */ let n_elements: u64 = config.size.choose(1 << 12, 1 << 24, 1 << 25); - let data: Vec = (0..n_elements as u32).collect(); let data_buf = runner .session - .create_buffer_init(&data, BufferUsage::STORAGE) + .create_buffer_with( + n_elements * 4, + |b| { + for i in 0..n_elements as u32 { + b.push(&i); + } + }, + BufferUsage::STORAGE, + ) .unwrap(); let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty()); let code = PrefixCode::new(runner, variant); @@ -91,9 +98,8 @@ pub unsafe fn run_prefix_test( } total_elapsed += runner.submit(commands); if i == 0 { - let mut dst: Vec = Default::default(); - out_buf.read(&mut dst); - if let Some(failure) = verify(&dst) { + let dst = out_buf.map_read(..); + if let Some(failure) = verify(dst.cast_slice()) { result.fail(format!("failure at {}", failure)); } } diff --git a/tests/src/prefix_tree.rs b/tests/src/prefix_tree.rs index 9603385..e2a94e0 100644 --- a/tests/src/prefix_tree.rs +++ b/tests/src/prefix_tree.rs @@ -47,10 +47,17 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul // prone to reading and writing past the end of buffers if this is // not a power of the number of elements processed in a workgroup. let n_elements: u64 = config.size.choose(1 << 12, 1 << 24, 1 << 24); - let data: Vec = (0..n_elements as u32).collect(); let data_buf = runner .session - .create_buffer_init(&data, BufferUsage::STORAGE) + .create_buffer_with( + n_elements * 4, + |b| { + for i in 0..n_elements as u32 { + b.push(&i); + } + }, + BufferUsage::STORAGE, + ) .unwrap(); let out_buf = runner.buf_down(data_buf.size(), BufferUsage::empty()); let code = PrefixTreeCode::new(runner); @@ -72,9 +79,8 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul } total_elapsed += runner.submit(commands); if i == 0 { - let mut dst: Vec = Default::default(); - out_buf.read(&mut dst); - if let Some(failure) = verify(&dst) { + let dst = out_buf.map_read(..); + if let Some(failure) = verify(dst.cast_slice()) { result.fail(format!("failure at {}", failure)); } } diff --git a/tests/src/runner.rs b/tests/src/runner.rs index 4965795..9bca26b 100644 --- a/tests/src/runner.rs +++ b/tests/src/runner.rs @@ -16,9 +16,12 @@ //! Test runner intended to make it easy to write tests. +use std::ops::RangeBounds; + use bytemuck::Pod; use piet_gpu_hal::{ - BackendType, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool, Session, + BackendType, BufReadGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, QueryPool, + Session, }; pub struct Runner { @@ -140,4 +143,8 @@ impl BufDown { pub unsafe fn read(&self, dst: &mut Vec) { self.stage_buf.read(dst).unwrap() } + + pub unsafe fn map_read<'a>(&'a self, range: impl RangeBounds) -> BufReadGuard<'a> { + self.stage_buf.map_read(range).unwrap() + } } From ac0fb228c1111cd3d929db451c98cd5fa90536b9 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 25 Nov 2021 21:34:06 -0800 Subject: [PATCH 2/5] Fix Metal port --- piet-gpu-hal/src/metal.rs | 46 ++++----------------------------------- 1 file changed, 4 insertions(+), 42 deletions(-) diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index 7c8cc94..e12cef2 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -30,7 +30,7 @@ use metal::{CGFloat, MTLFeatureSet}; use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; -use crate::{BufferUsage, Error, GpuInfo, WorkgroupLimits}; +use crate::{BufferUsage, Error, GpuInfo, MapMode, WorkgroupLimits}; use util::*; @@ -339,50 +339,12 @@ impl crate::backend::Device for MtlDevice { Ok(()) } - unsafe fn read_buffer( - &self, - buffer: &Self::Buffer, - dst: *mut u8, - offset: u64, - size: u64, - ) -> Result<(), Error> { - let contents_ptr = buffer.buffer.contents(); - if contents_ptr.is_null() { - return Err("probably trying to read from private buffer".into()); - } - std::ptr::copy_nonoverlapping( - (contents_ptr as *const u8).add(offset as usize), - dst, - size as usize, - ); - Ok(()) - } - - unsafe fn write_buffer( - &self, - buffer: &Buffer, - contents: *const u8, - offset: u64, - size: u64, - ) -> Result<(), Error> { - let contents_ptr = buffer.buffer.contents(); - if contents_ptr.is_null() { - return Err("probably trying to write to private buffer".into()); - } - std::ptr::copy_nonoverlapping( - contents, - (contents_ptr as *mut u8).add(offset as usize), - size as usize, - ); - Ok(()) - } - unsafe fn map_buffer( &self, buffer: &Self::Buffer, offset: u64, - size: u64, - mode: MapMode, + _size: u64, + _mode: MapMode, ) -> Result<*mut u8, Error> { let contents_ptr = buffer.buffer.contents(); if contents_ptr.is_null() { @@ -393,7 +355,7 @@ impl crate::backend::Device for MtlDevice { unsafe fn unmap_buffer( &self, - buffer: &Self::Buffer, + _buffer: &Self::Buffer, _offset: u64, _size: u64, _mode: MapMode, From f1d7560b3c8fe6676a7bd7ec61b88924cd593e8d Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 25 Nov 2021 22:02:04 -0800 Subject: [PATCH 3/5] Tweak extend implementation The one that takes T is more useful than the one that takes references to T. When specialization lands, we will be able to have both under the `extend` name. --- piet-gpu-hal/src/bufwrite.rs | 35 ++++++++++++++++++++++++++++++++++- tests/src/prefix.rs | 6 +----- tests/src/prefix_tree.rs | 6 +----- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/piet-gpu-hal/src/bufwrite.rs b/piet-gpu-hal/src/bufwrite.rs index eefe66f..980b4b2 100644 --- a/piet-gpu-hal/src/bufwrite.rs +++ b/piet-gpu-hal/src/bufwrite.rs @@ -73,7 +73,9 @@ impl BufWrite { /// Extend with an iterator over plain data objects. /// /// Currently, this doesn't panic, just truncates. That may change. - pub fn extend<'a, I, T: Pod + 'a>(&mut self, iter: I) + // Note: when specialization lands, this can be another impl of + // `Extend`. + pub fn extend_ref_iter<'a, I, T: Pod + 'a>(&mut self, iter: I) where I: IntoIterator, { @@ -115,3 +117,34 @@ impl std::ops::DerefMut for BufWrite { unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } } } + +impl std::iter::Extend for BufWrite { + fn extend(&mut self, iter: I) + where + I: IntoIterator, + { + let item_size = std::mem::size_of::(); + if item_size == 0 { + return; + } + let mut iter = iter.into_iter(); + let n_remaining = (self.capacity - self.len) / item_size; + unsafe { + let mut dst = self.ptr.add(self.len); + for _ in 0..n_remaining { + if let Some(item) = iter.next() { + std::ptr::copy_nonoverlapping( + bytemuck::bytes_of(&item).as_ptr(), + dst, + item_size, + ); + self.len += item_size; + dst = dst.add(item_size); + } else { + break; + } + } + } + // TODO: should we test the iter and panic on overflow? + } +} diff --git a/tests/src/prefix.rs b/tests/src/prefix.rs index eb50761..82f471f 100644 --- a/tests/src/prefix.rs +++ b/tests/src/prefix.rs @@ -73,11 +73,7 @@ pub unsafe fn run_prefix_test( .session .create_buffer_with( n_elements * 4, - |b| { - for i in 0..n_elements as u32 { - b.push(&i); - } - }, + |b| b.extend(0..n_elements as u32), BufferUsage::STORAGE, ) .unwrap(); diff --git a/tests/src/prefix_tree.rs b/tests/src/prefix_tree.rs index e2a94e0..4fb3423 100644 --- a/tests/src/prefix_tree.rs +++ b/tests/src/prefix_tree.rs @@ -51,11 +51,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul .session .create_buffer_with( n_elements * 4, - |b| { - for i in 0..n_elements as u32 { - b.push(&i); - } - }, + |b| b.extend(0..n_elements as u32), BufferUsage::STORAGE, ) .unwrap(); From 97bc4c4471498442608f59a06b18f7d07ee93dfc Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 26 Nov 2021 07:50:45 -0800 Subject: [PATCH 4/5] Tweak BufWrite::push Make it pass the value, not a reference, to more closely match Vec's behavior. It's not a big difference because the type is `Copy`, but still better. --- piet-gpu-hal/src/bufwrite.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/piet-gpu-hal/src/bufwrite.rs b/piet-gpu-hal/src/bufwrite.rs index 980b4b2..37f0292 100644 --- a/piet-gpu-hal/src/bufwrite.rs +++ b/piet-gpu-hal/src/bufwrite.rs @@ -26,8 +26,8 @@ impl BufWrite { /// /// Panics if capacity is inadequate. #[inline] - pub fn push(&mut self, item: &impl Pod) { - self.push_bytes(bytemuck::bytes_of(item)); + pub fn push(&mut self, item: impl Pod) { + self.push_bytes(bytemuck::bytes_of(&item)); } /// Extend with a slice of plain data objects. From a7a5b84c8685019bc25f262cff92ede031ba54bc Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 30 Nov 2021 10:34:42 -0800 Subject: [PATCH 5/5] Clean up stray files --- tests/src/piet_tests.rs | 133 ---------------------------------------- tests/src/util.rs | 20 ------ 2 files changed, 153 deletions(-) delete mode 100644 tests/src/piet_tests.rs delete mode 100644 tests/src/util.rs diff --git a/tests/src/piet_tests.rs b/tests/src/piet_tests.rs deleted file mode 100644 index 79cf4ec..0000000 --- a/tests/src/piet_tests.rs +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2021 The piet-gpu authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Also licensed under MIT license, at your choice. - -//! Test for piet-gpu transform scan. - -use crate::{Config, Runner, TestResult}; - -use kurbo::Affine; -use piet_gpu::stages::{self, Transform, TransformCode, TransformStage}; -use piet_gpu_hal::BufferUsage; -use rand::Rng; - -struct AffineTestData { - input_data: Vec, - expected: Vec, -} - -pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult { - let mut result = TestResult::new("transform"); - let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 24); - // TODO: would be nice to validate with real transform. - let data = AffineTestData::new(n_elements as usize); - let data_buf = runner - .session - .create_buffer_init(&data.input_data, BufferUsage::STORAGE) - .unwrap(); - let memory = runner.buf_down(data_buf.size() + 24, BufferUsage::empty()); - let stage_config = stages::Config { - n_trans: n_elements as u32, - // This is a hack to get elements aligned. - trans_alloc: 16, - ..Default::default() - }; - let config_buf = runner - .session - .create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE) - .unwrap(); - - let code = TransformCode::new(&runner.session); - let stage = TransformStage::new(&runner.session, &code); - let binding = stage.bind( - &runner.session, - &code, - &config_buf, - &data_buf, - &memory.dev_buf, - ); - let mut total_elapsed = 0.0; - let n_iter = config.n_iter; - for i in 0..n_iter { - let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands.cmd_buf, &code, &binding, n_elements); - commands.write_timestamp(1); - if i == 0 { - commands.cmd_buf.memory_barrier(); - commands.download(&memory); - } - total_elapsed += runner.submit(commands); - if i == 0 || config.verify_all { - let mut dst: Vec = Default::default(); - memory.read(&mut dst); - if let Some(failure) = data.verify(&dst[1..]) { - result.fail(failure); - } - } - } - result.timing(total_elapsed, n_elements * n_iter); - result -} - -impl AffineTestData { - fn new(n: usize) -> AffineTestData { - let mut rng = rand::thread_rng(); - let mut a = Affine::default(); - let mut b; - let mut input_data = Vec::with_capacity(n); - let mut expected = Vec::with_capacity(n); - for _ in 0..n { - loop { - b = Affine::new([ - rng.gen_range(-10.0, 10.0), - rng.gen_range(-10.0, 10.0), - rng.gen_range(-10.0, 10.0), - rng.gen_range(-10.0, 10.0), - rng.gen_range(-10.0, 10.0), - rng.gen_range(-10.0, 10.0), - ]); - if b.determinant() >= 1.0 { - break; - } - } - expected.push(b); - let c = a.inverse() * b; - input_data.push(Transform::from_kurbo(c)); - a = b; - } - AffineTestData { - input_data, - expected, - } - } - - fn verify(&self, actual: &[Transform]) -> Option { - for (i, (actual, expected)) in actual.iter().zip(&self.expected).enumerate() { - let error: f64 = actual - .to_kurbo() - .as_coeffs() - .iter() - .zip(expected.as_coeffs()) - .map(|(actual, expected)| (actual - expected).powi(2)) - .sum(); - let tolerance = 1e-6 * (i + 1) as f64; - if error > tolerance { - return Some(format!("{}: {} {}", i, error, tolerance)); - } - } - None - } -} diff --git a/tests/src/util.rs b/tests/src/util.rs deleted file mode 100644 index 473f601..0000000 --- a/tests/src/util.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 The piet-gpu authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Also licensed under MIT license, at your choice. - -pub fn align_size(size: u64, alignment: u64) -> u64 { - let tmp = size + alignment - 1; - tmp - tmp % alignment -}