From 5560c1ed0977c95aea0bb28f5ad8e3d619a7cbc9 Mon Sep 17 00:00:00 2001 From: chyyran Date: Tue, 20 Aug 2024 17:29:39 -0400 Subject: [PATCH] runtime: use a more efficient swizzling implementation without array_chunks_mut --- librashader-runtime/src/array_chunks_mut.rs | 87 ------------------- librashader-runtime/src/image.rs | 92 ++++++++++++++++++--- librashader-runtime/src/lib.rs | 3 - 3 files changed, 82 insertions(+), 100 deletions(-) delete mode 100644 librashader-runtime/src/array_chunks_mut.rs diff --git a/librashader-runtime/src/array_chunks_mut.rs b/librashader-runtime/src/array_chunks_mut.rs deleted file mode 100644 index 453d386..0000000 --- a/librashader-runtime/src/array_chunks_mut.rs +++ /dev/null @@ -1,87 +0,0 @@ -/// An iterator over a slice in (non-overlapping) mutable chunks (`N` elements -/// at a time), starting at the beginning of the slice. -/// -/// When the slice len is not evenly divided by the chunk size, the last -/// up to `N-1` elements will be omitted but can be retrieved from -/// the [`into_remainder`] function from the iterator. -/// -/// This struct is created by the [`array_chunks_mut`] method on [slices]. 
-/// -/// -/// [`array_chunks_mut`]: slice::array_chunks_mut -/// [`into_remainder`]: ../../std/slice/struct.ArrayChunksMut.html#method.into_remainder -/// [slices]: slice -#[derive(Debug)] -#[must_use = "iterators are lazy and do nothing unless consumed"] -pub struct ArrayChunksMut<'a, T: 'a, const N: usize> { - iter: core::slice::IterMut<'a, [T; N]>, -} - -impl<'a, T, const N: usize> ArrayChunksMut<'a, T, N> { - #[inline] - pub(super) fn new(slice: &'a mut [T]) -> Self { - let (array_slice, _rem) = as_chunks_mut(slice); - Self { - iter: array_slice.iter_mut(), - } - } -} - -impl<'a, T, const N: usize> Iterator for ArrayChunksMut<'a, T, N> { - type Item = &'a mut [T; N]; - - #[inline] - fn next(&mut self) -> Option<&'a mut [T; N]> { - self.iter.next() - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - - #[inline] - fn count(self) -> usize { - self.iter.count() - } - - #[inline] - fn nth(&mut self, n: usize) -> Option { - self.iter.nth(n) - } - - #[inline] - fn last(self) -> Option { - self.iter.last() - } -} - -/// Splits the slice into a slice of `N`-element arrays, -/// starting at the beginning of the slice, -/// and a remainder slice with length strictly less than `N`. -/// -/// # Panics -/// -/// Panics if `N` is 0. This check will most probably get changed to a compile time -/// error before this method gets stabilized. -/// -#[inline] -#[must_use] -fn as_chunks_mut(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) { - unsafe fn as_chunks_unchecked_mut(slice: &mut [T]) -> &mut [[T; N]] { - // SAFETY: Caller must guarantee that `N` is nonzero and exactly divides the slice length - let new_len = slice.len() / N; - - // SAFETY: We cast a slice of `new_len * N` elements into - // a slice of `new_len` many `N` elements chunks. 
- unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr().cast(), new_len) } - } - - assert!(N != 0, "chunk size must be non-zero"); - let len = slice.len() / N; - let (multiple_of_n, remainder) = slice.split_at_mut(len * N); - // SAFETY: We already panicked for zero, and ensured by construction - // that the length of the subslice is a multiple of N. - let array_slice = unsafe { as_chunks_unchecked_mut(multiple_of_n) }; - (array_slice, remainder) -} diff --git a/librashader-runtime/src/image.rs b/librashader-runtime/src/image.rs index 2621d91..834e215 100644 --- a/librashader-runtime/src/image.rs +++ b/librashader-runtime/src/image.rs @@ -2,7 +2,6 @@ pub use image::ImageError; use librashader_common::Size; use std::marker::PhantomData; -use crate::array_chunks_mut::ArrayChunksMut; use std::path::Path; /// An uncompressed raw image ready to upload to GPU buffers. @@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 { impl PixelFormat for BGRA8 { fn convert(pixels: &mut Vec) { - assert!(pixels.len() % 4 == 0); - for [r, _g, b, _a] in ArrayChunksMut::new(pixels) { - std::mem::swap(b, r) - } + const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]); + swizzle_pixels(pixels, BGRA_SWIZZLE); } } impl PixelFormat for ARGB8 { fn convert(pixels: &mut Vec) { - assert!(pixels.len() % 4 == 0); - for [r, _g, b, a] in ArrayChunksMut::new(pixels) { - std::mem::swap(r, a); // abgr - std::mem::swap(b, r); // argb - } + const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]); + swizzle_pixels(pixels, ARGB_SWIZZLE); } } @@ -97,3 +91,81 @@ impl Image

{
         })
     }
 }
+
+/// Swizzles `pixels` in place, 4 bytes per pixel, using a table built by `generate_swizzle`;
+/// `swizzle[..4]` is the per-pixel byte order. Panics if `pixels.len() % 4 != 0`.
+fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
+    assert!(pixels.len() % 4 == 0);
+    let mut chunks = pixels.chunks_exact_mut(32);
+    // This should vectorize faster than a naive mem swap
+    for chunk in &mut chunks {
+        let tmp = swizzle.map(|i| chunk[i]);
+        chunk.copy_from_slice(&tmp[..])
+    }
+    // The tail is shorter than one table; swizzle it per pixel with the table's first 4 entries.
+    for chunk in chunks.into_remainder().chunks_exact_mut(4) {
+        let tmp = [chunk[swizzle[0]], chunk[swizzle[1]], chunk[swizzle[2]], chunk[swizzle[3]]];
+        chunk.copy_from_slice(&tmp[..])
+    }
+}
+
+/// Builds a `LEN`-entry index table that applies the 4-byte order `swizzle` to every pixel:
+/// entry `4 * p + i` is `4 * p + swizzle[i]`. Usable in `const` context; panics if `LEN` is
+/// not a multiple of 4.
+const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
+    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");
+    let mut out: [usize; LEN] = [0; LEN];
+    let mut index = 0;
+    while index < LEN {
+        let chunk = [index, index + 1, index + 2, index + 3];
+        out[index + 0] = chunk[swizzle[0]];
+        out[index + 1] = chunk[swizzle[1]];
+        out[index + 2] = chunk[swizzle[2]];
+        out[index + 3] = chunk[swizzle[3]];
+        index += 4;
+    }
+    out
+}
+
+#[cfg(test)]
+mod test {
+    use crate::image::generate_swizzle;
+
+    #[test]
+    pub fn generate_normal_swizzle() {
+        let swizzle = generate_swizzle::<32>([0, 1, 2, 3]);
+        assert_eq!(
+            swizzle,
+            #[rustfmt::skip]
+            [
+                0, 1, 2, 3,
+                4, 5, 6, 7,
+                8, 9, 10, 11,
+                12, 13, 14, 15,
+                16, 17, 18, 19,
+                20, 21, 22, 23,
+                24, 25, 26, 27,
+                28, 29, 30, 31
+            ]
+        )
+    }
+
+    #[test]
+    pub fn generate_argb_swizzle() {
+        let swizzle = generate_swizzle::<32>([3, 0, 1, 2]);
+        assert_eq!(
+            swizzle,
+            #[rustfmt::skip]
+            [
+                3, 0, 1, 2,
+                7, 4, 5, 6,
+                11, 8, 9, 10,
+                15, 12, 13, 14,
+                19, 16, 17, 18,
+                23, 20, 21, 22,
+                27, 24, 25, 26,
+                31, 28, 29, 30
+            ]
+        )
+    }
+}
diff --git a/librashader-runtime/src/lib.rs b/librashader-runtime/src/lib.rs
index 4702df3..018c016 100644
--- a/librashader-runtime/src/lib.rs
+++ b/librashader-runtime/src/lib.rs
@@ -36,6 +36,3 @@ pub mod render_target;
 
 /// Helpers for handling framebuffers.
 pub mod framebuffer;
-
-/// array_chunks_mut polyfill
-mod array_chunks_mut;