runtime: use a more efficient swizzling implementation without array_chunks_mut

This commit is contained in:
chyyran 2024-08-20 17:29:39 -04:00 committed by Ronny Chan
parent f8c055524b
commit 5560c1ed09
3 changed files with 82 additions and 100 deletions

View file

@ -1,87 +0,0 @@
/// An iterator over a slice in (non-overlapping) mutable chunks (`N` elements
/// at a time), starting at the beginning of the slice.
///
/// Each item is a `&mut [T; N]`, so callers can destructure a chunk with an
/// array pattern.
///
/// When the slice length is not evenly divided by the chunk size, the last
/// up to `N-1` elements are omitted. Unlike the standard-library iterator this
/// type is modelled on, the constructor in this file discards that remainder,
/// so it cannot be retrieved from the iterator afterwards.
///
/// This struct is created by `ArrayChunksMut::new`.
#[derive(Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ArrayChunksMut<'a, T: 'a, const N: usize> {
// Iterator over the slice reinterpreted as `N`-element arrays; every
// `Iterator` method of this type forwards to it.
iter: core::slice::IterMut<'a, [T; N]>,
}
impl<'a, T, const N: usize> ArrayChunksMut<'a, T, N> {
#[inline]
pub(super) fn new(slice: &'a mut [T]) -> Self {
let (array_slice, _rem) = as_chunks_mut(slice);
Self {
iter: array_slice.iter_mut(),
}
}
}
/// Every method forwards to the wrapped `core::slice::IterMut`, so the
/// iterator behaves exactly like iterating the slice of `[T; N]` arrays.
impl<'a, T, const N: usize> Iterator for ArrayChunksMut<'a, T, N> {
    type Item = &'a mut [T; N];

    /// Yields the next whole chunk, or `None` once all chunks are consumed.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.iter)
    }

    /// Reports the number of chunks still to be yielded, as given by the
    /// wrapped slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.iter)
    }

    /// Consumes the iterator and returns how many chunks were left.
    #[inline]
    fn count(self) -> usize {
        let Self { iter } = self;
        iter.count()
    }

    /// Skips `n` chunks and yields the one after them.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        Iterator::nth(&mut self.iter, n)
    }

    /// Consumes the iterator and yields its final chunk, if any.
    #[inline]
    fn last(self) -> Option<Self::Item> {
        let Self { iter } = self;
        iter.last()
    }
}
/// Splits `slice` into a leading slice of `N`-element arrays and a trailing
/// remainder whose length is strictly less than `N`.
///
/// The array slice covers the first `(slice.len() / N) * N` elements; the
/// remainder covers whatever is left after them.
///
/// # Panics
///
/// Panics if `N` is 0.
#[inline]
#[must_use]
fn as_chunks_mut<T, const N: usize>(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) {
    assert!(N != 0, "chunk size must be non-zero");

    let chunk_count = slice.len() / N;
    let (head, tail) = slice.split_at_mut(chunk_count * N);

    // SAFETY: `N` is non-zero (asserted above) and `head` holds exactly
    // `chunk_count * N` elements by construction, so viewing it as
    // `chunk_count` arrays of `N` elements covers the same memory. `[T; N]`
    // has the same alignment as `T`, and `head` is not used again, so the
    // returned slice is the only live reference to that memory.
    let chunks = unsafe {
        core::slice::from_raw_parts_mut(head.as_mut_ptr().cast::<[T; N]>(), chunk_count)
    };

    (chunks, tail)
}

View file

@ -2,7 +2,6 @@ pub use image::ImageError;
use librashader_common::Size;
use std::marker::PhantomData;
use crate::array_chunks_mut::ArrayChunksMut;
use std::path::Path;
/// An uncompressed raw image ready to upload to GPU buffers.
@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 {
// NOTE(review): this span is a diff hunk whose +/- markers were lost. The
// `ArrayChunksMut` loop looks like the removed implementation and the
// `BGRA_SWIZZLE` lines like its replacement; presumably only one of the two
// exists in the real file — confirm against the repository.
impl PixelFormat for BGRA8 {
// Reorders each 4-byte pixel of `pixels` in place by exchanging bytes 0 and 2.
fn convert(pixels: &mut Vec<u8>) {
// Reject buffers that are not made of whole 4-byte pixels.
assert!(pixels.len() % 4 == 0);
// Loop form: swap the first and third byte of every 4-byte chunk.
for [r, _g, b, _a] in ArrayChunksMut::new(pixels) {
std::mem::swap(b, r)
}
// Table form: a 32-entry index table generated from the per-pixel order
// [2, 1, 0, 3], handed to `swizzle_pixels` together with the buffer.
const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]);
swizzle_pixels(pixels, BGRA_SWIZZLE);
}
}
// NOTE(review): this span is a diff hunk whose +/- markers were lost. The
// `ArrayChunksMut` loop looks like the removed implementation and the
// `ARGB_SWIZZLE` lines like its replacement; presumably only one of the two
// exists in the real file — confirm against the repository.
impl PixelFormat for ARGB8 {
// Reorders each 4-byte pixel of `pixels` in place.
fn convert(pixels: &mut Vec<u8>) {
// Reject buffers that are not made of whole 4-byte pixels.
assert!(pixels.len() % 4 == 0);
// Loop form. NOTE(review): traced on a pixel [r, g, b, a], the two swaps
// leave [b, g, a, r], which differs from the [a, r, g, b] order that the
// table form's [3, 0, 1, 2] requests — confirm which order callers expect.
for [r, _g, b, a] in ArrayChunksMut::new(pixels) {
std::mem::swap(r, a); // bytes 0 and 3 exchanged: [a, g, b, r]
std::mem::swap(b, r); // bytes 2 and 0 exchanged: [b, g, a, r]
}
// Table form: a 32-entry index table generated from the per-pixel order
// [3, 0, 1, 2], handed to `swizzle_pixels` together with the buffer.
const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]);
swizzle_pixels(pixels, ARGB_SWIZZLE);
}
}
@ -97,3 +91,81 @@ impl<P: PixelFormat> Image<P> {
})
}
}
/// Permutes the bytes of every 4-byte pixel in `pixels` in place.
///
/// `swizzle` is a 32-entry index table covering eight pixels at once: output
/// byte `k` of a 32-byte block is read from input byte `swizzle[k]` of the
/// same block. The first four entries are taken as the per-pixel permutation
/// and must therefore all be below 4, which holds for tables whose entries
/// repeat one 4-byte permutation with an offset of 4 per pixel.
///
/// # Panics
///
/// Panics if `pixels.len()` is not a multiple of 4, or if the table holds an
/// index that is out of range for the block it is applied to.
fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
    assert!(pixels.len() % 4 == 0);

    let mut blocks = pixels.chunks_exact_mut(32);

    // Fixed-size gather over 32 bytes; this should vectorize better than a
    // naive per-pixel mem swap.
    for block in &mut blocks {
        let gathered = swizzle.map(|i| block[i]);
        block.copy_from_slice(&gathered);
    }

    // Fewer than eight pixels are left. They must follow the same permutation
    // as the full blocks, so take it from the head of the table instead of
    // assuming one particular channel order.
    let order = [swizzle[0], swizzle[1], swizzle[2], swizzle[3]];
    for pixel in blocks.into_remainder().chunks_exact_mut(4) {
        let gathered = order.map(|i| pixel[i]);
        pixel.copy_from_slice(&gathered);
    }
}
/// Expands a 4-entry per-pixel permutation into an index table of `LEN`
/// entries.
///
/// For each group of four consecutive positions starting at `base`, entry
/// `base + lane` is set to `base + swizzle[lane]`, so the same permutation is
/// repeated for every group with the matching offset.
///
/// # Panics
///
/// Panics if `LEN` is not divisible by 4, or if `LEN` is non-zero and any
/// entry of `swizzle` is 4 or greater.
const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");

    let mut table = [0usize; LEN];
    let mut base = 0;
    while base < LEN {
        // Absolute indices of the four bytes belonging to this group.
        let group = [base, base + 1, base + 2, base + 3];

        // `for` loops are not available in `const fn`, hence the manual counter.
        let mut lane = 0;
        while lane < 4 {
            table[base + lane] = group[swizzle[lane]];
            lane += 1;
        }

        base += 4;
    }
    table
}
#[cfg(test)]
mod test {
    use crate::image::generate_swizzle;

    /// The identity order must produce a table where every entry equals its
    /// own position.
    #[test]
    pub fn generate_normal_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, 14, 15,
            16, 17, 18, 19,
            20, 21, 22, 23,
            24, 25, 26, 27,
            28, 29, 30, 31
        ];

        let actual = generate_swizzle::<32>([0, 1, 2, 3]);
        assert_eq!(actual, expected)
    }

    /// The `[3, 0, 1, 2]` order must be repeated for each of the eight groups,
    /// offset by 4 per group.
    #[test]
    pub fn generate_argb_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            3, 0, 1, 2,
            7, 4, 5, 6,
            11, 8, 9, 10,
            15, 12, 13, 14,
            19, 16, 17, 18,
            23, 20, 21, 22,
            27, 24, 25, 26,
            31, 28, 29, 30
        ];

        let actual = generate_swizzle::<32>([3, 0, 1, 2]);
        assert_eq!(actual, expected)
    }
}

View file

@ -36,6 +36,3 @@ pub mod render_target;
/// Helpers for handling framebuffers.
pub mod framebuffer;
/// array_chunks_mut polyfill
mod array_chunks_mut;