runtime: use a more efficient swizzling implementation without array_chunks_mut
This commit is contained in:
parent
f8c055524b
commit
5560c1ed09
|
@ -1,87 +0,0 @@
|
|||
/// An iterator over a slice in (non-overlapping) mutable chunks (`N` elements
/// at a time), starting at the beginning of the slice.
///
/// Only whole chunks are yielded. When the slice length is not evenly divided
/// by `N`, the trailing `len % N` elements are never produced: the constructor
/// in this module discards the remainder and this type offers no accessor for
/// it.
///
/// This is a crate-local polyfill modelled on the standard library's
/// `array_chunks_mut` slice iterator.
#[derive(Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ArrayChunksMut<'a, T: 'a, const N: usize> {
    /// Iterator over the slice after it has been reinterpreted as `[T; N]` arrays.
    iter: core::slice::IterMut<'a, [T; N]>,
}
|
||||
|
||||
impl<'a, T, const N: usize> ArrayChunksMut<'a, T, N> {
|
||||
#[inline]
|
||||
pub(super) fn new(slice: &'a mut [T]) -> Self {
|
||||
let (array_slice, _rem) = as_chunks_mut(slice);
|
||||
Self {
|
||||
iter: array_slice.iter_mut(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T, const N: usize> Iterator for ArrayChunksMut<'a, T, N> {
|
||||
type Item = &'a mut [T; N];
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a mut [T; N]> {
|
||||
self.iter.next()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn count(self) -> usize {
|
||||
self.iter.count()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||
self.iter.nth(n)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn last(self) -> Option<Self::Item> {
|
||||
self.iter.last()
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `slice` into a leading slice of `N`-element arrays and a trailing
/// remainder slice whose length is strictly less than `N`.
///
/// The arrays cover the slice from its start; the remainder holds whatever is
/// left once no further whole chunk fits.
///
/// # Panics
///
/// Panics if `N` is 0.
#[inline]
#[must_use]
fn as_chunks_mut<T, const N: usize>(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) {
    assert!(N != 0, "chunk size must be non-zero");

    let chunk_count = slice.len() / N;
    let (head, tail) = slice.split_at_mut(chunk_count * N);

    // SAFETY: `N` is non-zero (asserted above) and `head` holds exactly
    // `chunk_count * N` elements by construction, so it can be viewed as
    // `chunk_count` arrays of `N` elements each. `head` is not used again, so
    // the returned slice is the only live mutable view of that memory.
    let chunks = unsafe {
        core::slice::from_raw_parts_mut(head.as_mut_ptr().cast::<[T; N]>(), chunk_count)
    };

    (chunks, tail)
}
|
|
@ -2,7 +2,6 @@ pub use image::ImageError;
|
|||
use librashader_common::Size;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::array_chunks_mut::ArrayChunksMut;
|
||||
use std::path::Path;
|
||||
|
||||
/// An uncompressed raw image ready to upload to GPU buffers.
|
||||
|
@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 {
|
|||
|
||||
impl PixelFormat for BGRA8 {
|
||||
fn convert(pixels: &mut Vec<u8>) {
|
||||
assert!(pixels.len() % 4 == 0);
|
||||
for [r, _g, b, _a] in ArrayChunksMut::new(pixels) {
|
||||
std::mem::swap(b, r)
|
||||
}
|
||||
const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]);
|
||||
swizzle_pixels(pixels, BGRA_SWIZZLE);
|
||||
}
|
||||
}
|
||||
|
||||
impl PixelFormat for ARGB8 {
|
||||
fn convert(pixels: &mut Vec<u8>) {
|
||||
assert!(pixels.len() % 4 == 0);
|
||||
for [r, _g, b, a] in ArrayChunksMut::new(pixels) {
|
||||
std::mem::swap(r, a); // abgr
|
||||
std::mem::swap(b, r); // argb
|
||||
}
|
||||
const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]);
|
||||
swizzle_pixels(pixels, ARGB_SWIZZLE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,3 +91,81 @@ impl<P: PixelFormat> Image<P> {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Reorders the bytes of `pixels` in place according to a 32-entry lookup table.
///
/// The buffer is processed in 32-byte blocks (eight 4-byte pixels). Within a
/// block, output byte `i` is read from input byte `swizzle[i]` of that same
/// block. A trailing partial block is reordered with the leading entries of
/// the same table, so every pixel gets the same permutation whatever the
/// buffer length is.
///
/// `swizzle` is expected to be a table built by `generate_swizzle`, where each
/// entry stays inside its own 4-byte pixel.
///
/// # Panics
///
/// Panics if `pixels.len()` is not a multiple of 4, or if a table entry points
/// past the end of the block being processed.
fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
    assert!(pixels.len() % 4 == 0);
    let mut chunks = pixels.chunks_exact_mut(32);

    // Gathering a fixed-size block into a temporary array is intended to give
    // the compiler a better chance to vectorize than per-pixel swaps.
    for chunk in &mut chunks {
        let tmp = swizzle.map(|i| chunk[i]);
        chunk.copy_from_slice(&tmp);
    }

    // Fewer than 32 bytes are left. Reorder them with the same table rather
    // than a fixed order, so the tail matches the full blocks.
    let tail = chunks.into_remainder();
    let mut buf = [0u8; 32];
    let staged = &mut buf[..tail.len()];
    for (dst, &src) in staged.iter_mut().zip(swizzle) {
        *dst = tail[src];
    }
    tail.copy_from_slice(staged);
}
|
||||
|
||||
/// Expands a 4-entry per-pixel byte order into a `LEN`-entry index table.
///
/// For every group of four consecutive positions starting at `base`, entry
/// `base + k` of the result is `base + swizzle[k]`. Evaluable at compile time.
///
/// # Panics
///
/// Panics if `LEN` is not a multiple of 4, or (for non-zero `LEN`) if any
/// entry of `swizzle` is 4 or greater.
const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");

    let mut table = [0usize; LEN];
    let mut slot = 0;

    // `for` loops are not available in `const fn`, hence the manual counter.
    while slot < LEN {
        let lane = slot % 4;
        let base = slot - lane;
        // Absolute indices of the four bytes of the pixel that owns `slot`.
        let pixel = [base, base + 1, base + 2, base + 3];
        table[slot] = pixel[swizzle[lane]];
        slot += 1;
    }

    table
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::image::generate_swizzle;

    /// The identity order must expand to the indices `0..32` unchanged.
    #[test]
    pub fn generate_normal_swizzle() {
        #[rustfmt::skip]
        const EXPECTED: [usize; 32] = [
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, 14, 15,
            16, 17, 18, 19,
            20, 21, 22, 23,
            24, 25, 26, 27,
            28, 29, 30, 31,
        ];

        let table = generate_swizzle::<32>([0, 1, 2, 3]);
        assert_eq!(table, EXPECTED);
    }

    /// The `[3, 0, 1, 2]` order must rotate each group of four indices so the
    /// last one comes first.
    #[test]
    pub fn generate_argb_swizzle() {
        #[rustfmt::skip]
        const EXPECTED: [usize; 32] = [
            3, 0, 1, 2,
            7, 4, 5, 6,
            11, 8, 9, 10,
            15, 12, 13, 14,
            19, 16, 17, 18,
            23, 20, 21, 22,
            27, 24, 25, 26,
            31, 28, 29, 30,
        ];

        let table = generate_swizzle::<32>([3, 0, 1, 2]);
        assert_eq!(table, EXPECTED);
    }
}
|
||||
|
|
|
@ -36,6 +36,3 @@ pub mod render_target;
|
|||
|
||||
/// Helpers for handling framebuffers.
|
||||
pub mod framebuffer;
|
||||
|
||||
/// array_chunks_mut polyfill
|
||||
mod array_chunks_mut;
|
||||
|
|
Loading…
Reference in a new issue