runtime: use a more efficient swizzling implementation without array_chunks_mut
This commit is contained in:
parent
f8c055524b
commit
5560c1ed09
|
@ -1,87 +0,0 @@
|
||||||
/// Mutable iterator that walks a slice front to back in consecutive,
/// non-overlapping `[T; N]` arrays.
///
/// Only whole arrays are yielded. When the slice length is not a multiple of
/// `N`, the trailing `len % N` elements are never produced by this iterator.
///
/// This type is a local stand-in for the iterator returned by the standard
/// library's `slice::array_chunks_mut`; values are built with
/// `ArrayChunksMut::new`.
#[derive(Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct ArrayChunksMut<'a, T: 'a, const N: usize> {
    /// Iterator over the source slice viewed as a slice of `N`-element arrays.
    iter: core::slice::IterMut<'a, [T; N]>,
}
|
|
||||||
|
|
||||||
impl<'a, T, const N: usize> ArrayChunksMut<'a, T, N> {
|
|
||||||
#[inline]
|
|
||||||
pub(super) fn new(slice: &'a mut [T]) -> Self {
|
|
||||||
let (array_slice, _rem) = as_chunks_mut(slice);
|
|
||||||
Self {
|
|
||||||
iter: array_slice.iter_mut(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T, const N: usize> Iterator for ArrayChunksMut<'a, T, N> {
|
|
||||||
type Item = &'a mut [T; N];
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn next(&mut self) -> Option<&'a mut [T; N]> {
|
|
||||||
self.iter.next()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
||||||
self.iter.size_hint()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn count(self) -> usize {
|
|
||||||
self.iter.count()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
|
||||||
self.iter.nth(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn last(self) -> Option<Self::Item> {
|
|
||||||
self.iter.last()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Views the front of `slice` as a slice of `N`-element arrays and returns it
/// together with the leftover tail.
///
/// The first returned slice covers the longest prefix whose length is a
/// multiple of `N`; the second holds the remaining elements and is always
/// shorter than `N`.
///
/// # Panics
///
/// Panics if `N` is 0.
#[inline]
#[must_use]
fn as_chunks_mut<T, const N: usize>(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) {
    assert!(N != 0, "chunk size must be non-zero");

    let chunk_count = slice.len() / N;
    let (head, tail) = slice.split_at_mut(chunk_count * N);

    // SAFETY: `N` is non-zero (asserted above) and `head` holds exactly
    // `chunk_count * N` contiguous elements, so the same memory can be viewed
    // as `chunk_count` arrays of `N` elements each. The exclusive borrow of
    // `head` is carried over to the returned slice.
    let chunks = unsafe {
        core::slice::from_raw_parts_mut(head.as_mut_ptr().cast::<[T; N]>(), chunk_count)
    };

    (chunks, tail)
}
|
|
|
@ -2,7 +2,6 @@ pub use image::ImageError;
|
||||||
use librashader_common::Size;
|
use librashader_common::Size;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use crate::array_chunks_mut::ArrayChunksMut;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
/// An uncompressed raw image ready to upload to GPU buffers.
|
/// An uncompressed raw image ready to upload to GPU buffers.
|
||||||
|
@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 {
|
||||||
|
|
||||||
impl PixelFormat for BGRA8 {
|
impl PixelFormat for BGRA8 {
|
||||||
fn convert(pixels: &mut Vec<u8>) {
|
fn convert(pixels: &mut Vec<u8>) {
|
||||||
assert!(pixels.len() % 4 == 0);
|
const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]);
|
||||||
for [r, _g, b, _a] in ArrayChunksMut::new(pixels) {
|
swizzle_pixels(pixels, BGRA_SWIZZLE);
|
||||||
std::mem::swap(b, r)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PixelFormat for ARGB8 {
|
impl PixelFormat for ARGB8 {
|
||||||
fn convert(pixels: &mut Vec<u8>) {
|
fn convert(pixels: &mut Vec<u8>) {
|
||||||
assert!(pixels.len() % 4 == 0);
|
const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]);
|
||||||
for [r, _g, b, a] in ArrayChunksMut::new(pixels) {
|
swizzle_pixels(pixels, ARGB_SWIZZLE);
|
||||||
std::mem::swap(r, a); // abgr
|
|
||||||
std::mem::swap(b, r); // argb
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,3 +91,81 @@ impl<P: PixelFormat> Image<P> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reorders the bytes of every 4-byte pixel in `pixels` in place.
///
/// `swizzle` is an index table covering eight pixels (32 bytes): within each
/// full 32-byte chunk, output byte `i` is taken from input byte `swizzle[i]`
/// of that same chunk.
///
/// Pixels left over after the last full chunk are permuted one at a time with
/// the first four entries of `swizzle`. For tables built by
/// `generate_swizzle` those entries are exactly the per-pixel order, so the
/// tail receives the same ordering as the rest of the buffer.
///
/// # Panics
///
/// Panics if `pixels.len()` is not a multiple of 4, or if the table holds an
/// index that is out of range for the chunk it is applied to.
fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
    assert!(pixels.len() % 4 == 0);
    let mut chunks = pixels.chunks_exact_mut(32);

    // This should vectorize faster than a naive mem swap
    for chunk in &mut chunks {
        let tmp = swizzle.map(|i| chunk[i]);
        chunk.copy_from_slice(&tmp);
    }

    // The tail must follow the caller's table as well; a fixed ordering here
    // would mis-swizzle the last pixels for every other format.
    let pixel_order = [swizzle[0], swizzle[1], swizzle[2], swizzle[3]];
    for pixel in chunks.into_remainder().chunks_exact_mut(4) {
        let tmp = pixel_order.map(|i| pixel[i]);
        pixel.copy_from_slice(&tmp);
    }
}
|
||||||
|
|
||||||
|
/// Expands a per-pixel component order into an index table of `LEN` entries.
///
/// The table is made of consecutive groups of four; in the group starting at
/// byte `base`, entry `k` is `base + swizzle[k]`. Applying the table to a
/// buffer of `LEN` bytes therefore applies `swizzle` to each 4-byte pixel.
///
/// # Panics
///
/// Panics if `LEN` is not divisible by 4, or if any entry of `swizzle` is 4
/// or greater. When evaluated in a const context this is a compile error.
const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");

    let mut table = [0usize; LEN];
    let mut base = 0;

    while base < LEN {
        // Absolute byte positions of the pixel that starts at `base`.
        let pixel = [base, base + 1, base + 2, base + 3];

        let mut component = 0;
        while component < 4 {
            table[base + component] = pixel[swizzle[component]];
            component += 1;
        }

        base += 4;
    }

    table
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::generate_swizzle;

    /// The identity order must map every byte index onto itself.
    #[test]
    pub fn generate_normal_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, 14, 15,
            16, 17, 18, 19,
            20, 21, 22, 23,
            24, 25, 26, 27,
            28, 29, 30, 31,
        ];

        let actual = generate_swizzle::<32>([0, 1, 2, 3]);
        assert_eq!(actual, expected);
    }

    /// The `[3, 0, 1, 2]` order must pull the last byte of every 4-byte group
    /// to the front of that group.
    #[test]
    pub fn generate_argb_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            3, 0, 1, 2,
            7, 4, 5, 6,
            11, 8, 9, 10,
            15, 12, 13, 14,
            19, 16, 17, 18,
            23, 20, 21, 22,
            27, 24, 25, 26,
            31, 28, 29, 30,
        ];

        let actual = generate_swizzle::<32>([3, 0, 1, 2]);
        assert_eq!(actual, expected);
    }
}
|
||||||
|
|
|
@ -36,6 +36,3 @@ pub mod render_target;
|
||||||
|
|
||||||
/// Helpers for handling framebuffers.
|
/// Helpers for handling framebuffers.
|
||||||
pub mod framebuffer;
|
pub mod framebuffer;
|
||||||
|
|
||||||
/// array_chunks_mut polyfill
|
|
||||||
mod array_chunks_mut;
|
|
||||||
|
|
Loading…
Reference in a new issue