Add an internal function for calling memcpy slightly faster; will probably make a public form later on.

This commit is contained in:
Lokathor 2022-10-17 01:59:49 -06:00
parent 96fc89cdbe
commit 5cb66e1a30
2 changed files with 28 additions and 2 deletions

View file

@ -1018,3 +1018,29 @@ core::arch::global_asm! {
},
options(raw),
}
/// This does a `bx` directly to [`__aeabi_memcpy4`], instead of doing a `bl` to
/// the symbol and having the linker generate a stub function.
///
/// This saves only 6 cycles, but there's no real harm in it other than it being
/// a pain in the butt to manually indirect like this.
///
/// In the future we probably want to allow users to call this, but for now it's
/// fine to just have it internally for the one place we're manually using it.
///
/// The three arguments are handed to the callee unchanged, in `r0`, `r1` and
/// `r2` respectively; the callee's address travels in `r3`.
///
/// # Safety
///
/// The callee is not visible from here. Presumably the caller must uphold
/// whatever [`__aeabi_memcpy4`] itself requires: going by the AEABI name, that
/// would be `dest` and `src` both 4-byte aligned, valid for `byte_count` bytes,
/// and not overlapping. TODO: confirm against the callee's definition.
#[inline]
// The function name embeds `__aeabi_memcpy4` verbatim, double underscore
// included, which the snake-case lint would otherwise flag.
#[allow(non_snake_case)]
pub(crate) unsafe fn bx__aeabi_memcpy4(
dest: *mut u8, src: *const u8, byte_count: usize,
) {
// Turn the function item into a plain function pointer so that its address can
// be handed to the asm block as a register value.
let f: unsafe extern "C" fn(*mut u8, *const u8, usize) = __aeabi_memcpy4;
// NOTE(review): `bx` only branches; nothing in this block loads `lr` with the
// address that follows the `bx`. When the callee returns through `lr` it
// therefore appears to go to whatever address `lr` held on entry to this block
// rather than to the end of the block, skipping any epilogue of the function
// this gets inlined into. Confirm that this is intended and actually safe.
core::arch::asm!("bx r3",
// `r0`-`r2` carry the arguments in and `r3` carries the branch target; the
// `=> _` half of each operand tells the compiler that the register's value is
// not preserved across the block.
inout("r0") dest => _,
inout("r1") src => _,
inout("r2") byte_count => _,
inout("r3") f => _,
// `r12` is not an input, but is declared as overwritten as well.
out("r12") _,
// CAUTION: this doesn't allow `lr` to be altered in the ASM block, so it
// doesn't work for the general C ABI, but since we've implemented the
// specific function ourselves we know that `lr` won't be trashed.
)
}

View file

@ -265,11 +265,11 @@ macro_rules! make_me_a_screenblock_addr {
/// Overwrites the entire screenblock with the data provided.
///
/// The array type of `words` fixes its length at exactly `Self::WORD_COUNT`
/// words, so the amount copied is known at compile time.
pub fn write_words(self, words: &[u32; Self::WORD_COUNT]) {
// NOTE(review): the two `use` lines and the two calls below look like the
// before/after halves of one change (plain call vs. `bx` wrapper). As written
// the same copy is performed twice; confirm that only one pair should remain.
use crate::prelude::__aeabi_memcpy4;
use crate::prelude::bx__aeabi_memcpy4;
// Destination: the start of this screenblock, viewed as a pointer to words.
let dest: *mut u32 = self.block.as_ptr() as *mut u32;
// Source: the first word of the caller's array.
let src: *const u32 = words.as_ptr();
// Total size of the source array in bytes.
let byte_count = size_of::<[u32; Self::WORD_COUNT]>();
// SAFETY: `src` comes from a reference to an array of exactly `byte_count`
// bytes, so it is valid and `u32`-aligned for that many bytes of reads. That
// `dest` is equally valid and aligned for writes is assumed from `self.block`.
// TODO: confirm against the screenblock address type, which is not visible here.
unsafe { __aeabi_memcpy4(dest.cast(), src.cast(), byte_count) };
unsafe { bx__aeabi_memcpy4(dest.cast(), src.cast(), byte_count) };
}
}
}