Remove usage of global_asm (#175)

* message

* finish memory function re-arrangement.
Lokathor 2022-12-04 16:44:27 -07:00 committed by GitHub
parent eb5995bf1d
commit 8385504db6
7 changed files with 722 additions and 550 deletions
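
The heart of the change, in miniature: the memory functions used to be written inside `global_asm!` blocks and then re-declared in an `extern "C"` block so Rust code could call them; after this commit each one is a `#[naked]` Rust function whose body is a single `asm!` block, and other asm refers to it through a `sym` operand instead of a hard-coded global label. A minimal sketch of the two styles (hypothetical `my_asm_fn`, not code from this commit; `#[naked]` requires the nightly `naked_functions` feature and an ARM target):

// Before: the symbol only exists as a global label inside global_asm!, so
// Rust code has to re-declare it in an extern block before it can call it.
core::arch::global_asm! {
  ".global my_asm_fn",
  "my_asm_fn:",
  "bx lr",
}
extern "C" {
  fn my_asm_fn();
}

// After: a #[naked] function owns its own asm body, and other asm blocks
// can refer to it with a `sym` operand instead of a hard-coded name.
#[naked]
unsafe extern "C" fn my_asm_fn() {
  core::arch::asm!("bx lr", options(noreturn))
}

Each routine keeps its IWRAM placement, now written as a per-function `#[link_section]` attribute rather than a `put_code_in_section!` wrapper around the assembly text.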


@@ -13,7 +13,7 @@ track_caller = []
[dependencies]
bitfrob = "0.2.3"
voladdress = { version = "1.2.1", features = ["experimental_volregion"] }
-bracer = "0.1"
+bracer = "0.1.2"
[profile.dev]
opt-level = 3

examples/asm_viewer.rs (new file, 36 lines)

@@ -0,0 +1,36 @@
#![no_std]
#![no_main]
use gba::mem_fns::*;
#[panic_handler]
fn panic_handler(_: &core::panic::PanicInfo) -> ! {
loop {}
}
#[no_mangle]
extern "C" fn main() -> ! {
let dest = unsafe { (0x0400_0000 as *const u16).read_volatile() };
let src = unsafe { (0x0400_0000 as *const u16).read_volatile() };
let count = unsafe { (0x0400_0000 as *const u16).read_volatile() };
unsafe {
__aeabi_memcpy1(dest as *mut u8, src as *mut u8, count as usize);
__aeabi_memcpy2(dest as *mut u16, src as *mut u16, count as usize);
__aeabi_memcpy4(dest as *mut u32, src as *mut u32, count as usize);
__aeabi_memcpy8(dest as *mut u32, src as *mut u32, count as usize);
__aeabi_memcpy(dest as *mut u8, src as *mut u8, count as usize);
memcpy(dest as *mut u8, src as *mut u8, count as usize);
__aeabi_memmove4(dest as *mut u32, src as *mut u32, count as usize);
__aeabi_memmove8(dest as *mut u32, src as *mut u32, count as usize);
__aeabi_memmove(dest as *mut u8, src as *mut u8, count as usize);
memmove(dest as *mut u8, src as *mut u8, count as usize);
__aeabi_memset4(dest as *mut u32, count as usize, count as i32);
__aeabi_memset8(dest as *mut u32, count as usize, count as i32);
__aeabi_memset(dest as *mut u8, count as usize, count as i32);
memset(dest as *mut u8, count as i32, count as usize);
__aeabi_memclr4(dest as *mut u32, count as usize);
__aeabi_memclr8(dest as *mut u32, count as usize);
__aeabi_memclr(dest as *mut u8, count as usize);
}
loop {}
}
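
The odd-looking reads are presumably deliberate: pulling `dest`, `src`, and `count` out of a volatile MMIO read gives the optimizer values it can assume nothing about, so none of the calls can be constant-folded away and every memory function gets emitted into the binary where its assembly can be inspected (the values themselves are nonsense; the example is only meant to be built and disassembled). The laundering step in isolation:

// A volatile read produces a value the compiler must treat as unknown,
// which keeps the calls that consume it (or pointers derived from it) alive.
let opaque: u16 = unsafe { (0x0400_0000 as *const u16).read_volatile() };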


@@ -1,11 +1,11 @@
#![no_std]
#![no_main]
use core::fmt::Write;
use gba::prelude::*;
#[panic_handler]
fn panic_handler(info: &core::panic::PanicInfo) -> ! {
use core::fmt::Write;
if let Ok(mut logger) = MgbaBufferedLogger::try_new(MgbaMessageLevel::Fatal) {
writeln!(logger, "{info}").ok();
}


@@ -9,8 +9,6 @@
//! * If a function is set in the `RUST_IRQ_HANDLER` variable then that function
//! will be called and passed the bits for which interrupt(s) occurred.
-use core::ffi::c_void;
use crate::{
dma::DmaControl,
gba_cell::GbaCell,
@@ -18,7 +16,6 @@ use crate::{
mgba::MGBA_LOGGING_ENABLE_REQUEST,
mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE},
};
-use bracer::*;
/// The function pointer that the assembly runtime calls when an interrupt
/// occurs.
@@ -46,7 +43,7 @@ unsafe extern "C" fn __start() -> ! {
/* iwram copy */
"ldr r4, =__iwram_word_copy_count",
-when!("r4" != "#0" [label_id=1] {
+bracer::when!("r4" != "#0" [label_id=1] {
"add r3, r12, #{dma3_offset}",
"mov r5, #{dma3_setting}",
"ldr r0, =__iwram_start",
@@ -59,7 +56,7 @@ unsafe extern "C" fn __start() -> ! {
/* ewram copy */
"ldr r4, =__ewram_word_copy_count",
-when!("r4" != "#0" [label_id=1] {
+bracer::when!("r4" != "#0" [label_id=1] {
"add r3, r12, #{dma3_offset}",
"mov r5, #{dma3_setting}",
"ldr r0, =__ewram_start",
@@ -72,7 +69,7 @@ unsafe extern "C" fn __start() -> ! {
/* bss zero */
"ldr r4, =__bss_word_clear_count",
-when!("r4" != "#0" [label_id=1] {
+bracer::when!("r4" != "#0" [label_id=1] {
"ldr r0, =__bss_start",
"mov r2, #0",
"2:",
@@ -133,9 +130,9 @@ unsafe extern "C" fn runtime_irq_handler() {
/* Call the Rust fn pointer (if set), using System mode */
"ldr r1, ={RUST_IRQ_HANDLER}",
"ldr r1, [r1]",
-when!("r1" != "#0" [label_id=9] {
+bracer::when!("r1" != "#0" [label_id=9] {
-with_spsr_held_in!("r2", {
+bracer::with_spsr_held_in!("r2", {
-set_cpu_control!(System, irq_masked: false, fiq_masked: false),
+bracer::set_cpu_control!(System, irq_masked: false, fiq_masked: false),
// Note(Lokathor): We are *SKIPPING* the part where we ensure that the
// System stack pointer is aligned to 8 during the call to the rust
@@ -145,11 +142,11 @@ unsafe extern "C" fn runtime_irq_handler() {
// cycles total. Which is neat, but if this were on the DS (which has an
// ARMv5TE CPU) you'd want to ensure the aligned stack.
-with_pushed_registers!("{{r2, r3, r12, lr}}", {
+bracer::with_pushed_registers!("{{r2, r3, r12, lr}}", {
-adr_lr_then_bx_to!(reg="r1", label_id=1)
+bracer::adr_lr_then_bx_to!(reg="r1", label_id=1)
}),
-set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false),
+bracer::set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false),
}),
}),
@@ -162,6 +159,8 @@ unsafe extern "C" fn runtime_irq_handler() {
)
}
+// For now, the division fns can just keep living here.
/// Returns 0 in `r0`, while placing the `numerator` into `r1`.
///
/// This is written in that slightly strange way so that `div` function and
@@ -207,7 +206,7 @@ extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 {
core::arch::asm!(
// Check for divide by 0
"cmp r1, #0",
-"beq __aeabi_idiv0",
+"beq {__aeabi_idiv0}",
// r3(shifted_denom) = denom
"mov r3, r1",
// while shifted_denom < (num>>1): shifted_denom =<< 1;
@@ -228,6 +227,7 @@ extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 {
"cmp r3, r1",
"bcs 3b",
"bx lr",
+__aeabi_idiv0 = sym __aeabi_idiv0,
options(noreturn)
)
}
@@ -253,14 +253,15 @@ extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 {
"rsblt r0, r0, #0",
"cmp r1, #0",
"rsclt r1, r1, #0",
-with_pushed_registers!("{{lr}}", {
+bracer::with_pushed_registers!("{{lr}}", {
// divide them using `u32` division (this will check for divide by 0)
-"bl __aeabi_uidiv",
+"bl {__aeabi_uidiv}",
}),
// if they started as different signs, flip the output's sign.
"cmp r12, #0",
"rsblt r0, r0, #0",
"bx lr",
+__aeabi_uidiv = sym __aeabi_uidiv,
options(noreturn)
)
}
@@ -289,14 +290,15 @@ extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 {
// touch `r12`, while the other will be pushed onto the stack along with
// `lr`. Since the function's output will be in `r0`, we push/pop `r1`.
"mov r12, r0",
-with_pushed_registers!("{{r1, lr}}", {
+bracer::with_pushed_registers!("{{r1, lr}}", {
-"bl __aeabi_uidiv",
+"bl {__aeabi_uidiv}",
}),
// Now r0 holds the `quot`, and we use it along with the input args to
// calculate the `rem`.
"mul r2, r0, r1",
"sub r1, r12, r2",
"bx lr",
+__aeabi_uidiv = sym __aeabi_uidiv,
options(noreturn)
)
}
@@ -320,7 +322,7 @@ extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 {
extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 {
unsafe {
core::arch::asm!(
-with_pushed_registers!("{{r4, r5, lr}}", {
+bracer::with_pushed_registers!("{{r4, r5, lr}}", {
// store old numerator then make it the unsigned absolute
"movs r4, r0",
"rsblt r0, r0, #0",
@@ -328,7 +330,7 @@ extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 {
"movs r5, r1",
"rsblt r1, r1, #0",
// divmod using unsigned.
-"bl __aeabi_uidivmod",
+"bl {__aeabi_uidivmod}",
// if signs started opposite, quot becomes negative
"eors r12, r4, r5",
"rsblt r0, r0, #0",
@@ -337,534 +339,8 @@ extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 {
"rsblt r1, r1, #0",
}),
"bx lr",
+__aeabi_uidivmod = sym __aeabi_uidivmod,
options(noreturn)
)
}
}
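
Everything below this point in the hunk is deletion: the unaligned-access helpers, the libc-style wrappers, the `extern "C"` declarations, and the two big `global_asm!` blocks all leave this file (they reappear, mostly as `#[naked]` functions, in the new `src/mem_fns.rs` further down). For the division routines that stay, a plain-Rust model of the `__aeabi_uidivmod` return value may help: the quotient ends up in `r0` and the remainder in `r1`, which for a 32-bit ARM `extern "C"` function is the same as returning a `u64` whose low word is the quotient (illustration only, not the crate's code):

// Model of __aeabi_uidivmod's result packing (illustration only). The real
// code branches to __aeabi_idiv0 on a zero denominator; this model would panic.
fn uidivmod_model(numerator: u32, denominator: u32) -> u64 {
  let quot = numerator / denominator; // ends up in r0 (low word)
  let rem = numerator % denominator; // ends up in r1 (high word)
  (quot as u64) | ((rem as u64) << 32)
}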
/// Reads 4 bytes, starting at the address given.
///
/// See [__aeabi_uread4]
///
/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uread4"]
unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 {
core::arch::asm!(
"ldrb r2, [r0]",
"ldrb r3, [r0, #1]",
"orr r2, r2, r3, lsl #8",
"ldrb r3, [r0, #2]",
"orr r2, r2, r3, lsl #16",
"ldrb r3, [r0, #3]",
"orr r2, r2, r3, lsl #24",
"mov r0, r2",
"bx lr",
options(noreturn),
)
}
/// Writes 4 bytes, starting at the address given.
///
/// See [__aeabi_uwrite4]
///
/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uwrite4"]
unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) {
core::arch::asm!(
"strb r0, [r1]",
"lsr r2, r0, #8",
"strb r2, [r1, #1]",
"lsr r2, r2, #8",
"strb r2, [r1, #2]",
"lsr r2, r2, #8",
"strb r2, [r1, #3]",
"bx lr",
options(noreturn),
)
}
/// Reads 8 bytes, starting at the address given.
///
/// See [__aeabi_uread8]
///
/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uread8"]
unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 {
core::arch::asm!(
"ldrb r1, [r0, #4]",
"ldrb r2, [r0, #5]",
"orr r1, r1, r2, lsl #8",
"ldrb r2, [r0, #6]",
"orr r1, r1, r2, lsl #16",
"ldrb r2, [r0, #7]",
"orr r1, r1, r2, lsl #24",
"b __aeabi_uread4",
options(noreturn),
)
}
/// Writes 8 bytes, starting at the address given.
///
/// See [__aeabi_uwrite8]
///
/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uwrite8"]
unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) {
core::arch::asm!(
"strb r0, [r2]",
"lsr r3, r0, #8",
"strb r3, [r2, #1]",
"lsr r3, r3, #8",
"strb r3, [r2, #2]",
"lsr r3, r3, #8",
"strb r3, [r2, #3]",
"strb r1, [r2, #4]",
"lsr r3, r1, #8",
"strb r3, [r2, #5]",
"lsr r3, r3, #8",
"strb r3, [r2, #6]",
"lsr r3, r3, #8",
"strb r3, [r2, #7]",
"bx lr",
options(noreturn),
)
}
/// Provides a `libc` styled memory copy (transfer between exclusive regions).
///
/// This has mild overhead compared to calling [`__aeabi_memcpy`], prefer that
/// function when possible.
///
/// * **Returns:** the original `dest` pointer.
///
/// ## Safety
/// * `src` must be readable for `byte_count` bytes.
/// * `dest` must be writable for `byte_count` bytes.
/// * The `src` and `dest` regions must not overlap.
#[inline]
#[no_mangle]
pub unsafe extern "C" fn memcpy(
dest: *mut u8, src: *const u8, byte_count: usize,
) -> *mut u8 {
__aeabi_memcpy(dest, src, byte_count);
dest
}
/// Provides a `libc` styled memory move (transfer between non-exclusive
/// regions).
///
/// This has mild overhead compared to calling [`__aeabi_memmove`], prefer that
/// function when possible.
///
/// * **Returns:** the original `dest` pointer.
///
/// ## Safety
/// * `src` must be readable for `byte_count` bytes.
/// * `dest` must be writable for `byte_count` bytes.
#[inline]
#[no_mangle]
pub unsafe extern "C" fn memmove(
dest: *mut u8, src: *const u8, byte_count: usize,
) -> *mut u8 {
__aeabi_memmove(dest, src, byte_count);
dest
}
/// Provides a `libc` styled memory set (assign `u8` in `byte` to the entire
/// region).
///
/// This has mild overhead compared to calling [`__aeabi_memset`], prefer that
/// function when possible. Note that this function and that function have
/// slightly different argument ordering, though the compiler won't let you mess
/// it up like might happen in C.
///
/// * **Returns:** the original `dest` pointer.
///
/// ## Safety
/// * `dest` must be writable for `byte_count` bytes.
#[inline]
#[no_mangle]
pub unsafe extern "C" fn memset(
dest: *mut u8, byte: i32, byte_count: usize,
) -> *mut u8 {
__aeabi_memset(dest, byte_count, byte);
dest
}
extern "C" {
/// Memory transfer between *exclusive* regions.
///
/// There are no alignment requirements for the pointers. This will
/// automatically detect when pointers are sufficiently aligned to use `u16`
/// or `u32` transfers, instead of always using `u8` transfers.
///
/// This follows the AEABI convention of not returning the original `dest`
/// pointer at the end of the function. This actually allows a minor
/// optimization, so if you're going to call a memory copy function at all,
/// prefer this over [`memcpy`].
pub fn __aeabi_memcpy(dest: *mut u8, src: *const u8, byte_count: usize);
/// As [`__aeabi_memcpy`], but both pointers are assumed to be aligned to 4.
pub fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, byte_count: usize);
/// As [`__aeabi_memcpy`], but both pointers are assumed to be aligned to 8.
pub fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, byte_count: usize);
/// As [`__aeabi_memcpy`], but *only* performs `u8` transfers.
///
/// Importantly, this means that this function can be used to get data to/from
/// the SRAM region.
pub fn gba_sram_memcpy(dest: *mut u8, src: *const u8, byte_count: usize);
/// Memory transfer between *non-exclusive* regions.
///
/// As [`__aeabi_memcpy`], but the regions don't need to be exclusive.
pub fn __aeabi_memmove(dest: *mut u8, src: *const u8, byte_count: usize);
/// As [`__aeabi_memmove`], but both pointers are assumed to be aligned to 4.
pub fn __aeabi_memmove4(dest: *mut u8, src: *const u8, byte_count: usize);
/// As [`__aeabi_memmove`], but both pointers are assumed to be aligned to 8.
pub fn __aeabi_memmove8(dest: *mut u8, src: *const u8, byte_count: usize);
/// Sets all bytes in the region to the value given.
///
/// For historical reasons, the "byte" passed in is passed as an `i32`. Still,
/// only the low 8 bits of the value are kept and written to the region.
///
/// There are no alignment requirements for the pointer. This will
/// automatically detect when pointer is sufficiently aligned to use `u16` or
/// `u32` writes, instead of always using `u8` writes.
///
/// This follows the AEABI convention of not returning the original `dest`
/// pointer at the end of the function. This actually allows a minor
/// optimization, so if you're going to call a memory copy function at all,
/// prefer this over [`memcpy`].
pub fn __aeabi_memset(dest: *mut u8, byte_count: usize, byte: i32);
/// As [`__aeabi_memset`], but both pointers are assumed to be aligned to 4.
pub fn __aeabi_memset4(dest: *mut u8, byte_count: usize, byte: i32);
/// As [`__aeabi_memset`], but both pointers are assumed to be aligned to 8.
pub fn __aeabi_memset8(dest: *mut u8, byte_count: usize, byte: i32);
/// Sets all bytes in the region to 0.
///
/// There are no alignment requirements for the pointer. This will
/// automatically detect when the pointer is sufficiently aligned to use `u16`
/// or `u32` writes, instead of always using `u8` writes.
pub fn __aeabi_memclr(dest: *mut u8, byte_count: usize);
/// As [`__aeabi_memclr`], but the pointer is assumed to be aligned to 4.
pub fn __aeabi_memclr4(dest: *mut u8, byte_count: usize);
/// As [`__aeabi_memclr`], but the pointer is assumed to be aligned to 8.
pub fn __aeabi_memclr8(dest: *mut u8, byte_count: usize);
}
core::arch::global_asm! {
emit_a32_code!{
put_code_in_section!(".iwram.aeabi.memory.copy.and.move", {
".global __aeabi_memmove8",
".global __aeabi_memmove4",
".global __aeabi_memmove",
".global __aeabi_memcpy8",
".global __aeabi_memcpy4",
".global __aeabi_memcpy",
//
"__aeabi_memmove8:",
"__aeabi_memmove4:",
"__aeabi_memmove:",
"cmp r0, r1", // if d > s, reverse copy
"bgt .L_r_copy_gain_align",
// else fallthrough
"__aeabi_memcpy:",
".L_f_copy_gain_align:",
"eor r3, r0, r1",
"lsls r3, r3, #31",
"bmi .L_f_copy_max_coalign1",
"bcs .L_f_copy_max_coalign2",
// else fallthrough
".L_f_copy_max_coalign4:",
"tst r0, #3",
"bne .L_f_copy_fixup4",
// else fallthrough
"__aeabi_memcpy8:",
"__aeabi_memcpy4:",
".L_f_copy_coalign4_assured:",
"cmp r2, #32",
"bge .L_f_copy_block",
".L_f_copy_post_block:",
// copy 4 words, two at a time
"tst r2, #0b10000",
"ldmne r1!, {r3, r12}",
"stmne r0!, {r3, r12}",
"ldmne r1!, {r3, r12}",
"stmne r0!, {r3, r12}",
"bics r2, r2, #0b10000",
"bxeq lr",
// copy 2 and/or 1 words
"lsls r3, r2, #29",
"ldmcs r1!, {r3, r12}",
"stmcs r0!, {r3, r12}",
"ldrmi r3, [r1], #4",
"strmi r3, [r0], #4",
"bics r2, r2, #0b1100",
"bxeq lr",
// copy halfword and/or byte
"lsls r3, r2, #31",
"ldrhcs r3, [r1], #2",
"strhcs r3, [r0], #2",
"ldrbmi r3, [r1], #1",
"strbmi r3, [r0], #1",
"bx lr",
".L_f_copy_block:",
with_pushed_registers!("{r4-r9}", {
"1:",
"subs r2, r2, #32",
"ldmge r1!, {r3-r9, r12}",
"stmge r0!, {r3-r9, r12}",
"bgt 1b",
}),
"bxeq lr",
"b .L_f_copy_post_block",
".L_f_copy_fixup4:",
"cmp r2, #7", // if count <= (fix+word): just byte copy
"ble .L_f_copy_max_coalign1",
"lsls r3, r0, #31",
"submi r2, r2, #1",
"ldrbmi r3, [r1], #1",
"strbmi r3, [r0], #1",
"subcs r2, r2, #2",
"ldrhcs r3, [r1], #2",
"strhcs r3, [r0], #2",
"b .L_f_copy_coalign4_assured",
".L_f_copy_max_coalign2:",
"tst r0, #1",
"bne .L_f_copy_fixup2",
".L_f_copy_coalign2_assured:",
"1:",
"subs r2, r2, #2",
"ldrhge r3, [r1], #2",
"strhge r3, [r0], #2",
"bgt 1b",
"bxeq lr",
"tst r2, #1",
"ldrbne r3, [r1], #1",
"strbne r3, [r0], #1",
"bx lr",
".L_f_copy_fixup2:",
"cmp r2, #3", // if count <= (fix+halfword): just byte copy
"ble .L_f_copy_max_coalign1",
"sub r2, r2, #1",
"ldrb r3, [r1], #1",
"strb r3, [r0], #1",
"b .L_f_copy_coalign2_assured",
"gba_sram_memcpy:",
".L_f_copy_max_coalign1:",
"1:",
"subs r2, r2, #1",
"ldrbge r3, [r1], #1",
"strbge r3, [r0], #1",
"bgt 1b",
"bx lr",
".L_r_copy_gain_align:",
"add r0, r0, r2",
"add r1, r1, r2",
"eor r3, r0, r1",
"lsls r3, r3, #31",
"bmi .L_r_copy_max_coalign1",
"bcs .L_r_copy_max_coalign2",
// else fallthrough
".L_r_copy_max_coalign4:",
"tst r0, #3",
"bne .L_r_copy_fixup4",
".L_r_copy_coalign4_assured:",
"cmp r2, #32",
"bge .L_r_copy_block",
".L_r_copy_post_block:",
// copy 4 words, two at a time
"tst r2, #0b10000",
"ldmdbne r1!, {r3, r12}",
"stmdbne r0!, {r3, r12}",
"ldmdbne r1!, {r3, r12}",
"stmdbne r0!, {r3, r12}",
"bics r2, r2, #0b10000",
"bxeq lr",
// copy 2 and/or 1 words
"lsls r3, r2, #29",
"ldmdbcs r1!, {r3, r12}",
"stmdbcs r0!, {r3, r12}",
"ldrmi r3, [r1, #-4]!",
"strmi r3, [r0, #-4]!",
"bxeq lr",
"lsls r2, r2, #31",
"ldrhcs r3, [r1, #-2]!",
"strhcs r3, [r0, #-2]!",
"ldrbmi r3, [r1, #-1]!",
"strbmi r3, [r0, #-1]!",
"bx lr",
".L_r_copy_block:",
with_pushed_registers!("{r4-r9}", {
"1:",
"subs r2, r2, #32",
"ldmdbcs r1!, {r3-r9, r12}",
"stmdbcs r0!, {r3-r9, r12}",
"bgt 1b",
}),
"bxeq lr",
"b .L_r_copy_post_block",
".L_r_copy_fixup4:",
"cmp r2, #7", // if count <= (fix+word): just byte copy
"ble .L_r_copy_max_coalign1",
"lsls r3, r0, #31",
"submi r2, r2, #1",
"ldrbmi r3, [r1, #-1]!",
"strbmi r3, [r0, #-1]!",
"subcs r2, r2, #2",
"ldrhcs r3, [r1, #-2]!",
"strhcs r3, [r0, #-2]!",
"b .L_r_copy_coalign4_assured",
".L_r_copy_max_coalign2:",
"tst r0, #1",
"bne .L_r_copy_fixup2",
".L_r_copy_coalign2_assured:",
"1:",
"subs r2, r2, #2",
"ldrhge r3, [r1, #-2]!",
"strhge r3, [r0, #-2]!",
"bgt 1b",
"bxeq lr",
"tst r2, #1",
"ldrbne r3, [r1, #-1]!",
"strbne r3, [r0, #-1]!",
"bx lr",
".L_r_copy_fixup2:",
"cmp r2, #3", // if count <= (fix+halfword): just byte copy
"ble .L_r_copy_max_coalign1",
"sub r2, r2, #1",
"ldrb r3, [r1, #-1]!",
"strb r3, [r0, #-1]!",
"b .L_r_copy_coalign2_assured",
".L_r_copy_max_coalign1:",
"1:",
"subs r2, r2, #1",
"ldrbge r3, [r1, #-1]!",
"strbge r3, [r0, #-1]!",
"bgt 1b",
"bx lr",
}),
},
options(raw)
}
core::arch::global_asm! {
emit_a32_code!{
put_code_in_section!(".iwram.aeabi.memory.clear.and.set", {
".global __aeabi_memclr8",
".global __aeabi_memclr4",
".global __aeabi_memclr",
".global __aeabi_memset8",
".global __aeabi_memset4",
".global __aeabi_memset",
//
"__aeabi_memclr8:",
"__aeabi_memclr4:",
"mov r2, #0",
"mov r3, #0",
"b .L_memset_check_for_block_work",
"__aeabi_memclr:",
"mov r2, #0",
"__aeabi_memset8:",
"__aeabi_memset4:",
"__aeabi_memset:", // r0(dest), r1(count), r2(byte)
// duplicate the byte across all of r2 and r3
"and r2, r2, #0xFF",
"orr r2, r2, r2, lsl #8",
"orr r2, r2, r2, lsl #16",
"mov r3, r2",
// for 'sets' too small to fixup we just byte loop
"cmp r1, #3",
"ble .L_memset_byte_loop",
// carry/sign test on the address, then do fixup
"lsls r12, r0, #31",
"submi r1, r1, #1",
"strbmi r2, [r0], #1",
"subcs r1, r1, #2",
"strhcs r2, [r0], #2",
".L_memset_check_for_block_work:",
"cmp r1, #32",
"bge .L_memset_block_work",
".L_memset_post_block_work:",
// set 4 words
"tst r1, #0b10000",
"stmne r0!, {r2, r3}",
"stmne r0!, {r2, r3}",
// set 2 and/or 1 words
"lsls r12, r1, #29",
"stmcs r0!, {r2, r3}",
"strmi r2, [r0], #4",
// set halfword and/or byte
"lsls r12, r1, #31",
"strhcs r2, [r0], #2",
"strbmi r2, [r0], #1",
"bx lr",
".L_memset_block_work:",
with_pushed_registers!("{r4-r9}", {
"mov r4, r2",
"mov r5, r2",
"mov r6, r2",
"mov r7, r2",
"mov r8, r2",
"mov r9, r2",
"1:",
"subs r1, r1, #32",
"stmge r0!, {r2-r9}",
"bgt 1b",
}),
"bxeq lr",
"b .L_memset_post_block_work",
".L_memset_byte_loop:",
"1:",
"subs r1, r1, #1",
"strbcs r2, [r0], #1",
"bgt 1b",
"bx lr",
}),
},
options(raw),
}


@@ -96,6 +96,7 @@ pub mod fixed;
pub mod gba_cell;
pub mod interrupts;
pub mod keys;
+pub mod mem_fns;
pub mod mgba;
pub mod mmio;
pub mod prelude;

src/mem_fns.rs (new file, 659 lines)

@@ -0,0 +1,659 @@
//! Module for direct memory operations.
//!
//! Generally you don't need to call these yourself. Instead, the compiler will
//! insert calls to the functions defined here as necessary.
use core::ffi::c_void;
/// Byte copy between exclusive regions.
///
/// * This will *always* copy one byte at a time, making it suitable for use
/// with SRAM memory.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memcpy1"]
pub unsafe extern "C" fn __aeabi_memcpy1(
dest: *mut u8, src: *const u8, byte_count: usize,
) {
core::arch::asm! {
"1:",
"subs {count}, {count}, #1",
"ldrbge {temp}, [{src}], #1",
"strbge {temp}, [{dest}], #1",
"bgt 1b",
temp = out(reg) _,
count = inout(reg) byte_count => _,
src = inout(reg) src => _,
dest = inout(reg) dest => _,
options(nostack)
}
}
/// Halfword copy between exclusive regions.
///
/// * **Safety:** The pointers must start aligned to 2.
/// * If the `byte_count` is odd then a single byte copy will happen at the end.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memcpy2"]
pub unsafe extern "C" fn __aeabi_memcpy2(
mut dest: *mut u16, mut src: *const u16, mut byte_count: usize,
) {
core::arch::asm! {
"1:",
"subs {count}, {count}, #2",
"ldrhge {temp}, [{src}], #2",
"strhge {temp}, [{dest}], #2",
"bgt 1b",
temp = out(reg) _,
count = inout(reg) byte_count,
src = inout(reg) src,
dest = inout(reg) dest,
options(nostack)
}
if byte_count != 0 {
let dest = dest.cast::<u8>();
let src = src.cast::<u8>();
dest.write_volatile(src.read_volatile());
}
}
/// Word copy between exclusive regions.
///
/// * **Safety:** The pointers must start aligned to 4.
/// * If `byte_count` is not a multiple of 4 then a halfword and/or byte copy
/// will happen at the end.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memcpy4"]
pub unsafe extern "C" fn __aeabi_memcpy4(
dest: *mut u32, src: *const u32, byte_count: usize,
) {
core::arch::asm! {
bracer::when!( "r2" >=u "#32" [label_id=2] {
bracer::with_pushed_registers!("{{r4-r9}}", {
"1:",
"subs r2, r2, #32",
"ldmge r1!, {{r3-r9, r12}}",
"stmge r0!, {{r3-r9, r12}}",
"bgt 1b",
}),
"bxeq lr",
}),
// copy 4 words, two at a time
"tst r2, #0b10000",
"ldmne r1!, {{r3, r12}}",
"stmne r0!, {{r3, r12}}",
"ldmne r1!, {{r3, r12}}",
"stmne r0!, {{r3, r12}}",
"bics r2, r2, #0b10000",
"bxeq lr",
// copy 2 and/or 1 words
"lsls r3, r2, #29",
"ldmcs r1!, {{r3, r12}}",
"stmcs r0!, {{r3, r12}}",
"ldrmi r3, [r1], #4",
"strmi r3, [r0], #4",
"bics r2, r2, #0b1100",
"bxeq lr",
// copy halfword and/or byte
"lsls r3, r2, #31",
"ldrhcs r3, [r1], #2",
"strhcs r3, [r0], #2",
"ldrbmi r3, [r1], #1",
"strbmi r3, [r0], #1",
"bx lr",
options(noreturn),
}
}
/// Just call [`__aeabi_memcpy4`] instead.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memcpy8"]
pub unsafe extern "C" fn __aeabi_memcpy8(
dest: *mut u32, src: *const u32, byte_count: usize,
) {
__aeabi_memcpy4(dest, src, byte_count);
}
/// Arbitrary width copy between exclusive regions.
///
/// * The pointers do not have a minimum alignment.
/// * The function will automatically use the best type of copy possible, based
/// on the pointers given.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memcpy"]
pub unsafe extern "C" fn __aeabi_memcpy(
dest: *mut u8, src: *const u8, byte_count: usize,
) {
core::arch::asm! {
"cmp r2, #7", // if count <= (fix+word): just byte copy
"ble {__aeabi_memcpy1}",
// check max coalign
"eor r3, r0, r1",
"lsls r3, r3, #31",
"bmi {__aeabi_memcpy1}",
"bcs 2f",
// max coalign4, possible fixup and jump
"lsls r3, r0, #31",
"submi r2, r2, #1",
"ldrbmi r3, [r1], #1",
"strbmi r3, [r0], #1",
"subcs r2, r2, #2",
"ldrhcs r3, [r1], #2",
"strhcs r3, [r0], #2",
"b {__aeabi_memcpy4}",
// max coalign2, possible fixup and jump
"2:",
"lsls r3, r0, #31",
"submi r2, r2, #1",
"ldrbmi r3, [r1], #1",
"strbmi r3, [r0], #1",
"b {__aeabi_memcpy2}",
//
__aeabi_memcpy4 = sym __aeabi_memcpy4,
__aeabi_memcpy2 = sym __aeabi_memcpy2,
__aeabi_memcpy1 = sym __aeabi_memcpy1,
options(noreturn)
}
}
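The dispatch at the top of `__aeabi_memcpy` keys off the low bits of `dest ^ src`: `lsls r3, r3, #31` moves bit 0 into the sign flag and bit 1 into carry, picking the widest transfer size the two pointers can ever be co-aligned to, with a small head fixup before jumping into the 2- or 4-byte core. The same selection rule in plain Rust (illustration only):

// Widest co-alignment the eor/lsls test above can establish (illustration).
fn max_coalign_width(dest: usize, src: usize) -> usize {
  match (dest ^ src) & 0b11 {
    0 => 4, // both pointers can reach 4-byte alignment after a fixup
    2 => 2, // both pointers can reach 2-byte alignment
    _ => 1, // the addresses differ in bit 0, so only byte copies work
  }
}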
/// Copy between exclusive regions, prefer [`__aeabi_memcpy`] if possible.
///
/// This is the libc version of a memory copy. It's required to return the
/// `dest` pointer at the end of the call, which makes it need an extra
/// push/pop compared to a direct call to `__aeabi_memcpy`.
///
/// * **Returns:** The `dest` pointer.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.memcpy"]
pub unsafe extern "C" fn memcpy(
dest: *mut u8, src: *const u8, byte_count: usize,
) -> *mut u8 {
core::arch::asm! {
bracer::with_pushed_registers!("{{r0, lr}}", {
"bl {__aeabi_memcpy}",
}),
"bx lr",
__aeabi_memcpy = sym __aeabi_memcpy,
options(noreturn)
}
}
// MOVE
// used by `__aeabi_memmove` in some cases
#[inline]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.reverse_copy_u8"]
unsafe extern "C" fn reverse_copy_u8(
dest: *mut u8, src: *const u8, byte_count: usize,
) {
core::arch::asm! {
"1:",
"subs {count}, {count}, #1",
"ldrbge {temp}, [{src}, #-1]!",
"strbge {temp}, [{dest}, #-1]!",
"bgt 1b",
temp = out(reg) _,
count = inout(reg) byte_count => _,
src = inout(reg) src => _,
dest = inout(reg) dest => _,
options(nostack)
}
}
// used by `__aeabi_memmove` in some cases
#[inline]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.reverse_copy_u16"]
unsafe extern "C" fn reverse_copy_u16(
mut dest: *mut u16, mut src: *const u16, mut byte_count: usize,
) {
core::arch::asm! {
"1:",
"subs {count}, {count}, #2",
"ldrhge {temp}, [{src}, #-2]!",
"strhge {temp}, [{dest}, #-2]!",
"bgt 1b",
temp = out(reg) _,
count = inout(reg) byte_count,
src = inout(reg) src,
dest = inout(reg) dest,
options(nostack)
}
if byte_count != 0 {
let dest = dest.cast::<u8>().sub(1);
let src = src.cast::<u8>().sub(1);
dest.write_volatile(src.read_volatile());
}
}
// used by `__aeabi_memmove` in some cases
#[naked]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.reverse_copy_u32"]
unsafe extern "C" fn reverse_copy_u32(
dest: *mut u32, src: *const u32, byte_count: usize,
) {
core::arch::asm! {
bracer::when!( "r2" >=u "#32" [label_id=2] {
bracer::with_pushed_registers!("{{r4-r9}}", {
"1:",
"subs r2, r2, #32",
"ldmdbcs r1!, {{r3-r9, r12}}",
"stmdbcs r0!, {{r3-r9, r12}}",
"bgt 1b",
}),
"bxeq lr",
}),
// copy 4 words, two at a time
"tst r2, #0b10000",
"ldmdbne r1!, {{r3, r12}}",
"stmdbne r0!, {{r3, r12}}",
"ldmdbne r1!, {{r3, r12}}",
"stmdbne r0!, {{r3, r12}}",
"bics r2, r2, #0b10000",
"bxeq lr",
// copy 2 and/or 1 words
"lsls r3, r2, #29",
"ldmdbcs r1!, {{r3, r12}}",
"stmdbcs r0!, {{r3, r12}}",
"ldrmi r3, [r1, #-4]!",
"strmi r3, [r0, #-4]!",
"bxeq lr",
// copy halfword and/or byte
"lsls r2, r2, #31",
"ldrhcs r3, [r1, #-2]!",
"strhcs r3, [r0, #-2]!",
"ldrbmi r3, [r1, #-1]!",
"strbmi r3, [r0, #-1]!",
"bx lr",
options(noreturn),
}
}
/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memmove4"]
pub unsafe extern "C" fn __aeabi_memmove4(
dest: *mut u32, src: *const u32, byte_count: usize,
) {
__aeabi_memmove(dest.cast(), src.cast(), byte_count)
}
/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memmove8"]
pub unsafe extern "C" fn __aeabi_memmove8(
dest: *mut u32, src: *const u32, byte_count: usize,
) {
__aeabi_memmove(dest.cast(), src.cast(), byte_count)
}
/// Copy between non-exclusive regions.
///
/// * The pointers do not have a minimum alignment. The function will
/// automatically detect the best type of copy to perform.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memmove"]
pub unsafe extern "C" fn __aeabi_memmove(
dest: *mut u8, src: *const u8, byte_count: usize,
) {
core::arch::asm! {
"cmp r2, #7", // if count <= (fix+word): just byte copy
"ble {__aeabi_memcpy1}",
bracer::when!("r0" >=u "r1" [label_id=1] {
// when d > s we need to reverse-direction copy
"add r0, r0, r2",
"add r1, r1, r2",
"eor r3, r0, r1",
"lsls r3, r3, #31",
"bmi {reverse_copy_u8}",
"bcs 2f",
// max coalign4, possible fixup and jump
"lsls r3, r0, #31",
"submi r2, r2, #1",
"ldrbmi r3, [r1, #-1]!",
"strbmi r3, [r0, #-1]!",
"subcs r2, r2, #2",
"ldrhcs r3, [r1, #-2]!",
"strhcs r3, [r0, #-2]!",
"b {reverse_copy_u32}",
// max coalign2, possible fixup and jump
"2:",
"tst r0, #1",
"sub r2, r2, #1",
"ldrb r3, [r1, #-1]!",
"strb r3, [r0, #-1]!",
"b {reverse_copy_u16}",
}),
"b {__aeabi_memcpy}",
__aeabi_memcpy = sym __aeabi_memcpy,
__aeabi_memcpy1 = sym __aeabi_memcpy1,
reverse_copy_u8 = sym reverse_copy_u8,
reverse_copy_u16 = sym reverse_copy_u16,
reverse_copy_u32 = sym reverse_copy_u32,
options(noreturn),
}
}
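The `when!("r0" >=u "r1" …)` guard is the standard memmove rule: when the destination starts at or above the source, both pointers are advanced to the ends of their regions and the copy runs backwards so no source byte is read after it has already been overwritten; otherwise control falls through to the forward `__aeabi_memcpy` path. The same rule as a plain-Rust model (illustration only):

// Overlap-safe direction choice, modeled in plain Rust (not the crate's code).
unsafe fn memmove_model(dest: *mut u8, src: *const u8, count: usize) {
  if (dest as usize) >= (src as usize) {
    for i in (0..count).rev() {
      dest.add(i).write(src.add(i).read()); // copy from the back
    }
  } else {
    for i in 0..count {
      dest.add(i).write(src.add(i).read()); // forward copy is safe here
    }
  }
}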
/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
///
/// This is the libc version of a memory move. It's required to return the
/// `dest` pointer at the end of the call, which makes it need an extra
/// push/pop compared to a direct call to `__aeabi_memmove`.
///
/// * **Returns:** The `dest` pointer.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.memmove"]
pub unsafe extern "C" fn memmove(
dest: *mut u8, src: *const u8, byte_count: usize,
) -> *mut u8 {
core::arch::asm! {
bracer::with_pushed_registers!("{{r0, lr}}", {
"bl {__aeabi_memmove}",
}),
"bx lr",
__aeabi_memmove = sym __aeabi_memmove,
options(noreturn)
}
}
// SET
/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memset4"]
pub unsafe extern "C" fn __aeabi_memset4(
dest: *mut u32, byte_count: usize, byte: i32,
) {
__aeabi_memset(dest.cast(), byte_count, byte)
}
/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memset8"]
pub unsafe extern "C" fn __aeabi_memset8(
dest: *mut u32, byte_count: usize, byte: i32,
) {
__aeabi_memset(dest.cast(), byte_count, byte)
}
/// Sets all bytes in the region to the `byte` given.
///
/// Because of historical reasons, the byte is passed in as an `i32`, but only
/// the lowest 8 bits are used.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memset"]
pub unsafe extern "C" fn __aeabi_memset(
dest: *mut u8, byte_count: usize, byte: i32,
) {
core::arch::asm! {
bracer::when!("r1" >=u "#8" [label_id=7] {
// duplicate the byte across all of r2 and r3
"and r2, r2, #0xFF",
"orr r2, r2, r2, lsl #8",
"orr r2, r2, r2, lsl #16",
"mov r3, r2",
// carry/sign test on the address, then do fixup
"lsls r12, r0, #31",
"submi r1, r1, #1",
"strbmi r2, [r0], #1",
"subcs r1, r1, #2",
"strhcs r2, [r0], #2",
bracer::when!("r1" >=u "#32" [label_id=8] {
bracer::with_pushed_registers!("{{r4-r9}}", {
"mov r4, r2",
"mov r5, r2",
"mov r6, r2",
"mov r7, r2",
"mov r8, r2",
"mov r9, r2",
"1:",
"subs r1, r1, #32",
"stmge r0!, {{r2-r9}}",
"bgt 1b",
}),
"bxeq lr",
}),
// set 4 words
"tst r1, #0b10000",
"stmne r0!, {{r2, r3}}",
"stmne r0!, {{r2, r3}}",
// set 2 and/or 1 words
"lsls r12, r1, #29",
"stmcs r0!, {{r2, r3}}",
"strmi r2, [r0], #4",
// set halfword and/or byte
"lsls r12, r1, #31",
"strhcs r2, [r0], #2",
"strbmi r2, [r0], #1",
"bx lr",
}),
// byte loop
"9:",
"subs r1, r1, #1",
"strbcs r2, [r0], #1",
"bgt 9b",
"bx lr",
options(noreturn)
}
}
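The first few instructions after the size check splat the low 8 bits of `byte` across a full word, and the block loop mirrors that word into r3 through r9 so each iteration can store 32 bytes at once. The splat step in plain Rust (illustration only):

// The byte-splat used by the set loops above (illustration only).
fn splat_byte(byte: i32) -> u32 {
  let b = (byte as u32) & 0xFF;
  b | (b << 8) | (b << 16) | (b << 24)
}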
/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible.
///
/// This is the libc version of a memory set. It's required to return the
/// `dest` pointer at the end of the call, which makes it need an extra
/// push/pop compared to a direct call to `__aeabi_memset`.
///
/// * **Returns:** The `dest` pointer.
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.memset"]
pub unsafe extern "C" fn memset(
dest: *mut u8, byte: i32, byte_count: usize,
) -> *mut u8 {
core::arch::asm! {
bracer::with_pushed_registers!("{{r0, lr}}", {
"bl {__aeabi_memset}",
}),
"bx lr",
__aeabi_memset = sym __aeabi_memset,
options(noreturn)
}
}
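One thing to watch when calling these directly: the libc-style `memset` takes `(dest, byte, count)` while the AEABI entry points take `(dest, count, byte)`, and in both cases only the low 8 bits of the `i32` byte are written. A small usage sketch (assumes the symbols are in scope; not from the crate's docs):

// Both calls fill the buffer with 0xAB; note the swapped argument order.
fn fill_with_ab(buf: &mut [u8; 16]) {
  unsafe {
    memset(buf.as_mut_ptr(), 0xAB, buf.len()); // libc order: dest, byte, count
    __aeabi_memset(buf.as_mut_ptr(), buf.len(), 0xAB); // AEABI order: dest, count, byte
  }
}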
// CLEAR
/// Just call [`__aeabi_memset`] with 0 as the `byte` instead.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memclr4"]
pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u32, byte_count: usize) {
__aeabi_memset(dest.cast(), byte_count, 0)
}
/// Just call [`__aeabi_memset`] with 0 as the `byte` instead.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memclr8"]
pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u32, byte_count: usize) {
__aeabi_memset(dest.cast(), byte_count, 0)
}
/// Just call [`__aeabi_memset`] with 0 as the `byte` instead.
///
/// This function is provided only for API completeness, because in some cases
/// the compiler might automatically generate a call to this function.
#[inline]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.__aeabi_memclr"]
pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, byte_count: usize) {
__aeabi_memset(dest, byte_count, 0)
}
/// Reads 4 bytes, starting at the address given.
///
/// See [__aeabi_uread4]
///
/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uread4"]
unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 {
core::arch::asm!(
"ldrb r2, [r0]",
"ldrb r3, [r0, #1]",
"orr r2, r2, r3, lsl #8",
"ldrb r3, [r0, #2]",
"orr r2, r2, r3, lsl #16",
"ldrb r3, [r0, #3]",
"orr r2, r2, r3, lsl #24",
"mov r0, r2",
"bx lr",
options(noreturn),
)
}
/// Writes 4 bytes, starting at the address given.
///
/// See [__aeabi_uwrite4]
///
/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uwrite4"]
unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) {
core::arch::asm!(
"strb r0, [r1]",
"lsr r2, r0, #8",
"strb r2, [r1, #1]",
"lsr r2, r2, #8",
"strb r2, [r1, #2]",
"lsr r2, r2, #8",
"strb r2, [r1, #3]",
"bx lr",
options(noreturn),
)
}
/// Reads 8 bytes, starting at the address given.
///
/// See [__aeabi_uread8]
///
/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uread8"]
unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 {
core::arch::asm!(
"ldrb r1, [r0, #4]",
"ldrb r2, [r0, #5]",
"orr r1, r1, r2, lsl #8",
"ldrb r2, [r0, #6]",
"orr r1, r1, r2, lsl #16",
"ldrb r2, [r0, #7]",
"orr r1, r1, r2, lsl #24",
"b __aeabi_uread4",
options(noreturn),
)
}
/// Writes 8 bytes, starting at the address given.
///
/// See [__aeabi_uwrite8]
///
/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access
#[naked]
#[no_mangle]
#[instruction_set(arm::a32)]
#[link_section = ".iwram.aeabi.uwrite8"]
unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) {
core::arch::asm!(
"strb r0, [r2]",
"lsr r3, r0, #8",
"strb r3, [r2, #1]",
"lsr r3, r3, #8",
"strb r3, [r2, #2]",
"lsr r3, r3, #8",
"strb r3, [r2, #3]",
"strb r1, [r2, #4]",
"lsr r3, r1, #8",
"strb r3, [r2, #5]",
"lsr r3, r3, #8",
"strb r3, [r2, #6]",
"lsr r3, r3, #8",
"strb r3, [r2, #7]",
"bx lr",
options(noreturn),
)
}
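The unaligned helpers at the end of the file move words one byte at a time, so they work at any address without relying on aligned loads; `__aeabi_uread8` builds the high word in `r1` and then branches to `__aeabi_uread4` to produce the low word in `r0`. A plain-Rust model of the 4-byte read (illustration only):

// Little-endian byte-at-a-time unaligned read (illustration only; the real
// version is the asm in __aeabi_uread4 above).
unsafe fn uread4_model(address: *const u8) -> u32 {
  (address.read() as u32)
    | ((address.add(1).read() as u32) << 8)
    | ((address.add(2).read() as u32) << 16)
    | ((address.add(3).read() as u32) << 24)
}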


@@ -277,7 +277,7 @@ impl TextScreenblockAddress {
#[inline]
pub fn write_word_array(self, words: &[u32; Self::WORD_COUNT]) {
-use crate::prelude::__aeabi_memcpy4;
+use crate::mem_fns::__aeabi_memcpy4;
let dest: *mut u32 = self.block.as_ptr() as *mut u32;
let src: *const u32 = words.as_ptr();
let byte_count = size_of::<[u32; Self::WORD_COUNT]>();
@@ -347,7 +347,7 @@ macro_rules! make_affine_screenblock_address_type {
#[inline]
pub fn write_word_array(self, words: &[u32; Self::WORD_COUNT]) {
-use crate::prelude::__aeabi_memcpy4;
+use crate::mem_fns::__aeabi_memcpy4;
let dest: *mut u32 = self.block.as_ptr() as *mut u32;
let src: *const u32 = words.as_ptr();
let byte_count = size_of::<[u32; Self::WORD_COUNT]>();