From 8385504db6d7626ebecca261fcff1e3ce1b4fca8 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 4 Dec 2022 16:44:27 -0700 Subject: [PATCH] Remove usage of global_asm (#175) * message * finish memory function re-arrangement. --- Cargo.toml | 2 +- examples/asm_viewer.rs | 36 +++ examples/game.rs | 2 +- src/asm_runtime.rs | 568 ++--------------------------------- src/lib.rs | 1 + src/mem_fns.rs | 659 +++++++++++++++++++++++++++++++++++++++++ src/mmio.rs | 4 +- 7 files changed, 722 insertions(+), 550 deletions(-) create mode 100644 examples/asm_viewer.rs create mode 100644 src/mem_fns.rs diff --git a/Cargo.toml b/Cargo.toml index 20073b6..ce81dd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ track_caller = [] [dependencies] bitfrob = "0.2.3" voladdress = { version = "1.2.1", features = ["experimental_volregion"] } -bracer = "0.1" +bracer = "0.1.2" [profile.dev] opt-level = 3 diff --git a/examples/asm_viewer.rs b/examples/asm_viewer.rs new file mode 100644 index 0000000..65bd750 --- /dev/null +++ b/examples/asm_viewer.rs @@ -0,0 +1,36 @@ +#![no_std] +#![no_main] + +use gba::mem_fns::*; + +#[panic_handler] +fn panic_handler(_: &core::panic::PanicInfo) -> ! { + loop {} +} + +#[no_mangle] +extern "C" fn main() -> ! { + let dest = unsafe { (0x0400_0000 as *const u16).read_volatile() }; + let src = unsafe { (0x0400_0000 as *const u16).read_volatile() }; + let count = unsafe { (0x0400_0000 as *const u16).read_volatile() }; + unsafe { + __aeabi_memcpy1(dest as *mut u8, src as *mut u8, count as usize); + __aeabi_memcpy2(dest as *mut u16, src as *mut u16, count as usize); + __aeabi_memcpy4(dest as *mut u32, src as *mut u32, count as usize); + __aeabi_memcpy8(dest as *mut u32, src as *mut u32, count as usize); + __aeabi_memcpy(dest as *mut u8, src as *mut u8, count as usize); + memcpy(dest as *mut u8, src as *mut u8, count as usize); + __aeabi_memmove4(dest as *mut u32, src as *mut u32, count as usize); + __aeabi_memmove8(dest as *mut u32, src as *mut u32, count as usize); + __aeabi_memmove(dest as *mut u8, src as *mut u8, count as usize); + memmove(dest as *mut u8, src as *mut u8, count as usize); + __aeabi_memset4(dest as *mut u32, count as usize, count as i32); + __aeabi_memset8(dest as *mut u32, count as usize, count as i32); + __aeabi_memset(dest as *mut u8, count as usize, count as i32); + memset(dest as *mut u8, count as i32, count as usize); + __aeabi_memclr4(dest as *mut u32, count as usize); + __aeabi_memclr8(dest as *mut u32, count as usize); + __aeabi_memclr(dest as *mut u8, count as usize); + } + loop {} +} diff --git a/examples/game.rs b/examples/game.rs index e1f7e07..d7458c6 100644 --- a/examples/game.rs +++ b/examples/game.rs @@ -1,11 +1,11 @@ #![no_std] #![no_main] -use core::fmt::Write; use gba::prelude::*; #[panic_handler] fn panic_handler(info: &core::panic::PanicInfo) -> ! { + use core::fmt::Write; if let Ok(mut logger) = MgbaBufferedLogger::try_new(MgbaMessageLevel::Fatal) { writeln!(logger, "{info}").ok(); } diff --git a/src/asm_runtime.rs b/src/asm_runtime.rs index b48f072..63f2749 100644 --- a/src/asm_runtime.rs +++ b/src/asm_runtime.rs @@ -9,8 +9,6 @@ //! * If a function is set in the `RUST_IRQ_HANDLER` variable then that function //! will be called and passed the bits for which interrupt(s) occurred. 
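//!
//! A hedged sketch of hooking that handler (the cell's exact type is assumed
//! here to be `GbaCell<Option<extern "C" fn(IrqBits)>>`, and `IrqBits` is
//! assumed to expose a `vblank` getter):
//!
//! ```no_run
//! # use gba::prelude::*;
//! extern "C" fn my_handler(bits: IrqBits) {
//!   if bits.vblank() {
//!     // once-per-frame work goes here
//!   }
//! }
//! RUST_IRQ_HANDLER.write(Some(my_handler));
//! ```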
-use core::ffi::c_void; - use crate::{ dma::DmaControl, gba_cell::GbaCell, @@ -18,7 +16,6 @@ use crate::{ mgba::MGBA_LOGGING_ENABLE_REQUEST, mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE}, }; -use bracer::*; /// The function pointer that the assembly runtime calls when an interrupt /// occurs. @@ -46,7 +43,7 @@ unsafe extern "C" fn __start() -> ! { /* iwram copy */ "ldr r4, =__iwram_word_copy_count", - when!("r4" != "#0" [label_id=1] { + bracer::when!("r4" != "#0" [label_id=1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__iwram_start", @@ -59,7 +56,7 @@ unsafe extern "C" fn __start() -> ! { /* ewram copy */ "ldr r4, =__ewram_word_copy_count", - when!("r4" != "#0" [label_id=1] { + bracer::when!("r4" != "#0" [label_id=1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__ewram_start", @@ -72,7 +69,7 @@ unsafe extern "C" fn __start() -> ! { /* bss zero */ "ldr r4, =__bss_word_clear_count", - when!("r4" != "#0" [label_id=1] { + bracer::when!("r4" != "#0" [label_id=1] { "ldr r0, =__bss_start", "mov r2, #0", "2:", @@ -133,9 +130,9 @@ unsafe extern "C" fn runtime_irq_handler() { /* Call the Rust fn pointer (if set), using System mode */ "ldr r1, ={RUST_IRQ_HANDLER}", "ldr r1, [r1]", - when!("r1" != "#0" [label_id=9] { - with_spsr_held_in!("r2", { - set_cpu_control!(System, irq_masked: false, fiq_masked: false), + bracer::when!("r1" != "#0" [label_id=9] { + bracer::with_spsr_held_in!("r2", { + bracer::set_cpu_control!(System, irq_masked: false, fiq_masked: false), // Note(Lokathor): We are *SKIPPING* the part where we ensure that the // System stack pointer is aligned to 8 during the call to the rust @@ -145,11 +142,11 @@ unsafe extern "C" fn runtime_irq_handler() { // cycles total. Which is neat, but if this were on the DS (which has an // ARMv5TE CPU) you'd want to ensure the aligned stack. - with_pushed_registers!("{{r2, r3, r12, lr}}", { - adr_lr_then_bx_to!(reg="r1", label_id=1) + bracer::with_pushed_registers!("{{r2, r3, r12, lr}}", { + bracer::adr_lr_then_bx_to!(reg="r1", label_id=1) }), - set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false), + bracer::set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false), }), }), @@ -162,6 +159,8 @@ unsafe extern "C" fn runtime_irq_handler() { ) } +// For now, the division fns can just keep living here. + /// Returns 0 in `r0`, while placing the `numerator` into `r1`. /// /// This is written in that slightly strange way so that `div` function and @@ -207,7 +206,7 @@ extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { core::arch::asm!( // Check for divide by 0 "cmp r1, #0", - "beq __aeabi_idiv0", + "beq {__aeabi_idiv0}", // r3(shifted_denom) = denom "mov r3, r1", // while shifted_denom < (num>>1): shifted_denom =<< 1; @@ -228,6 +227,7 @@ extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { "cmp r3, r1", "bcs 3b", "bx lr", + __aeabi_idiv0 = sym __aeabi_idiv0, options(noreturn) ) } @@ -253,14 +253,15 @@ extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 { "rsblt r0, r0, #0", "cmp r1, #0", "rsclt r1, r1, #0", - with_pushed_registers!("{{lr}}", { + bracer::with_pushed_registers!("{{lr}}", { // divide them using `u32` division (this will check for divide by 0) - "bl __aeabi_uidiv", + "bl {__aeabi_uidiv}", }), // if they started as different signs, flip the output's sign. 
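    // (worked example: -7 / 2 first divides as 7 / 2 = 3; the input signs
    // differed, so `r12` is negative and the quotient below flips to -3)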
"cmp r12, #0", "rsblt r0, r0, #0", "bx lr", + __aeabi_uidiv = sym __aeabi_uidiv, options(noreturn) ) } @@ -289,14 +290,15 @@ extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { // touch `r12`, while the other will be pushed onto the stack along with // `lr`. Since the function's output will be in `r0`, we push/pop `r1`. "mov r12, r0", - with_pushed_registers!("{{r1, lr}}", { - "bl __aeabi_uidiv", + bracer::with_pushed_registers!("{{r1, lr}}", { + "bl {__aeabi_uidiv}", }), // Now r0 holds the `quot`, and we use it along with the input args to // calculate the `rem`. "mul r2, r0, r1", "sub r1, r12, r2", "bx lr", + __aeabi_uidiv = sym __aeabi_uidiv, options(noreturn) ) } @@ -320,7 +322,7 @@ extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { unsafe { core::arch::asm!( - with_pushed_registers!("{{r4, r5, lr}}", { + bracer::with_pushed_registers!("{{r4, r5, lr}}", { // store old numerator then make it the unsigned absolute "movs r4, r0", "rsblt r0, r0, #0", @@ -328,7 +330,7 @@ extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { "movs r5, r1", "rsblt r1, r1, #0", // divmod using unsigned. - "bl __aeabi_uidivmod", + "bl {__aeabi_uidivmod}", // if signs started opposite, quot becomes negative "eors r12, r4, r5", "rsblt r0, r0, #0", @@ -337,534 +339,8 @@ extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { "rsblt r1, r1, #0", }), "bx lr", + __aeabi_uidivmod = sym __aeabi_uidivmod, options(noreturn) ) } } - -/// Reads 4 bytes, starting at the address given. -/// -/// See [__aeabi_uread4] -/// -/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread4"] -unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 { - core::arch::asm!( - "ldrb r2, [r0]", - "ldrb r3, [r0, #1]", - "orr r2, r2, r3, lsl #8", - "ldrb r3, [r0, #2]", - "orr r2, r2, r3, lsl #16", - "ldrb r3, [r0, #3]", - "orr r2, r2, r3, lsl #24", - "mov r0, r2", - "bx lr", - options(noreturn), - ) -} - -/// Writes 4 bytes, starting at the address given. -/// -/// See [__aeabi_uwrite4] -/// -/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite4"] -unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r1]", - "lsr r2, r0, #8", - "strb r2, [r1, #1]", - "lsr r2, r2, #8", - "strb r2, [r1, #2]", - "lsr r2, r2, #8", - "strb r2, [r1, #3]", - "bx lr", - options(noreturn), - ) -} - -/// Reads 8 bytes, starting at the address given. -/// -/// See [__aeabi_uread8] -/// -/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread8"] -unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 { - core::arch::asm!( - "ldrb r1, [r0, #4]", - "ldrb r2, [r0, #5]", - "orr r1, r1, r2, lsl #8", - "ldrb r2, [r0, #6]", - "orr r1, r1, r2, lsl #16", - "ldrb r2, [r0, #7]", - "orr r1, r1, r2, lsl #24", - "b __aeabi_uread4", - options(noreturn), - ) -} - -/// Writes 8 bytes, starting at the address given. 
-/// -/// See [__aeabi_uwrite8] -/// -/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite8"] -unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r2]", - "lsr r3, r0, #8", - "strb r3, [r2, #1]", - "lsr r3, r3, #8", - "strb r3, [r2, #2]", - "lsr r3, r3, #8", - "strb r3, [r2, #3]", - "strb r1, [r2, #4]", - "lsr r3, r1, #8", - "strb r3, [r2, #5]", - "lsr r3, r3, #8", - "strb r3, [r2, #6]", - "lsr r3, r3, #8", - "strb r3, [r2, #7]", - "bx lr", - options(noreturn), - ) -} - -/// Provides a `libc` styled memory copy (transfer between exclusive regions). -/// -/// This has mild overhead compared to calling [`__aeabi_memcpy`], prefer that -/// function when possible. -/// -/// * **Returns:** the original `dest` pointer. -/// -/// ## Safety -/// * `src` must be readable for `byte_count` bytes. -/// * `dest` must be writable for `byte_count` bytes. -/// * The `src` and `dest` regions must not overlap. -#[inline] -#[no_mangle] -pub unsafe extern "C" fn memcpy( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - __aeabi_memcpy(dest, src, byte_count); - dest -} - -/// Provides a `libc` styled memory move (transfer between non-exclusive -/// regions). -/// -/// This has mild overhead compared to calling [`__aeabi_memmove`], prefer that -/// function when possible. -/// -/// * **Returns:** the original `dest` pointer. -/// -/// ## Safety -/// * `src` must be readable for `byte_count` bytes. -/// * `dest` must be writable for `byte_count` bytes. -#[inline] -#[no_mangle] -pub unsafe extern "C" fn memmove( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - __aeabi_memmove(dest, src, byte_count); - dest -} - -/// Provides a `libc` styled memory set (assign `u8` in `byte` to the entire -/// region). -/// -/// This has mild overhead compared to calling [`__aeabi_memset`], prefer that -/// function when possible. Note that this function and that function have -/// slightly different argument ordering, though the compiler won't let you mess -/// it up like might happen in C. -/// -/// * **Returns:** the original `dest` pointer. -/// -/// ## Safety -/// * `dest` must be writable for `byte_count` bytes. -#[inline] -#[no_mangle] -pub unsafe extern "C" fn memset( - dest: *mut u8, byte: i32, byte_count: usize, -) -> *mut u8 { - __aeabi_memset(dest, byte_count, byte); - dest -} - -extern "C" { - /// Memory transfer between *exclusive* regions. - /// - /// There are no alignment requirements for the pointers. This will - /// automatically detect when pointers are sufficiently aligned to use `u16` - /// or `u32` transfers, instead of always using `u8` transfers. - /// - /// This follows the AEABI convention of not returning the original `dest` - /// pointer at the end of the function. This actually allows a minor - /// optimization, so if you're going to call a memory copy function at all, - /// prefer this over [`memcpy`]. - pub fn __aeabi_memcpy(dest: *mut u8, src: *const u8, byte_count: usize); - - /// As [`__aeabi_memcpy`], but both pointers are assumed to be aligned to 4. - pub fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, byte_count: usize); - - /// As [`__aeabi_memcpy`], but both pointers are assumed to be aligned to 8. - pub fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, byte_count: usize); - - /// As [`__aeabi_memcpy`], but *only* performs `u8` transfers. 
- /// - /// Importantly, this means that this function can be used to get data to/from - /// the SRAM region. - pub fn gba_sram_memcpy(dest: *mut u8, src: *const u8, byte_count: usize); - - /// Memory transfer between *non-exclusive* regions. - /// - /// As [`__aeabi_memcpy`], but the regions don't need to be exclusive. - pub fn __aeabi_memmove(dest: *mut u8, src: *const u8, byte_count: usize); - - /// As [`__aeabi_memmove`], but both pointers are assumed to be aligned to 4. - pub fn __aeabi_memmove4(dest: *mut u8, src: *const u8, byte_count: usize); - - /// As [`__aeabi_memmove`], but both pointers are assumed to be aligned to 8. - pub fn __aeabi_memmove8(dest: *mut u8, src: *const u8, byte_count: usize); - - /// Sets all bytes in the region to the value given. - /// - /// For historical reasons, the "byte" passed in is passed as an `i32`. Still, - /// only the low 8 bits of the value are kept and written to the region. - /// - /// There are no alignment requirements for the pointer. This will - /// automatically detect when pointer is sufficiently aligned to use `u16` or - /// `u32` writes, instead of always using `u8` writes. - /// - /// This follows the AEABI convention of not returning the original `dest` - /// pointer at the end of the function. This actually allows a minor - /// optimization, so if you're going to call a memory copy function at all, - /// prefer this over [`memcpy`]. - pub fn __aeabi_memset(dest: *mut u8, byte_count: usize, byte: i32); - - /// As [`__aeabi_memset`], but both pointers are assumed to be aligned to 4. - pub fn __aeabi_memset4(dest: *mut u8, byte_count: usize, byte: i32); - - /// As [`__aeabi_memset`], but both pointers are assumed to be aligned to 8. - pub fn __aeabi_memset8(dest: *mut u8, byte_count: usize, byte: i32); - - /// Sets all bytes in the region to 0. - /// - /// There are no alignment requirements for the pointer. This will - /// automatically detect when the pointer is sufficiently aligned to use `u16` - /// or `u32` writes, instead of always using `u8` writes. - pub fn __aeabi_memclr(dest: *mut u8, byte_count: usize); - - /// As [`__aeabi_memclr`], but the pointer is assumed to be aligned to 4. - pub fn __aeabi_memclr4(dest: *mut u8, byte_count: usize); - - /// As [`__aeabi_memclr`], but the pointer is assumed to be aligned to 8. - pub fn __aeabi_memclr8(dest: *mut u8, byte_count: usize); -} - -core::arch::global_asm! 
{ - emit_a32_code!{ - put_code_in_section!(".iwram.aeabi.memory.copy.and.move", { - ".global __aeabi_memmove8", - ".global __aeabi_memmove4", - ".global __aeabi_memmove", - ".global __aeabi_memcpy8", - ".global __aeabi_memcpy4", - ".global __aeabi_memcpy", - // - "__aeabi_memmove8:", - "__aeabi_memmove4:", - "__aeabi_memmove:", - "cmp r0, r1", // if d > s, reverse copy - "bgt .L_r_copy_gain_align", - // else fallthrough - - "__aeabi_memcpy:", - ".L_f_copy_gain_align:", - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi .L_f_copy_max_coalign1", - "bcs .L_f_copy_max_coalign2", - // else fallthrough - - ".L_f_copy_max_coalign4:", - "tst r0, #3", - "bne .L_f_copy_fixup4", - // else fallthrough - - "__aeabi_memcpy8:", - "__aeabi_memcpy4:", - ".L_f_copy_coalign4_assured:", - "cmp r2, #32", - "bge .L_f_copy_block", - - ".L_f_copy_post_block:", - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmne r1!, {r3, r12}", - "stmne r0!, {r3, r12}", - "ldmne r1!, {r3, r12}", - "stmne r0!, {r3, r12}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmcs r1!, {r3, r12}", - "stmcs r0!, {r3, r12}", - "ldrmi r3, [r1], #4", - "strmi r3, [r0], #4", - "bics r2, r2, #0b1100", - "bxeq lr", - - // copy halfword and/or byte - "lsls r3, r2, #31", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "bx lr", - - ".L_f_copy_block:", - with_pushed_registers!("{r4-r9}", { - "1:", - "subs r2, r2, #32", - "ldmge r1!, {r3-r9, r12}", - "stmge r0!, {r3-r9, r12}", - "bgt 1b", - }), - "bxeq lr", - "b .L_f_copy_post_block", - - ".L_f_copy_fixup4:", - "cmp r2, #7", // if count <= (fix+word): just byte copy - "ble .L_f_copy_max_coalign1", - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "subcs r2, r2, #2", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "b .L_f_copy_coalign4_assured", - - ".L_f_copy_max_coalign2:", - "tst r0, #1", - "bne .L_f_copy_fixup2", - ".L_f_copy_coalign2_assured:", - "1:", - "subs r2, r2, #2", - "ldrhge r3, [r1], #2", - "strhge r3, [r0], #2", - "bgt 1b", - "bxeq lr", - "tst r2, #1", - "ldrbne r3, [r1], #1", - "strbne r3, [r0], #1", - "bx lr", - - ".L_f_copy_fixup2:", - "cmp r2, #3", // if count <= (fix+halfword): just byte copy - "ble .L_f_copy_max_coalign1", - "sub r2, r2, #1", - "ldrb r3, [r1], #1", - "strb r3, [r0], #1", - "b .L_f_copy_coalign2_assured", - - "gba_sram_memcpy:", - ".L_f_copy_max_coalign1:", - "1:", - "subs r2, r2, #1", - "ldrbge r3, [r1], #1", - "strbge r3, [r0], #1", - "bgt 1b", - "bx lr", - - ".L_r_copy_gain_align:", - "add r0, r0, r2", - "add r1, r1, r2", - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi .L_r_copy_max_coalign1", - "bcs .L_r_copy_max_coalign2", - // else fallthrough - - ".L_r_copy_max_coalign4:", - "tst r0, #3", - "bne .L_r_copy_fixup4", - ".L_r_copy_coalign4_assured:", - "cmp r2, #32", - "bge .L_r_copy_block", - ".L_r_copy_post_block:", - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmdbne r1!, {r3, r12}", - "stmdbne r0!, {r3, r12}", - "ldmdbne r1!, {r3, r12}", - "stmdbne r0!, {r3, r12}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmdbcs r1!, {r3, r12}", - "stmdbcs r0!, {r3, r12}", - "ldrmi r3, [r1, #-4]!", - "strmi r3, [r0, #-4]!", - "bxeq lr", - "lsls r2, r2, #31", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "bx lr", - - ".L_r_copy_block:", - with_pushed_registers!("{r4-r9}", { - "1:", - 
"subs r2, r2, #32", - "ldmdbcs r1!, {r3-r9, r12}", - "stmdbcs r0!, {r3-r9, r12}", - "bgt 1b", - }), - "bxeq lr", - "b .L_r_copy_post_block", - - ".L_r_copy_fixup4:", - "cmp r2, #7", // if count <= (fix+word): just byte copy - "ble .L_r_copy_max_coalign1", - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "subcs r2, r2, #2", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "b .L_r_copy_coalign4_assured", - - ".L_r_copy_max_coalign2:", - "tst r0, #1", - "bne .L_r_copy_fixup2", - ".L_r_copy_coalign2_assured:", - "1:", - "subs r2, r2, #2", - "ldrhge r3, [r1, #-2]!", - "strhge r3, [r0, #-2]!", - "bgt 1b", - "bxeq lr", - "tst r2, #1", - "ldrbne r3, [r1, #-1]!", - "strbne r3, [r0, #-1]!", - "bx lr", - - ".L_r_copy_fixup2:", - "cmp r2, #3", // if count <= (fix+halfword): just byte copy - "ble .L_r_copy_max_coalign1", - "sub r2, r2, #1", - "ldrb r3, [r1, #-1]!", - "strb r3, [r0, #-1]!", - "b .L_r_copy_coalign2_assured", - - ".L_r_copy_max_coalign1:", - "1:", - "subs r2, r2, #1", - "ldrbge r3, [r1, #-1]!", - "strbge r3, [r0, #-1]!", - "bgt 1b", - "bx lr", - }), - }, - options(raw) -} - -core::arch::global_asm! { - emit_a32_code!{ - put_code_in_section!(".iwram.aeabi.memory.clear.and.set", { - ".global __aeabi_memclr8", - ".global __aeabi_memclr4", - ".global __aeabi_memclr", - ".global __aeabi_memset8", - ".global __aeabi_memset4", - ".global __aeabi_memset", - // - "__aeabi_memclr8:", - "__aeabi_memclr4:", - "mov r2, #0", - "mov r3, #0", - "b .L_memset_check_for_block_work", - "__aeabi_memclr:", - "mov r2, #0", - "__aeabi_memset8:", - "__aeabi_memset4:", - "__aeabi_memset:", // r0(dest), r1(count), r2(byte) - // duplicate the byte across all of r2 and r3 - "and r2, r2, #0xFF", - "orr r2, r2, r2, lsl #8", - "orr r2, r2, r2, lsl #16", - "mov r3, r2", - // for 'sets' too small to fixup we just byte loop - "cmp r1, #3", - "ble .L_memset_byte_loop", - // carry/sign test on the address, then do fixup - "lsls r12, r0, #31", - "submi r1, r1, #1", - "strbmi r2, [r0], #1", - "subcs r1, r1, #2", - "strhcs r2, [r0], #2", - ".L_memset_check_for_block_work:", - "cmp r1, #32", - "bge .L_memset_block_work", - - ".L_memset_post_block_work:", - // set 4 words - "tst r1, #0b10000", - "stmne r0!, {r2, r3}", - "stmne r0!, {r2, r3}", - // set 2 and/or 1 words - "lsls r12, r1, #29", - "stmcs r0!, {r2, r3}", - "strmi r2, [r0], #4", - // set halfword and/or byte - "lsls r12, r1, #31", - "strhcs r2, [r0], #2", - "strbmi r2, [r0], #1", - "bx lr", - - ".L_memset_block_work:", - with_pushed_registers!("{r4-r9}", { - "mov r4, r2", - "mov r5, r2", - "mov r6, r2", - "mov r7, r2", - "mov r8, r2", - "mov r9, r2", - "1:", - "subs r1, r1, #32", - "stmge r0!, {r2-r9}", - "bgt 1b", - }), - "bxeq lr", - "b .L_memset_post_block_work", - - ".L_memset_byte_loop:", - "1:", - "subs r1, r1, #1", - "strbcs r2, [r0], #1", - "bgt 1b", - "bx lr", - }), - }, - options(raw), -} diff --git a/src/lib.rs b/src/lib.rs index 677c645..5352e4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,6 +96,7 @@ pub mod fixed; pub mod gba_cell; pub mod interrupts; pub mod keys; +pub mod mem_fns; pub mod mgba; pub mod mmio; pub mod prelude; diff --git a/src/mem_fns.rs b/src/mem_fns.rs new file mode 100644 index 0000000..4b33d3d --- /dev/null +++ b/src/mem_fns.rs @@ -0,0 +1,659 @@ +//! Module for direct memory operations. +//! +//! Generally you don't need to call these yourself. Instead, the compiler will +//! insert calls to the functions defined here as necessary. 
+
+use core::ffi::c_void;
+
+/// Byte copy between exclusive regions.
+///
+/// * This will *always* copy one byte at a time, making it suitable for use
+///   with SRAM memory.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memcpy1"]
+pub unsafe extern "C" fn __aeabi_memcpy1(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) {
+  core::arch::asm! {
+    "1:",
+    "subs {count}, {count}, #1",
+    "ldrbge {temp}, [{src}], #1",
+    "strbge {temp}, [{dest}], #1",
+    "bgt 1b",
+    temp = out(reg) _,
+    count = inout(reg) byte_count => _,
+    src = inout(reg) src => _,
+    dest = inout(reg) dest => _,
+    options(nostack)
+  }
+}
+
+/// Halfword copy between exclusive regions.
+///
+/// * **Safety:** The pointers must start aligned to 2.
+/// * If the `byte_count` is odd then a single byte copy will happen at the
+///   end.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memcpy2"]
+pub unsafe extern "C" fn __aeabi_memcpy2(
+  mut dest: *mut u16, mut src: *const u16, mut byte_count: usize,
+) {
+  core::arch::asm! {
+    "1:",
+    "subs {count}, {count}, #2",
+    "ldrhge {temp}, [{src}], #2",
+    "strhge {temp}, [{dest}], #2",
+    "bgt 1b",
+    temp = out(reg) _,
+    count = inout(reg) byte_count,
+    src = inout(reg) src,
+    dest = inout(reg) dest,
+    options(nostack)
+  }
+  // An odd count exits the loop with the counter wrapped to -1, but a zero
+  // count exits with -2, so test the low bit (rather than "non-zero") to
+  // decide if one trailing byte still needs to move.
+  if byte_count & 1 != 0 {
+    let dest = dest.cast::<u8>();
+    let src = src.cast::<u8>();
+    dest.write_volatile(src.read_volatile());
+  }
+}
+
+/// Word copy between exclusive regions.
+///
+/// * **Safety:** The pointers must start aligned to 4.
+/// * If `byte_count` is not a multiple of 4 then a halfword and/or byte copy
+///   will happen at the end.
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memcpy4"]
+pub unsafe extern "C" fn __aeabi_memcpy4(
+  dest: *mut u32, src: *const u32, byte_count: usize,
+) {
+  core::arch::asm! {
+    bracer::when!( "r2" >=u "#32" [label_id=2] {
+      bracer::with_pushed_registers!("{{r4-r9}}", {
+        "1:",
+        "subs r2, r2, #32",
+        "ldmge r1!, {{r3-r9, r12}}",
+        "stmge r0!, {{r3-r9, r12}}",
+        "bgt 1b",
+      }),
+      "bxeq lr",
+    }),
+
+    // copy 4 words, two at a time
+    "tst r2, #0b10000",
+    "ldmne r1!, {{r3, r12}}",
+    "stmne r0!, {{r3, r12}}",
+    "ldmne r1!, {{r3, r12}}",
+    "stmne r0!, {{r3, r12}}",
+    "bics r2, r2, #0b10000",
+    "bxeq lr",
+
+    // copy 2 and/or 1 words
+    "lsls r3, r2, #29",
+    "ldmcs r1!, {{r3, r12}}",
+    "stmcs r0!, {{r3, r12}}",
+    "ldrmi r3, [r1], #4",
+    "strmi r3, [r0], #4",
+    "bics r2, r2, #0b1100",
+    "bxeq lr",
+
+    // copy halfword and/or byte
+    "lsls r3, r2, #31",
+    "ldrhcs r3, [r1], #2",
+    "strhcs r3, [r0], #2",
+    "ldrbmi r3, [r1], #1",
+    "strbmi r3, [r0], #1",
+    "bx lr",
+    options(noreturn),
+  }
+}
+
+/// Just call [`__aeabi_memcpy4`] instead.
+///
+/// This function is provided only for API completeness, because in some cases
+/// the compiler might automatically generate a call to this function.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memcpy8"]
+pub unsafe extern "C" fn __aeabi_memcpy8(
+  dest: *mut u32, src: *const u32, byte_count: usize,
+) {
+  __aeabi_memcpy4(dest, src, byte_count);
+}
+
+/// Arbitrary width copy between exclusive regions.
+///
+/// * The pointers do not have a minimum alignment.
+/// * The function will automatically use the best type of copy possible, based
+///   on the pointers given.
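+///
+/// As a hedged usage sketch (the buffer contents are illustrative):
+///
+/// ```no_run
+/// # use gba::mem_fns::__aeabi_memcpy;
+/// let src: [u8; 12] = *b"hello, gba!\0";
+/// let mut dst = [0_u8; 12];
+/// // Safety: the regions are exclusive and the count is exact.
+/// unsafe { __aeabi_memcpy(dst.as_mut_ptr(), src.as_ptr(), 12) };
+/// ```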
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memcpy"]
+pub unsafe extern "C" fn __aeabi_memcpy(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) {
+  core::arch::asm! {
+    "cmp r2, #7", // if count <= (fix+word): just byte copy
+    "ble {__aeabi_memcpy1}",
+
+    // check max coalign
+    "eor r3, r0, r1",
+    "lsls r3, r3, #31",
+    "bmi {__aeabi_memcpy1}",
+    "bcs 2f",
+
+    // max coalign4, possible fixup and jump. The fixup tests the number of
+    // bytes *needed* to reach alignment 4 (`4 - dest`): bit 0 selects the
+    // byte step and bit 1 selects the halfword step. Testing the raw
+    // address bits instead would leave odd pointers mis-aligned.
+    "rsb r3, r0, #4",
+    "lsls r3, r3, #31",
+    "submi r2, r2, #1",
+    "ldrbmi r3, [r1], #1",
+    "strbmi r3, [r0], #1",
+    "subcs r2, r2, #2",
+    "ldrhcs r3, [r1], #2",
+    "strhcs r3, [r0], #2",
+    "b {__aeabi_memcpy4}",
+
+    // max coalign2, possible fixup and jump
+    "2:",
+    "lsls r3, r0, #31",
+    "submi r2, r2, #1",
+    "ldrbmi r3, [r1], #1",
+    "strbmi r3, [r0], #1",
+    "b {__aeabi_memcpy2}",
+
+    //
+    __aeabi_memcpy4 = sym __aeabi_memcpy4,
+    __aeabi_memcpy2 = sym __aeabi_memcpy2,
+    __aeabi_memcpy1 = sym __aeabi_memcpy1,
+    options(noreturn)
+  }
+}
+
+/// Copy between exclusive regions, prefer [`__aeabi_memcpy`] if possible.
+///
+/// This is the libc version of a memory copy. It's required to return the
+/// `dest` pointer at the end of the call, which makes it need an extra
+/// push/pop compared to a direct call to `__aeabi_memcpy`.
+///
+/// * **Returns:** The `dest` pointer.
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.memcpy"]
+pub unsafe extern "C" fn memcpy(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) -> *mut u8 {
+  core::arch::asm! {
+    bracer::with_pushed_registers!("{{r0, lr}}", {
+      "bl {__aeabi_memcpy}",
+    }),
+    "bx lr",
+    __aeabi_memcpy = sym __aeabi_memcpy,
+    options(noreturn)
+  }
+}
+
+// MOVE
+
+// used by `__aeabi_memmove` in some cases
+#[inline]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.reverse_copy_u8"]
+unsafe extern "C" fn reverse_copy_u8(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) {
+  core::arch::asm! {
+    "1:",
+    "subs {count}, {count}, #1",
+    "ldrbge {temp}, [{src}, #-1]!",
+    "strbge {temp}, [{dest}, #-1]!",
+    "bgt 1b",
+    temp = out(reg) _,
+    count = inout(reg) byte_count => _,
+    src = inout(reg) src => _,
+    dest = inout(reg) dest => _,
+    options(nostack)
+  }
+}
+
+// used by `__aeabi_memmove` in some cases
+#[inline]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.reverse_copy_u16"]
+unsafe extern "C" fn reverse_copy_u16(
+  mut dest: *mut u16, mut src: *const u16, mut byte_count: usize,
+) {
+  core::arch::asm! {
+    "1:",
+    "subs {count}, {count}, #2",
+    "ldrhge {temp}, [{src}, #-2]!",
+    "strhge {temp}, [{dest}, #-2]!",
+    "bgt 1b",
+    temp = out(reg) _,
+    count = inout(reg) byte_count,
+    src = inout(reg) src,
+    dest = inout(reg) dest,
+    options(nostack)
+  }
+  // As in `__aeabi_memcpy2`: the low bit of the wrapped counter tells an
+  // odd count (-1) apart from a zero count (-2).
+  if byte_count & 1 != 0 {
+    let dest = dest.cast::<u8>().sub(1);
+    let src = src.cast::<u8>().sub(1);
+    dest.write_volatile(src.read_volatile());
+  }
+}
+
+// used by `__aeabi_memmove` in some cases
+#[naked]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.reverse_copy_u32"]
+unsafe extern "C" fn reverse_copy_u32(
+  dest: *mut u32, src: *const u32, byte_count: usize,
+) {
+  core::arch::asm!
{
+    bracer::when!( "r2" >=u "#32" [label_id=2] {
+      bracer::with_pushed_registers!("{{r4-r9}}", {
+        "1:",
+        "subs r2, r2, #32",
+        "ldmdbcs r1!, {{r3-r9, r12}}",
+        "stmdbcs r0!, {{r3-r9, r12}}",
+        "bgt 1b",
+      }),
+      "bxeq lr",
+    }),
+
+    // copy 4 words, two at a time
+    "tst r2, #0b10000",
+    "ldmdbne r1!, {{r3, r12}}",
+    "stmdbne r0!, {{r3, r12}}",
+    "ldmdbne r1!, {{r3, r12}}",
+    "stmdbne r0!, {{r3, r12}}",
+    "bics r2, r2, #0b10000",
+    "bxeq lr",
+
+    // copy 2 and/or 1 words
+    "lsls r3, r2, #29",
+    "ldmdbcs r1!, {{r3, r12}}",
+    "stmdbcs r0!, {{r3, r12}}",
+    "ldrmi r3, [r1, #-4]!",
+    "strmi r3, [r0, #-4]!",
+    "bxeq lr",
+
+    // copy halfword and/or byte
+    "lsls r2, r2, #31",
+    "ldrhcs r3, [r1, #-2]!",
+    "strhcs r3, [r0, #-2]!",
+    "ldrbmi r3, [r1, #-1]!",
+    "strbmi r3, [r0, #-1]!",
+    "bx lr",
+    options(noreturn),
+  }
+}
+
+/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
+///
+/// This function is provided only for API completeness, because in some cases
+/// the compiler might automatically generate a call to this function.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memmove4"]
+pub unsafe extern "C" fn __aeabi_memmove4(
+  dest: *mut u32, src: *const u32, byte_count: usize,
+) {
+  __aeabi_memmove(dest.cast(), src.cast(), byte_count)
+}
+
+/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
+///
+/// This function is provided only for API completeness, because in some cases
+/// the compiler might automatically generate a call to this function.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memmove8"]
+pub unsafe extern "C" fn __aeabi_memmove8(
+  dest: *mut u32, src: *const u32, byte_count: usize,
+) {
+  __aeabi_memmove(dest.cast(), src.cast(), byte_count)
+}
+
+/// Copy between non-exclusive regions.
+///
+/// * The pointers do not have a minimum alignment. The function will
+///   automatically detect the best type of copy to perform.
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memmove"]
+pub unsafe extern "C" fn __aeabi_memmove(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) {
+  core::arch::asm! {
+    "cmp r2, #7", // if count <= (fix+word): just byte copy
+    "ble {__aeabi_memcpy1}",
+    bracer::when!("r0" >=u "r1" [label_id=1] {
+      // when d >= s we need a reverse-direction copy
+      "add r0, r0, r2",
+      "add r1, r1, r2",
+      "eor r3, r0, r1",
+      "lsls r3, r3, #31",
+      "bmi {reverse_copy_u8}",
+      "bcs 2f",
+
+      // max coalign4, possible fixup and jump
+      "lsls r3, r0, #31",
+      "submi r2, r2, #1",
+      "ldrbmi r3, [r1, #-1]!",
+      "strbmi r3, [r0, #-1]!",
+      "subcs r2, r2, #2",
+      "ldrhcs r3, [r1, #-2]!",
+      "strhcs r3, [r0, #-2]!",
+      "b {reverse_copy_u32}",
+
+      // max coalign2, possible fixup and jump; the byte fixup only
+      // applies when the end pointer is odd, hence the `ne` conditions
+      // on the ops that consume the `tst` result
+      "2:",
+      "tst r0, #1",
+      "subne r2, r2, #1",
+      "ldrbne r3, [r1, #-1]!",
+      "strbne r3, [r0, #-1]!",
+      "b {reverse_copy_u16}",
+    }),
+    "b {__aeabi_memcpy}",
+    __aeabi_memcpy = sym __aeabi_memcpy,
+    __aeabi_memcpy1 = sym __aeabi_memcpy1,
+    reverse_copy_u8 = sym reverse_copy_u8,
+    reverse_copy_u16 = sym reverse_copy_u16,
+    reverse_copy_u32 = sym reverse_copy_u32,
+    options(noreturn),
+  }
+}
+
+/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible.
+///
+/// This is the libc version of a memory move. It's required to return the
+/// `dest` pointer at the end of the call, which makes it need an extra
+/// push/pop compared to a direct call to `__aeabi_memmove`.
+///
+/// * **Returns:** The `dest` pointer.
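+///
+/// A hedged usage sketch with overlapping regions (the shift-by-one is
+/// illustrative):
+///
+/// ```no_run
+/// # use gba::mem_fns::memmove;
+/// let mut buf = [1_u8, 2, 3, 4, 5, 0];
+/// // Shift the five payload bytes up by one. The regions overlap, which is
+/// // exactly what `memmove` (unlike `memcpy`) is required to handle.
+/// unsafe { memmove(buf.as_mut_ptr().add(1), buf.as_ptr(), 5) };
+/// assert_eq!(buf, [1, 1, 2, 3, 4, 5]);
+/// ```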
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.memmove"]
+pub unsafe extern "C" fn memmove(
+  dest: *mut u8, src: *const u8, byte_count: usize,
+) -> *mut u8 {
+  core::arch::asm! {
+    bracer::with_pushed_registers!("{{r0, lr}}", {
+      "bl {__aeabi_memmove}",
+    }),
+    "bx lr",
+    __aeabi_memmove = sym __aeabi_memmove,
+    options(noreturn)
+  }
+}
+
+// SET
+
+/// Sets bytes in a region, prefer [`__aeabi_memset`] if possible.
+///
+/// This function is provided only for API completeness, because in some cases
+/// the compiler might automatically generate a call to this function.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memset4"]
+pub unsafe extern "C" fn __aeabi_memset4(
+  dest: *mut u32, byte_count: usize, byte: i32,
+) {
+  __aeabi_memset(dest.cast(), byte_count, byte)
+}
+
+/// Sets bytes in a region, prefer [`__aeabi_memset`] if possible.
+///
+/// This function is provided only for API completeness, because in some cases
+/// the compiler might automatically generate a call to this function.
+#[inline]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memset8"]
+pub unsafe extern "C" fn __aeabi_memset8(
+  dest: *mut u32, byte_count: usize, byte: i32,
+) {
+  __aeabi_memset(dest.cast(), byte_count, byte)
+}
+
+/// Sets all bytes in the region to the `byte` given.
+///
+/// Because of historical reasons, the byte is passed in as an `i32`, but only
+/// the lowest 8 bits are used.
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.__aeabi_memset"]
+pub unsafe extern "C" fn __aeabi_memset(
+  dest: *mut u8, byte_count: usize, byte: i32,
+) {
+  core::arch::asm! {
+    bracer::when!("r1" >=u "#8" [label_id=7] {
+      // duplicate the byte across all of r2 and r3
+      "and r2, r2, #0xFF",
+      "orr r2, r2, r2, lsl #8",
+      "orr r2, r2, r2, lsl #16",
+      "mov r3, r2",
+
+      // carry/sign test on the bytes *needed* to reach alignment 4
+      // (`4 - dest`), then do the fixup; testing the raw address bits
+      // instead would leave odd pointers mis-aligned for the `stm` runs
+      "rsb r12, r0, #4",
+      "lsls r12, r12, #31",
+      "submi r1, r1, #1",
+      "strbmi r2, [r0], #1",
+      "subcs r1, r1, #2",
+      "strhcs r2, [r0], #2",
+
+      bracer::when!("r1" >=u "#32" [label_id=8] {
+        bracer::with_pushed_registers!("{{r4-r9}}", {
+          "mov r4, r2",
+          "mov r5, r2",
+          "mov r6, r2",
+          "mov r7, r2",
+          "mov r8, r2",
+          "mov r9, r2",
+          "1:",
+          "subs r1, r1, #32",
+          "stmge r0!, {{r2-r9}}",
+          "bgt 1b",
+        }),
+        "bxeq lr",
+      }),
+
+      // set 4 words
+      "tst r1, #0b10000",
+      "stmne r0!, {{r2, r3}}",
+      "stmne r0!, {{r2, r3}}",
+
+      // set 2 and/or 1 words
+      "lsls r12, r1, #29",
+      "stmcs r0!, {{r2, r3}}",
+      "strmi r2, [r0], #4",
+
+      // set halfword and/or byte
+      "lsls r12, r1, #31",
+      "strhcs r2, [r0], #2",
+      "strbmi r2, [r0], #1",
+      "bx lr",
+    }),
+    // byte loop
+    "9:",
+    "subs r1, r1, #1",
+    "strbcs r2, [r0], #1",
+    "bgt 9b",
+    "bx lr",
+    options(noreturn)
+  }
+}
+
+/// Sets bytes in a region, prefer [`__aeabi_memset`] if possible.
+///
+/// This is the libc version of a memory set. It's required to return the
+/// `dest` pointer at the end of the call, which makes it need an extra
+/// push/pop compared to a direct call to `__aeabi_memset`.
+///
+/// * **Returns:** The `dest` pointer.
+#[naked]
+#[no_mangle]
+#[instruction_set(arm::a32)]
+#[link_section = ".iwram.memset"]
+pub unsafe extern "C" fn memset(
+  dest: *mut u8, byte: i32, byte_count: usize,
+) -> *mut u8 {
+  core::arch::asm!
{ + bracer::with_pushed_registers!("{{r0, lr}}", { + "bl {__aeabi_memset}", + }), + "bx lr", + __aeabi_memset = sym __aeabi_memset, + options(noreturn) + } +} + +// CLEAR + +/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. +/// +/// This function is provided only for API completeness, because in some cases +/// the compiler might automatically generate a call to this function. +#[inline] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.__aeabi_memclr4"] +pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u32, byte_count: usize) { + __aeabi_memset(dest.cast(), byte_count, 0) +} + +/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. +/// +/// This function is provided only for API completeness, because in some cases +/// the compiler might automatically generate a call to this function. +#[inline] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.__aeabi_memclr8"] +pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u32, byte_count: usize) { + __aeabi_memset(dest.cast(), byte_count, 0) +} + +/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. +/// +/// This function is provided only for API completeness, because in some cases +/// the compiler might automatically generate a call to this function. +#[inline] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.__aeabi_memclr"] +pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, byte_count: usize) { + __aeabi_memset(dest, byte_count, 0) +} + +/// Reads 4 bytes, starting at the address given. +/// +/// See [__aeabi_uread4] +/// +/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uread4"] +unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 { + core::arch::asm!( + "ldrb r2, [r0]", + "ldrb r3, [r0, #1]", + "orr r2, r2, r3, lsl #8", + "ldrb r3, [r0, #2]", + "orr r2, r2, r3, lsl #16", + "ldrb r3, [r0, #3]", + "orr r2, r2, r3, lsl #24", + "mov r0, r2", + "bx lr", + options(noreturn), + ) +} + +/// Writes 4 bytes, starting at the address given. +/// +/// See [__aeabi_uwrite4] +/// +/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uwrite4"] +unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) { + core::arch::asm!( + "strb r0, [r1]", + "lsr r2, r0, #8", + "strb r2, [r1, #1]", + "lsr r2, r2, #8", + "strb r2, [r1, #2]", + "lsr r2, r2, #8", + "strb r2, [r1, #3]", + "bx lr", + options(noreturn), + ) +} + +/// Reads 8 bytes, starting at the address given. +/// +/// See [__aeabi_uread8] +/// +/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uread8"] +unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 { + core::arch::asm!( + "ldrb r1, [r0, #4]", + "ldrb r2, [r0, #5]", + "orr r1, r1, r2, lsl #8", + "ldrb r2, [r0, #6]", + "orr r1, r1, r2, lsl #16", + "ldrb r2, [r0, #7]", + "orr r1, r1, r2, lsl #24", + "b __aeabi_uread4", + options(noreturn), + ) +} + +/// Writes 8 bytes, starting at the address given. 
+/// +/// See [__aeabi_uwrite8] +/// +/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access +#[naked] +#[no_mangle] +#[instruction_set(arm::a32)] +#[link_section = ".iwram.aeabi.uwrite8"] +unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) { + core::arch::asm!( + "strb r0, [r2]", + "lsr r3, r0, #8", + "strb r3, [r2, #1]", + "lsr r3, r3, #8", + "strb r3, [r2, #2]", + "lsr r3, r3, #8", + "strb r3, [r2, #3]", + "strb r1, [r2, #4]", + "lsr r3, r1, #8", + "strb r3, [r2, #5]", + "lsr r3, r3, #8", + "strb r3, [r2, #6]", + "lsr r3, r3, #8", + "strb r3, [r2, #7]", + "bx lr", + options(noreturn), + ) +} diff --git a/src/mmio.rs b/src/mmio.rs index a78ad31..313f268 100644 --- a/src/mmio.rs +++ b/src/mmio.rs @@ -277,7 +277,7 @@ impl TextScreenblockAddress { #[inline] pub fn write_word_array(self, words: &[u32; Self::WORD_COUNT]) { - use crate::prelude::__aeabi_memcpy4; + use crate::mem_fns::__aeabi_memcpy4; let dest: *mut u32 = self.block.as_ptr() as *mut u32; let src: *const u32 = words.as_ptr(); let byte_count = size_of::<[u32; Self::WORD_COUNT]>(); @@ -347,7 +347,7 @@ macro_rules! make_affine_screenblock_address_type { #[inline] pub fn write_word_array(self, words: &[u32; Self::WORD_COUNT]) { - use crate::prelude::__aeabi_memcpy4; + use crate::mem_fns::__aeabi_memcpy4; let dest: *mut u32 = self.block.as_ptr() as *mut u32; let src: *const u32 = words.as_ptr(); let byte_count = size_of::<[u32; Self::WORD_COUNT]>();
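// A hedged usage sketch of the call sites these hunks touch (the screenblock
// index and constructor below are illustrative, not the crate's exact API):
//
//   let sb = TextScreenblockAddress::new(8);
//   let words = [0_u32; TextScreenblockAddress::WORD_COUNT];
//   // one call performs a single 4-aligned bulk copy via `__aeabi_memcpy4`
//   sb.write_word_array(&words);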