mirror of
https://github.com/italicsjenga/agb.git
synced 2025-01-23 07:36:33 +11:00
Update agbabi's memcpy
This commit is contained in:
parent
6a8aeeb3e8
commit
a9da4a65f9
3 changed files with 103 additions and 58 deletions
|
@ -12,6 +12,7 @@ fn main() {
|
|||
println!("cargo:rerun-if-changed=gba.ld");
|
||||
println!("cargo:rerun-if-changed=gba_mb.ld");
|
||||
println!("cargo:rerun-if-changed=src/asm_include.s");
|
||||
println!("cargo:rerun-if-changed=src/agbabi/macros.inc");
|
||||
println!("cargo:rerun-if-changed=gfx/test_logo.png");
|
||||
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
|
43
agb/src/agbabi/macros.inc
Normal file
43
agb/src/agbabi/macros.inc
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
===============================================================================
|
||||
|
||||
ARM assembly support macros
|
||||
|
||||
Copyright (C) 2021-2022 agbabi contributors
|
||||
For conditions of distribution and use, see copyright notice in LICENSE.md
|
||||
|
||||
===============================================================================
|
||||
*/
|
||||
|
||||
// joaobapt_test_lsl reg, shift
// Shift and test upper two bits, clobbering \reg
// Shifts \reg left by \shift in place with a flag-setting move (movs), so
// both \reg and the condition flags are overwritten.
|
||||
// Use mi for first bit, cs for second bit
// i.e. after the shift, mi <=> bit 31 of the shifted value is set, and
// cs <=> the last bit shifted out of the top was set.
// NOTE(review): with the default shift of #0 nothing is shifted out, so the
// carry flag presumably keeps its previous value and only mi is meaningful;
// confirm against the ARM shifter-operand rules before relying on cs here.
|
||||
.macro joaobapt_test_lsl reg shift = #0
|
||||
movs \reg, \reg, lsl \shift
|
||||
.endm
|
||||
|
||||
// joaobapt_test reg
// Test lowest two bits, clobbering \reg
// Expands to joaobapt_test_lsl with a shift of #31, so the flags are
// overwritten and \reg is left holding only its original bit 0, moved up
// to bit 31.
|
||||
// Use mi for low bit, cs for high bit
// (low bit = original bit 0, high bit = original bit 1)
|
||||
.macro joaobapt_test reg
|
||||
joaobapt_test_lsl \reg, #31
|
||||
.endm
|
||||
|
||||
// joaobapt_test_into dst, src
// Test lowest two bits of \src, result stored in \dst
// The value written to \dst is \src shifted left by 31; \src itself is only
// read, so it is preserved unless it is the same register as \dst.
// The condition flags are overwritten.
|
||||
// Use mi for low bit, cs for high bit
// (low bit = bit 0 of \src, high bit = bit 1 of \src)
|
||||
.macro joaobapt_test_into dst, src
|
||||
movs \dst, \src, lsl #31
|
||||
.endm
|
||||
|
||||
// joaobapt_switch reg, b_mi, b_cs
// Branches depending on lowest two bits, clobbering \reg
// The flags are overwritten as well (the test is done by joaobapt_test).
|
||||
// b_mi = low bit case, b_cs = high bit case
// The low bit is checked first, so when both bits are set \b_mi is taken.
// When neither bit is set no branch is taken and execution falls through
// to the instruction following the macro.
|
||||
.macro joaobapt_switch reg, b_mi, b_cs
|
||||
joaobapt_test \reg
|
||||
bmi \b_mi
|
||||
bcs \b_cs
|
||||
.endm
|
||||
|
||||
// align_switch a, b, scratch, b_byte, b_half
// Branches depending on alignment of \a and \b, clobbering \scratch
// \a and \b are only read. \scratch receives \a eor \b, so each set bit
// marks a bit position where the two values differ; its lowest two bits are
// then dispatched on with joaobapt_switch (which also overwrites the flags).
|
||||
// b_byte = off-by-byte case, b_half = off-by-half case
// b_byte: bit 0 of \a and \b differs (checked first).
// b_half: bit 0 matches but bit 1 differs.
// Falls through when the lowest two bits of \a and \b are identical.
|
||||
.macro align_switch a, b, scratch, b_byte, b_half
|
||||
eor \scratch, \a, \b
|
||||
joaobapt_switch \scratch, \b_byte, \b_half
|
||||
.endm
|
|
@ -1,19 +1,18 @@
|
|||
/*
|
||||
===============================================================================
|
||||
|
||||
ABI:
|
||||
__aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
|
||||
Standard:
|
||||
memcpy
|
||||
Support:
|
||||
__agbabi_memcpy2
|
||||
|
||||
__agbabi_memcpy2, __agbabi_memcpy1
|
||||
Copyright (C) 2021-2022 agbabi contributors
|
||||
For conditions of distribution and use, see copyright notice in LICENSE.md
|
||||
|
||||
===============================================================================
|
||||
*/
|
||||
|
||||
.include "src/agbabi/macros.inc"
|
||||
|
||||
.arm
|
||||
.align 2
|
||||
|
||||
|
@ -22,95 +21,97 @@
|
|||
__agbabi_memcpy:
|
||||
.global __aeabi_memcpy
|
||||
__aeabi_memcpy:
|
||||
// Check pointer alignment
|
||||
eor r3, r1, r0
|
||||
// JoaoBapt carry & sign bit test
|
||||
movs r3, r3, lsl #31
|
||||
bmi .Lcopy1
|
||||
bcs .Lcopy2
|
||||
// >6-bytes is roughly the threshold when byte-by-byte copy is slower
|
||||
cmp r2, #6
|
||||
ble __agbabi_memcpy1
|
||||
|
||||
.Lcopy4:
|
||||
// Handle <= 2 byte copies byte-by-byte
|
||||
cmp r2, #2
|
||||
ble .Lcopy1
|
||||
align_switch r0, r1, r3, __agbabi_memcpy1, .Lcopy_halves
|
||||
|
||||
// Copy half and byte head
|
||||
rsb r3, r0, #4
|
||||
// JoaoBapt carry & sign bit test
|
||||
movs r3, r3, lsl #31
|
||||
// Check if r0 (or r1) needs word aligning
|
||||
rsbs r3, r0, #4
|
||||
joaobapt_test r3
|
||||
|
||||
// Copy byte head to align
|
||||
ldrmib r3, [r1], #1
|
||||
strmib r3, [r0], #1
|
||||
submi r2, r2, #1
|
||||
// r0, r1 are now half aligned
|
||||
|
||||
// Copy half head to align
|
||||
ldrcsh r3, [r1], #2
|
||||
strcsh r3, [r0], #2
|
||||
subcs r2, r2, #2
|
||||
// Fallthrough
|
||||
// r0, r1 are now word aligned
|
||||
|
||||
.global __aeabi_memcpy8
|
||||
__aeabi_memcpy8:
|
||||
.global __aeabi_memcpy4
|
||||
__aeabi_memcpy4:
|
||||
// Copy 8 words
|
||||
movs r12, r2, lsr #5
|
||||
beq .Lskip32
|
||||
lsl r3, r12, #5
|
||||
sub r2, r2, r3
|
||||
cmp r2, #32
|
||||
blt .Lcopy_words
|
||||
|
||||
// Word aligned, 32-byte copy
|
||||
push {r4-r10}
|
||||
.LcopyWords8:
|
||||
ldmia r1!, {r3-r10}
|
||||
stmia r0!, {r3-r10}
|
||||
subs r12, r12, #1
|
||||
bne .LcopyWords8
|
||||
.Lloop_32:
|
||||
subs r2, r2, #32
|
||||
ldmgeia r1!, {r3-r10}
|
||||
stmgeia r0!, {r3-r10}
|
||||
bgt .Lloop_32
|
||||
pop {r4-r10}
|
||||
.Lskip32:
|
||||
bxeq lr
|
||||
|
||||
// Copy words
|
||||
movs r12, r2, lsr #2
|
||||
.LcopyWords:
|
||||
subs r12, r12, #1
|
||||
ldrhs r3, [r1], #4
|
||||
strhs r3, [r0], #4
|
||||
bhs .LcopyWords
|
||||
// < 32 bytes remaining to be copied
|
||||
add r2, r2, #32
|
||||
|
||||
// Copy half and byte tail
|
||||
// JoaoBapt carry & sign bit test
|
||||
movs r3, r2, lsl #31
|
||||
.Lcopy_words:
|
||||
cmp r2, #4
|
||||
blt .Lcopy_halves
|
||||
.Lloop_4:
|
||||
subs r2, r2, #4
|
||||
ldrge r3, [r1], #4
|
||||
strge r3, [r0], #4
|
||||
bgt .Lloop_4
|
||||
bxeq lr
|
||||
|
||||
// Copy byte & half tail
|
||||
// This test still works when r2 is negative
|
||||
joaobapt_test r2
|
||||
// Copy half
|
||||
ldrcsh r3, [r1], #2
|
||||
strcsh r3, [r0], #2
|
||||
// Copy byte
|
||||
ldrmib r3, [r1]
|
||||
strmib r3, [r0]
|
||||
bx lr
|
||||
|
||||
.Lcopy2:
|
||||
// Copy byte head
|
||||
.Lcopy_halves:
|
||||
// Copy byte head to align
|
||||
tst r0, #1
|
||||
cmpne r2, #0
|
||||
ldrneb r3, [r1], #1
|
||||
strneb r3, [r0], #1
|
||||
subne r2, r2, #1
|
||||
// Fallthrough
|
||||
// r0, r1 are now half aligned
|
||||
|
||||
.global __agbabi_memcpy2
|
||||
__agbabi_memcpy2:
|
||||
// Copy halves
|
||||
movs r12, r2, lsr #1
|
||||
.LcopyHalves:
|
||||
subs r12, r12, #1
|
||||
ldrhsh r3, [r1], #2
|
||||
strhsh r3, [r0], #2
|
||||
bhs .LcopyHalves
|
||||
subs r2, r2, #2
|
||||
ldrgeh r3, [r1], #2
|
||||
strgeh r3, [r0], #2
|
||||
bgt __agbabi_memcpy2
|
||||
bxeq lr
|
||||
|
||||
// Copy byte tail
|
||||
tst r2, #1
|
||||
ldrneb r3, [r1]
|
||||
strneb r3, [r0]
|
||||
adds r2, r2, #1
|
||||
ldreqb r3, [r1]
|
||||
streqb r3, [r0]
|
||||
bx lr
|
||||
|
||||
.Lcopy1:
|
||||
.global __agbabi_memcpy1
|
||||
__agbabi_memcpy1:
|
||||
subs r2, r2, #1
|
||||
ldrhsb r3, [r1], #1
|
||||
strhsb r3, [r0], #1
|
||||
bhs .Lcopy1
|
||||
ldrgeb r3, [r1], #1
|
||||
strgeb r3, [r0], #1
|
||||
bgt __agbabi_memcpy1
|
||||
bx lr
|
||||
|
||||
.section .iwram.memcpy, "ax", %progbits
|
||||
|
|
Loading…
Add table
Reference in a new issue