mirror of
https://github.com/italicsjenga/agb.git
synced 2024-12-24 00:31:34 +11:00
Update agbabi's memcpy
This commit is contained in:
parent
6a8aeeb3e8
commit
a9da4a65f9
|
@ -12,6 +12,7 @@ fn main() {
|
||||||
println!("cargo:rerun-if-changed=gba.ld");
|
println!("cargo:rerun-if-changed=gba.ld");
|
||||||
println!("cargo:rerun-if-changed=gba_mb.ld");
|
println!("cargo:rerun-if-changed=gba_mb.ld");
|
||||||
println!("cargo:rerun-if-changed=src/asm_include.s");
|
println!("cargo:rerun-if-changed=src/asm_include.s");
|
||||||
|
println!("cargo:rerun-if-changed=src/agbabi/macros.inc");
|
||||||
println!("cargo:rerun-if-changed=gfx/test_logo.png");
|
println!("cargo:rerun-if-changed=gfx/test_logo.png");
|
||||||
|
|
||||||
println!("cargo:rerun-if-changed=build.rs");
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
|
|
43
agb/src/agbabi/macros.inc
Normal file
43
agb/src/agbabi/macros.inc
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
ARM assembly support macros
|
||||||
|
|
||||||
|
Copyright (C) 2021-2022 agbabi contributors
|
||||||
|
For conditions of distribution and use, see copyright notice in LICENSE.md
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Shift \reg left by \shift and set the flags from the result, clobbering \reg
|
||||||
|
// Use mi for bit 31 of the shifted value, cs for the last bit shifted out
|
||||||
|
.macro joaobapt_test_lsl reg shift = #0
|
||||||
|
movs \reg, \reg, lsl \shift
|
||||||
|
.endm
|
||||||
|
|
||||||
|
// Test lowest two bits, clobbering \reg
|
||||||
|
// Use mi for low bit, cs for high bit
|
||||||
|
.macro joaobapt_test reg
|
||||||
|
joaobapt_test_lsl \reg, #31
|
||||||
|
.endm
|
||||||
|
|
||||||
|
// Test lowest two bits of \src, result stored in \dst
|
||||||
|
// Use mi for low bit, cs for high bit
|
||||||
|
.macro joaobapt_test_into dst, src
|
||||||
|
movs \dst, \src, lsl #31
|
||||||
|
.endm
|
||||||
|
|
||||||
|
// Branches depending on lowest two bits, clobbering \reg
|
||||||
|
// b_mi = low bit set (tested first, so it wins when both bits are set), b_cs = high bit set; falls through when neither is set
|
||||||
|
.macro joaobapt_switch reg, b_mi, b_cs
|
||||||
|
joaobapt_test \reg
|
||||||
|
bmi \b_mi
|
||||||
|
bcs \b_cs
|
||||||
|
.endm
|
||||||
|
|
||||||
|
// Branches depending on alignment of \a and \b, clobbering \scratch
|
||||||
|
// b_byte = \a and \b differ in bit 0, b_half = they differ only in bit 1; falls through when their low two bits match
|
||||||
|
.macro align_switch a, b, scratch, b_byte, b_half
|
||||||
|
eor \scratch, \a, \b
|
||||||
|
joaobapt_switch \scratch, \b_byte, \b_half
|
||||||
|
.endm
|
|
@ -1,19 +1,18 @@
|
||||||
/*
|
/*
|
||||||
===============================================================================
|
===============================================================================
|
||||||
|
|
||||||
ABI:
|
ABI:
|
||||||
__aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
|
__aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
|
||||||
Standard:
|
Standard:
|
||||||
memcpy
|
memcpy
|
||||||
Support:
|
Support:
|
||||||
__agbabi_memcpy2
|
__agbabi_memcpy2, __agbabi_memcpy1
|
||||||
|
|
||||||
Copyright (C) 2021-2022 agbabi contributors
|
Copyright (C) 2021-2022 agbabi contributors
|
||||||
For conditions of distribution and use, see copyright notice in LICENSE.md
|
For conditions of distribution and use, see copyright notice in LICENSE.md
|
||||||
|
|
||||||
===============================================================================
|
===============================================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
.include "src/agbabi/macros.inc"
|
||||||
|
|
||||||
.arm
|
.arm
|
||||||
.align 2
|
.align 2
|
||||||
|
|
||||||
|
@ -22,95 +21,97 @@
|
||||||
__agbabi_memcpy:
|
__agbabi_memcpy:
|
||||||
.global __aeabi_memcpy
|
.global __aeabi_memcpy
|
||||||
__aeabi_memcpy:
|
__aeabi_memcpy:
|
||||||
// Check pointer alignment
|
// Copies of 6 bytes or fewer go byte-by-byte; above roughly that size the byte loop is the slower path
|
||||||
eor r3, r1, r0
|
cmp r2, #6
|
||||||
// JoaoBapt carry & sign bit test
|
ble __agbabi_memcpy1
|
||||||
movs r3, r3, lsl #31
|
|
||||||
bmi .Lcopy1
|
|
||||||
bcs .Lcopy2
|
|
||||||
|
|
||||||
.Lcopy4:
|
align_switch r0, r1, r3, __agbabi_memcpy1, .Lcopy_halves
|
||||||
// Handle <= 2 byte copies byte-by-byte
|
|
||||||
cmp r2, #2
|
|
||||||
ble .Lcopy1
|
|
||||||
|
|
||||||
// Copy half and byte head
|
// Check if r0 (or r1) needs word aligning
|
||||||
rsb r3, r0, #4
|
rsbs r3, r0, #4
|
||||||
// JoaoBapt carry & sign bit test
|
joaobapt_test r3
|
||||||
movs r3, r3, lsl #31
|
|
||||||
|
// Copy byte head to align
|
||||||
ldrmib r3, [r1], #1
|
ldrmib r3, [r1], #1
|
||||||
strmib r3, [r0], #1
|
strmib r3, [r0], #1
|
||||||
submi r2, r2, #1
|
submi r2, r2, #1
|
||||||
|
// r0, r1 are now half aligned
|
||||||
|
|
||||||
|
// Copy half head to align
|
||||||
ldrcsh r3, [r1], #2
|
ldrcsh r3, [r1], #2
|
||||||
strcsh r3, [r0], #2
|
strcsh r3, [r0], #2
|
||||||
subcs r2, r2, #2
|
subcs r2, r2, #2
|
||||||
// Fallthrough
|
// r0, r1 are now word aligned
|
||||||
|
|
||||||
.global __aeabi_memcpy8
|
.global __aeabi_memcpy8
|
||||||
__aeabi_memcpy8:
|
__aeabi_memcpy8:
|
||||||
.global __aeabi_memcpy4
|
.global __aeabi_memcpy4
|
||||||
__aeabi_memcpy4:
|
__aeabi_memcpy4:
|
||||||
// Copy 8 words
|
cmp r2, #32
|
||||||
movs r12, r2, lsr #5
|
blt .Lcopy_words
|
||||||
beq .Lskip32
|
|
||||||
lsl r3, r12, #5
|
// Word aligned, 32-byte copy
|
||||||
sub r2, r2, r3
|
|
||||||
push {r4-r10}
|
push {r4-r10}
|
||||||
.LcopyWords8:
|
.Lloop_32:
|
||||||
ldmia r1!, {r3-r10}
|
subs r2, r2, #32
|
||||||
stmia r0!, {r3-r10}
|
ldmgeia r1!, {r3-r10}
|
||||||
subs r12, r12, #1
|
stmgeia r0!, {r3-r10}
|
||||||
bne .LcopyWords8
|
bgt .Lloop_32
|
||||||
pop {r4-r10}
|
pop {r4-r10}
|
||||||
.Lskip32:
|
bxeq lr
|
||||||
|
|
||||||
// Copy words
|
// < 32 bytes remaining to be copied
|
||||||
movs r12, r2, lsr #2
|
add r2, r2, #32
|
||||||
.LcopyWords:
|
|
||||||
subs r12, r12, #1
|
|
||||||
ldrhs r3, [r1], #4
|
|
||||||
strhs r3, [r0], #4
|
|
||||||
bhs .LcopyWords
|
|
||||||
|
|
||||||
// Copy half and byte tail
|
.Lcopy_words:
|
||||||
// JoaoBapt carry & sign bit test
|
cmp r2, #4
|
||||||
movs r3, r2, lsl #31
|
blt .Lcopy_halves
|
||||||
|
.Lloop_4:
|
||||||
|
subs r2, r2, #4
|
||||||
|
ldrge r3, [r1], #4
|
||||||
|
strge r3, [r0], #4
|
||||||
|
bgt .Lloop_4
|
||||||
|
bxeq lr
|
||||||
|
|
||||||
|
// Copy byte & half tail
|
||||||
|
// r2 is negative here (bytes left minus 4), but its low two bits still equal the bytes left, so the test works
|
||||||
|
joaobapt_test r2
|
||||||
|
// Copy half
|
||||||
ldrcsh r3, [r1], #2
|
ldrcsh r3, [r1], #2
|
||||||
strcsh r3, [r0], #2
|
strcsh r3, [r0], #2
|
||||||
|
// Copy byte
|
||||||
ldrmib r3, [r1]
|
ldrmib r3, [r1]
|
||||||
strmib r3, [r0]
|
strmib r3, [r0]
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
.Lcopy2:
|
.Lcopy_halves:
|
||||||
// Copy byte head
|
// Copy byte head to align
|
||||||
tst r0, #1
|
tst r0, #1
|
||||||
cmpne r2, #0
|
|
||||||
ldrneb r3, [r1], #1
|
ldrneb r3, [r1], #1
|
||||||
strneb r3, [r0], #1
|
strneb r3, [r0], #1
|
||||||
subne r2, r2, #1
|
subne r2, r2, #1
|
||||||
// Fallthrough
|
// r0, r1 are now half aligned
|
||||||
|
|
||||||
.global __agbabi_memcpy2
|
.global __agbabi_memcpy2
|
||||||
__agbabi_memcpy2:
|
__agbabi_memcpy2:
|
||||||
// Copy halves
|
subs r2, r2, #2
|
||||||
movs r12, r2, lsr #1
|
ldrgeh r3, [r1], #2
|
||||||
.LcopyHalves:
|
strgeh r3, [r0], #2
|
||||||
subs r12, r12, #1
|
bgt __agbabi_memcpy2
|
||||||
ldrhsh r3, [r1], #2
|
bxeq lr
|
||||||
strhsh r3, [r0], #2
|
|
||||||
bhs .LcopyHalves
|
|
||||||
|
|
||||||
// Copy byte tail
|
// Copy byte tail
|
||||||
tst r2, #1
|
adds r2, r2, #1
|
||||||
ldrneb r3, [r1]
|
ldreqb r3, [r1]
|
||||||
strneb r3, [r0]
|
streqb r3, [r0]
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
.Lcopy1:
|
.global __agbabi_memcpy1
|
||||||
|
__agbabi_memcpy1:
|
||||||
subs r2, r2, #1
|
subs r2, r2, #1
|
||||||
ldrhsb r3, [r1], #1
|
ldrgeb r3, [r1], #1
|
||||||
strhsb r3, [r0], #1
|
strgeb r3, [r0], #1
|
||||||
bhs .Lcopy1
|
bgt __agbabi_memcpy1
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
.section .iwram.memcpy, "ax", %progbits
|
.section .iwram.memcpy, "ax", %progbits
|
||||||
|
|
Loading…
Reference in a new issue