Calculate 32 bits at a time, since we're storing into IWRAM

This commit is contained in:
Gwilym Kuiper 2022-04-26 21:17:07 +01:00
parent 245cafa599
commit 921e2e9adf

View file

@ -163,7 +163,7 @@ agb_arm_func agb_rs__mixer_collapse
@ Arguments: @ Arguments:
@ r0 = target buffer (i8) @ r0 = target buffer (i8)
@ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32)
push {r4, r5, r6} push {r4, r5, r6, r7, r8, r9, r10}
ldr r2, agb_rs__buffer_size @ loop counter ldr r2, agb_rs__buffer_size @ loop counter
mov r4, r2 mov r4, r2
@ -183,18 +183,35 @@ agb_arm_func agb_rs__mixer_collapse
load_sample r3, r12 load_sample r3, r12
load_sample r5, r6 load_sample r5, r6
load_sample r7, r8
load_sample r9, r10
and r3, r3, #255 @ combine the two samples so we can store in 16-bit chunks @ combine the four samples so we can store in 32-bit chunks
@ need to ensure that we don't overwrite the extra bit of the sample
and r3, r3, #255
and r12, r12, #255 and r12, r12, #255
and r5, r5, #255
and r6, r6, #255
and r7, r7, #255
and r8, r8, #255
and r9, r9, #255
and r10, r10, #255
@ combine all of the samples
orr r3, r3, r5, lsl #8 orr r3, r3, r5, lsl #8
orr r3, r3, r7, lsl #16
orr r3, r3, r9, lsl #24
orr r12, r12, r6, lsl #8 orr r12, r12, r6, lsl #8
orr r12, r12, r8, lsl #16
orr r12, r12, r10, lsl #24
strh r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3 str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3
strh r12, [r0], #2 @ *r0 = r12; r0 += 2 str r12, [r0], #4 @ *r0 = r12; r0 += 4
subs r2, r2, #2 @ r2 -= 2 subs r2, r2, #4 @ r2 -= 4
bne 1b @ loop if not 0 bne 1b @ loop if not 0
pop {r4, r5, r6} pop {r4, r5, r6, r7, r8, r9, r10}
bx lr bx lr
agb_arm_end agb_rs__mixer_collapse agb_arm_end agb_rs__mixer_collapse