mirror of
https://github.com/italicsjenga/agb.git
synced 2024-12-25 01:01:34 +11:00
Merge pull request #226 from gwilymk/another-audio-performance-improvement
Another audio performance improvement
This commit is contained in:
commit
93ed4b87e8
|
@ -42,6 +42,8 @@ fn main(mut gba: Gba) -> ! {
|
||||||
|
|
||||||
if input.is_pressed(Button::L) {
|
if input.is_pressed(Button::L) {
|
||||||
channel.volume(half);
|
channel.volume(half);
|
||||||
|
} else if input.is_pressed(Button::R) {
|
||||||
|
channel.volume(20.into()); // intentionally introduce clipping
|
||||||
} else {
|
} else {
|
||||||
channel.volume(1.into());
|
channel.volume(1.into());
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ modifications_fallback:
|
||||||
|
|
||||||
|
|
||||||
1:
|
1:
|
||||||
.macro mixer_add_loop
|
.rept 4
|
||||||
add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer
|
add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer
|
||||||
ldrsb r6, [r4] @ load the current sound sample to r6
|
ldrsb r6, [r4] @ load the current sound sample to r6
|
||||||
add r5, r5, r2 @ calculate the position to read the next sample from
|
add r5, r5, r2 @ calculate the position to read the next sample from
|
||||||
|
@ -38,12 +38,7 @@ modifications_fallback:
|
||||||
mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together)
|
mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together)
|
||||||
|
|
||||||
str r4, [r1], #4 @ store the new value, and increment the pointer
|
str r4, [r1], #4 @ store the new value, and increment the pointer
|
||||||
.endm
|
.endr
|
||||||
|
|
||||||
mixer_add_loop
|
|
||||||
mixer_add_loop
|
|
||||||
mixer_add_loop
|
|
||||||
mixer_add_loop
|
|
||||||
|
|
||||||
subs r8, r8, #4 @ loop counter
|
subs r8, r8, #4 @ loop counter
|
||||||
bne 1b @ jump back if we're not done with the loop
|
bne 1b @ jump back if we're not done with the loop
|
||||||
|
@ -69,7 +64,8 @@ same_modification:
|
||||||
mov r5, #0 @ current index we're reading from
|
mov r5, #0 @ current index we're reading from
|
||||||
ldr r8, agb_rs__buffer_size @ the number of steps left
|
ldr r8, agb_rs__buffer_size @ the number of steps left
|
||||||
|
|
||||||
.macro mixer_add_loop_simple
|
1:
|
||||||
|
.rept 4
|
||||||
add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer
|
add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer
|
||||||
ldrsb r6, [r4] @ load the current sound sample to r6
|
ldrsb r6, [r4] @ load the current sound sample to r6
|
||||||
add r5, r5, r2 @ calculate the position to read the next sample from
|
add r5, r5, r2 @ calculate the position to read the next sample from
|
||||||
|
@ -81,13 +77,7 @@ same_modification:
|
||||||
add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together)
|
add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together)
|
||||||
|
|
||||||
str r4, [r1], #4 @ store the new value, and increment the pointer
|
str r4, [r1], #4 @ store the new value, and increment the pointer
|
||||||
.endm
|
.endr
|
||||||
|
|
||||||
1:
|
|
||||||
mixer_add_loop_simple
|
|
||||||
mixer_add_loop_simple
|
|
||||||
mixer_add_loop_simple
|
|
||||||
mixer_add_loop_simple
|
|
||||||
|
|
||||||
subs r8, r8, #4 @ loop counter
|
subs r8, r8, #4 @ loop counter
|
||||||
bne 1b @ jump back if we're not done with the loop
|
bne 1b @ jump back if we're not done with the loop
|
||||||
|
@ -107,7 +97,9 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
|
|
||||||
ldr r5, =0x00000FFF
|
ldr r5, =0x00000FFF
|
||||||
|
|
||||||
.macro mixer_add_loop_simple_stereo
|
ldr r8, agb_rs__buffer_size
|
||||||
|
1:
|
||||||
|
.rept 4
|
||||||
ldrsh r6, [r0], #2 @ load the current sound sample to r6
|
ldrsh r6, [r0], #2 @ load the current sound sample to r6
|
||||||
|
|
||||||
ldr r4, [r1] @ read the current value
|
ldr r4, [r1] @ read the current value
|
||||||
|
@ -134,14 +126,7 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
add r4, r4, r6, lsl #4 @ r4 += r6 << 4 (calculating both the left and right samples together)
|
add r4, r4, r6, lsl #4 @ r4 += r6 << 4 (calculating both the left and right samples together)
|
||||||
|
|
||||||
str r4, [r1], #4 @ store the new value, and increment the pointer
|
str r4, [r1], #4 @ store the new value, and increment the pointer
|
||||||
.endm
|
.endr
|
||||||
|
|
||||||
ldr r8, agb_rs__buffer_size
|
|
||||||
1:
|
|
||||||
mixer_add_loop_simple_stereo
|
|
||||||
mixer_add_loop_simple_stereo
|
|
||||||
mixer_add_loop_simple_stereo
|
|
||||||
mixer_add_loop_simple_stereo
|
|
||||||
|
|
||||||
subs r8, r8, #4 @ loop counter
|
subs r8, r8, #4 @ loop counter
|
||||||
bne 1b @ jump back if we're not done with the loop
|
bne 1b @ jump back if we're not done with the loop
|
||||||
|
@ -151,60 +136,78 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
|
|
||||||
agb_arm_end agb_rs__mixer_add_stereo
|
agb_arm_end agb_rs__mixer_add_stereo
|
||||||
|
|
||||||
.macro clamp_s8 reg:req
|
|
||||||
cmn \reg, #128
|
|
||||||
mvnlt \reg, #128
|
|
||||||
|
|
||||||
cmp \reg, #127
|
|
||||||
movgt \reg, #127
|
|
||||||
.endm
|
|
||||||
|
|
||||||
agb_arm_func agb_rs__mixer_collapse
|
agb_arm_func agb_rs__mixer_collapse
|
||||||
@ Arguments:
|
@ Arguments:
|
||||||
@ r0 = target buffer (i8)
|
@ r0 = target buffer (i8)
|
||||||
@ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32)
|
@ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32)
|
||||||
push {r4, r5, r6, r7, r8, r9, r10}
|
push {r4-r11}
|
||||||
|
|
||||||
|
CONST_0 .req r7
|
||||||
|
CONST_FF .req r8
|
||||||
|
CONST_127 .req r9
|
||||||
|
TEMP .req r10
|
||||||
|
SWAP_SIGN .req r11
|
||||||
|
|
||||||
|
ldr CONST_0, =0
|
||||||
|
ldr CONST_FF, =0xff
|
||||||
|
ldr CONST_127, =127
|
||||||
|
ldr SWAP_SIGN, =0x80808080
|
||||||
|
|
||||||
ldr r2, agb_rs__buffer_size @ loop counter
|
ldr r2, agb_rs__buffer_size @ loop counter
|
||||||
mov r4, r2
|
mov r4, r2
|
||||||
|
|
||||||
1:
|
@ The idea for this solution came from pimpmobile:
|
||||||
|
@ https://github.com/kusma/pimpmobile/blob/f2b2be49e806ca2a0d99cf91b3838d6d10f86b7d/src/pimp_mixer_clip_arm.S
|
||||||
|
@
|
||||||
|
@ The register should be 127 bigger than what you actually want, and we'll correct for that later. Hence the
|
||||||
|
@ add instructions in `load_sample`.
|
||||||
|
@
|
||||||
|
@ The idea behind this is in the bit patterns of -128 and 127 which are 10000000 and 01111111 respectively,
|
||||||
|
@ and we want to clamp the value between them.
|
||||||
|
@
|
||||||
|
@ The first instruction calculates `-((sample + 128) >> 8)`. If sample is between -128 and 127, then
|
||||||
|
@ 0 <= sample + 128 <= 255 which means that shifting that right by 8 is 0. Hence the zero flag will be set, so
|
||||||
|
@ the `andne` instruction won't execute.
|
||||||
|
@
|
||||||
|
@ If the sample is outside of a signed 8 bit value, then `sample >> 8` will either be -1 or 1 (we assume that samples
|
||||||
|
@ don't go too high, but the idea still works, so you can generalise this further if you want). This value is stored in TEMP.
|
||||||
|
@
|
||||||
|
@ As 32-bit integers, -1 has a binary expansion of all 1s, and 1 has a binary expansion of all zeros followed by a single 1.
|
||||||
|
@ So (-1 logical >> 24) gives 11111111 and (1 logical >> 24) gives 00000000 so register is clamped between these two values.
|
||||||
|
.macro clamp_s8 reg:req
|
||||||
|
subs TEMP, CONST_0, \reg, asr #8
|
||||||
|
andne \reg, CONST_FF, TEMP, lsr #24
|
||||||
|
.endm
|
||||||
|
|
||||||
.macro load_sample left_reg:req right_reg:req
|
.macro load_sample left_reg:req right_reg:req
|
||||||
@ left_reg = *r1; r1++
|
@ left_reg = *r1; r1++
|
||||||
ldr \left_reg, [r1], #4
|
ldr \left_reg, [r1], #4
|
||||||
|
|
||||||
lsl \right_reg, \left_reg, #16 @ push the sample 16 bits first
|
mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first
|
||||||
asr \right_reg, \right_reg, #20 @ move right sample back to being the correct value
|
add \right_reg, CONST_127, \right_reg, asr #20 @ move right sample back to being the correct value
|
||||||
mov \left_reg, \left_reg, asr #20 @ now we only have the left sample
|
add \left_reg, CONST_127, \left_reg, asr #20 @ now we only have the left sample
|
||||||
|
|
||||||
clamp_s8 \left_reg @ clamp the audio to 8 bit values
|
clamp_s8 \left_reg @ clamp the audio to 8 bit values
|
||||||
clamp_s8 \right_reg
|
clamp_s8 \right_reg
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
1:
|
||||||
load_sample r3, r12
|
load_sample r3, r12
|
||||||
|
|
||||||
load_sample r5, r6
|
load_sample r5, r6
|
||||||
load_sample r7, r8
|
|
||||||
load_sample r9, r10
|
|
||||||
|
|
||||||
@ combine the four samples so we can store in 32-bit chunks
|
|
||||||
@ need to ensure that we don't overwrite the extra bit of the sample
|
|
||||||
and r3, r3, #255
|
|
||||||
and r12, r12, #255
|
|
||||||
and r5, r5, #255
|
|
||||||
and r6, r6, #255
|
|
||||||
and r7, r7, #255
|
|
||||||
and r8, r8, #255
|
|
||||||
and r9, r9, #255
|
|
||||||
and r10, r10, #255
|
|
||||||
|
|
||||||
@ combine all of the samples
|
|
||||||
orr r3, r3, r5, lsl #8
|
orr r3, r3, r5, lsl #8
|
||||||
orr r3, r3, r7, lsl #16
|
|
||||||
orr r3, r3, r9, lsl #24
|
|
||||||
|
|
||||||
orr r12, r12, r6, lsl #8
|
orr r12, r12, r6, lsl #8
|
||||||
orr r12, r12, r8, lsl #16
|
|
||||||
orr r12, r12, r10, lsl #24
|
load_sample r5, r6
|
||||||
|
orr r3, r3, r5, lsl #16
|
||||||
|
orr r12, r12, r6, lsl #16
|
||||||
|
|
||||||
|
load_sample r5, r6
|
||||||
|
orr r3, r3, r5, lsl #24
|
||||||
|
orr r12, r12, r6, lsl #24
|
||||||
|
|
||||||
|
eor r3, r3, SWAP_SIGN
|
||||||
|
eor r12, r12, SWAP_SIGN
|
||||||
|
|
||||||
str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3
|
str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3
|
||||||
str r12, [r0], #4 @ *r0 = r12; r0 += 4
|
str r12, [r0], #4 @ *r0 = r12; r0 += 4
|
||||||
|
@ -212,6 +215,6 @@ agb_arm_func agb_rs__mixer_collapse
|
||||||
subs r2, r2, #4 @ r2 -= 4
|
subs r2, r2, #4 @ r2 -= 4
|
||||||
bne 1b @ loop if not 0
|
bne 1b @ loop if not 0
|
||||||
|
|
||||||
pop {r4, r5, r6, r7, r8, r9, r10}
|
pop {r4-r11}
|
||||||
bx lr
|
bx lr
|
||||||
agb_arm_end agb_rs__mixer_collapse
|
agb_arm_end agb_rs__mixer_collapse
|
||||||
|
|
|
@ -96,7 +96,6 @@ impl SoundChannel {
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn volume(&mut self, volume: Num<i16, 4>) -> &mut Self {
|
pub fn volume(&mut self, volume: Num<i16, 4>) -> &mut Self {
|
||||||
assert!(volume <= Num::new(1), "volume must be <= 1");
|
|
||||||
assert!(volume >= Num::new(0), "volume must be >= 0");
|
assert!(volume >= Num::new(0), "volume must be >= 0");
|
||||||
|
|
||||||
self.volume = volume;
|
self.volume = volume;
|
||||||
|
|
Loading…
Reference in a new issue