Skip useless zeroing of buffer (#451)

I realised we don't need to zero the buffer or read the current buffer
value for the first sound that we write each frame: that sound can simply
overwrite the buffer instead of being added to it. This gets us from
17728 cycles per frame at 32768Hz down to 15291 cycles per frame.

From tests (and theory), this reduces the number of cycles per frame by
over 2,000 at 32768Hz.

- [x] No changelog update needed - the changelog already mentions the
mixer improvements
This commit is contained in:
Gwilym Inzani 2023-06-27 23:52:05 +01:00 committed by GitHub
commit 92e31aef35
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 104 additions and 31 deletions

View file

@ -4,7 +4,8 @@
agb_rs__buffer_size: agb_rs__buffer_size:
.word 0 .word 0
agb_arm_func agb_rs__mixer_add .macro mixer_add fn_name:req is_first:req
agb_arm_func \fn_name
@ Arguments @ Arguments
@ r0 - pointer to the data to be copied (u8 array) @ r0 - pointer to the data to be copied (u8 array)
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned) @ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
@ -18,9 +19,9 @@ agb_arm_func agb_rs__mixer_add
ldr r7, [sp, #20] @ load the right channel modification amount into r7 ldr r7, [sp, #20] @ load the right channel modification amount into r7
cmp r7, r3 @ check if left and right channel need the same modifications cmp r7, r3 @ check if left and right channel need the same modifications
beq same_modification beq 3f @ same modification
modifications_fallback: 4: @ modification fallback
orr r7, r7, r3, lsl #16 @ r7 now is the left channel followed by the right channel modifications. orr r7, r7, r3, lsl #16 @ r7 now is the left channel followed by the right channel modifications.
mov r5, #0 @ current index we're reading from mov r5, #0 @ current index we're reading from
@ -34,9 +35,12 @@ modifications_fallback:
ldrsb r6, [r4] @ load the current sound sample to r6 ldrsb r6, [r4] @ load the current sound sample to r6
add r5, r5, r2 @ calculate the position to read the next sample from add r5, r5, r2 @ calculate the position to read the next sample from
.ifc \is_first,true
mul r4, r6, r7 @ r4 = r6 * r7 (calculating both the left and right samples together)
.else
ldr r4, [r1] @ read the current value ldr r4, [r1] @ read the current value
mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together) mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together)
.endif
str r4, [r1], #4 @ store the new value, and increment the pointer str r4, [r1], #4 @ store the new value, and increment the pointer
.endr .endr
@ -47,13 +51,13 @@ modifications_fallback:
pop {{r4-r8}} pop {{r4-r8}}
bx lr bx lr
same_modification: 3: @ same modification
@ check to see if this is a perfect power of 2 @ check to see if this is a perfect power of 2
@ r5 is a scratch register, r7 = r3 = amount to modify @ r5 is a scratch register, r7 = r3 = amount to modify
sub r5, r7, #1 sub r5, r7, #1
ands r5, r5, r7 ands r5, r5, r7
bne modifications_fallback @ not 0 means we need to do the full modification bne 4b @ not 0 means we need to do the full modification, jump to modification fallback
@ count leading zeros of r7 into r3 @ count leading zeros of r7 into r3
mov r3, #0 mov r3, #0
@ -74,11 +78,16 @@ same_modification:
ldrsb r6, [r4] @ load the current sound sample to r6 ldrsb r6, [r4] @ load the current sound sample to r6
add r5, r5, r2 @ calculate the position to read the next sample from add r5, r5, r2 @ calculate the position to read the next sample from
ldr r4, [r1] @ read the current value
lsl r6, r6, #16 lsl r6, r6, #16
orr r6, r6, lsr #16 orr r6, r6, lsr #16
.ifc \is_first,true
mov r4, r6, lsl r3 @ r4 = r6 << r3
.else
ldr r4, [r1] @ read the current value
add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together) add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together)
.endif
str r4, [r1], #4 @ store the new value, and increment the pointer str r4, [r1], #4 @ store the new value, and increment the pointer
.endr .endr
@ -89,9 +98,14 @@ same_modification:
pop {{r4-r8}} pop {{r4-r8}}
bx lr bx lr
agb_arm_end agb_rs__mixer_add agb_arm_end \fn_name
.endm
agb_arm_func agb_rs__mixer_add_stereo mixer_add agb_rs__mixer_add false
mixer_add agb_rs__mixer_add_first true
.macro stereo_add_fn fn_name:req is_first:req
agb_arm_func \fn_name
@ Arguments @ Arguments
@ r0 - pointer to the data to be copied (u8 array) @ r0 - pointer to the data to be copied (u8 array)
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned) @ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
@ -127,16 +141,24 @@ agb_arm_func agb_rs__mixer_add_stereo
lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample
orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up
.ifc \is_first,true
mul \sample_reg, r6, r2
.else
mla \sample_reg, r6, r2, \sample_reg @ r4 += r6 * r2 (calculating both the left and right samples together) mla \sample_reg, r6, r2, \sample_reg @ r4 += r6 * r2 (calculating both the left and right samples together)
.endif
.endm .endm
1: 1:
.ifc \is_first,true
.else
ldmia r1, {{r9-r12}} @ read the current values ldmia r1, {{r9-r12}} @ read the current values
.endif
add_stereo_sample r9 add_stereo_sample r9
add_stereo_sample r10 add_stereo_sample r10
add_stereo_sample r11 add_stereo_sample r11
add_stereo_sample r12 add_stereo_sample r12
.purgem add_stereo_sample
stmia r1!, {{r9-r12}} @ store the new value, and increment the pointer stmia r1!, {{r9-r12}} @ store the new value, and increment the pointer
@ -146,7 +168,11 @@ agb_arm_func agb_rs__mixer_add_stereo
pop {{r4-r11}} pop {{r4-r11}}
bx lr bx lr
agb_arm_end agb_rs__mixer_add_stereo agb_arm_end \fn_name
.endm
stereo_add_fn agb_rs__mixer_add_stereo false
stereo_add_fn agb_rs__mixer_add_stereo_first true
agb_arm_func agb_rs__mixer_collapse agb_arm_func agb_rs__mixer_collapse
@ Arguments: @ Arguments:

View file

@ -29,12 +29,26 @@ extern "C" {
right_amount: Num<i16, 4>, right_amount: Num<i16, 4>,
); );
fn agb_rs__mixer_add_first(
sound_data: *const u8,
sound_buffer: *mut Num<i16, 4>,
playback_speed: Num<u32, 8>,
left_amount: Num<i16, 4>,
right_amount: Num<i16, 4>,
);
fn agb_rs__mixer_add_stereo( fn agb_rs__mixer_add_stereo(
sound_data: *const u8, sound_data: *const u8,
sound_buffer: *mut Num<i16, 4>, sound_buffer: *mut Num<i16, 4>,
volume: Num<i16, 4>, volume: Num<i16, 4>,
); );
fn agb_rs__mixer_add_stereo_first(
sound_data: *const u8,
sound_buffer: *mut Num<i16, 4>,
volume: Num<i16, 4>,
);
fn agb_rs__mixer_collapse( fn agb_rs__mixer_collapse(
sound_buffer: *mut i8, sound_buffer: *mut i8,
input_buffer: *const Num<i16, 4>, input_buffer: *const Num<i16, 4>,
@ -406,31 +420,64 @@ impl MixerBuffer {
working_buffer: &mut [Num<i16, 4>], working_buffer: &mut [Num<i16, 4>],
channels: impl Iterator<Item = &'a mut SoundChannel>, channels: impl Iterator<Item = &'a mut SoundChannel>,
) { ) {
working_buffer.fill(0.into()); let mut channels = channels
.filter(|channel| !channel.is_done)
for channel in channels { .filter_map(|channel| {
if channel.is_done { let playback_speed = if channel.is_stereo {
continue; 2.into()
}
let playback_speed = if channel.is_stereo {
2.into()
} else {
channel.playback_speed
};
if (channel.pos + playback_speed * self.frequency.buffer_size() as u32).floor()
>= channel.data.len() as u32
{
// TODO: This should probably play what's left rather than skip the last bit
if channel.should_loop {
channel.pos = 0.into();
} else { } else {
channel.is_done = true; channel.playback_speed
continue; };
if (channel.pos + playback_speed * self.frequency.buffer_size() as u32).floor()
>= channel.data.len() as u32
{
// TODO: This should probably play what's left rather than skip the last bit
if channel.should_loop {
channel.pos = 0.into();
} else {
channel.is_done = true;
return None;
}
} }
Some((channel, playback_speed))
});
if let Some((channel, playback_speed)) = channels.next() {
if channel.volume != 0.into() {
if channel.is_stereo {
unsafe {
agb_rs__mixer_add_stereo_first(
channel.data.as_ptr().add(channel.pos.floor() as usize),
working_buffer.as_mut_ptr(),
channel.volume,
);
}
} else {
let right_amount = ((channel.panning + 1) / 2) * channel.volume;
let left_amount = ((-channel.panning + 1) / 2) * channel.volume;
unsafe {
agb_rs__mixer_add_first(
channel.data.as_ptr().add(channel.pos.floor() as usize),
working_buffer.as_mut_ptr(),
playback_speed,
left_amount,
right_amount,
);
}
}
} else {
working_buffer.fill(0.into());
} }
channel.pos += playback_speed * self.frequency.buffer_size() as u32;
} else {
working_buffer.fill(0.into());
}
for (channel, playback_speed) in channels {
if channel.volume != 0.into() { if channel.volume != 0.into() {
if channel.is_stereo { if channel.is_stereo {
unsafe { unsafe {