mirror of
https://github.com/italicsjenga/agb.git
synced 2024-12-23 08:11:33 +11:00
Save useless zeroing of buffer (#451)
I realised we don't need to zero the buffer, or read the current buffer value, for the first sound that we write each frame. This gets us from 17728 cycles per frame at 32768Hz down to 15291 cycles per frame. From tests (and theory), this reduces the number of cycles per frame by over 2,000 at 32768Hz.
- [x] No changelog update needed - already mentioned mixer improvements there
This commit is contained in:
commit
92e31aef35
|
@ -4,7 +4,8 @@
|
||||||
agb_rs__buffer_size:
|
agb_rs__buffer_size:
|
||||||
.word 0
|
.word 0
|
||||||
|
|
||||||
agb_arm_func agb_rs__mixer_add
|
.macro mixer_add fn_name:req is_first:req
|
||||||
|
agb_arm_func \fn_name
|
||||||
@ Arguments
|
@ Arguments
|
||||||
@ r0 - pointer to the data to be copied (u8 array)
|
@ r0 - pointer to the data to be copied (u8 array)
|
||||||
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
|
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
|
||||||
|
@ -18,9 +19,9 @@ agb_arm_func agb_rs__mixer_add
|
||||||
ldr r7, [sp, #20] @ load the right channel modification amount into r7
|
ldr r7, [sp, #20] @ load the right channel modification amount into r7
|
||||||
|
|
||||||
cmp r7, r3 @ check if left and right channel need the same modifications
|
cmp r7, r3 @ check if left and right channel need the same modifications
|
||||||
beq same_modification
|
beq 3f @ same modification
|
||||||
|
|
||||||
modifications_fallback:
|
4: @ modification fallback
|
||||||
orr r7, r7, r3, lsl #16 @ r7 now is the left channel followed by the right channel modifications.
|
orr r7, r7, r3, lsl #16 @ r7 now is the left channel followed by the right channel modifications.
|
||||||
|
|
||||||
mov r5, #0 @ current index we're reading from
|
mov r5, #0 @ current index we're reading from
|
||||||
|
@ -34,9 +35,12 @@ modifications_fallback:
|
||||||
ldrsb r6, [r4] @ load the current sound sample to r6
|
ldrsb r6, [r4] @ load the current sound sample to r6
|
||||||
add r5, r5, r2 @ calculate the position to read the next sample from
|
add r5, r5, r2 @ calculate the position to read the next sample from
|
||||||
|
|
||||||
|
.ifc \is_first,true
|
||||||
|
mul r4, r6, r7 @ r4 = r6 * r7 (calculating both the left and right samples together)
|
||||||
|
.else
|
||||||
ldr r4, [r1] @ read the current value
|
ldr r4, [r1] @ read the current value
|
||||||
|
|
||||||
mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together)
|
mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together)
|
||||||
|
.endif
|
||||||
|
|
||||||
str r4, [r1], #4 @ store the new value, and increment the pointer
|
str r4, [r1], #4 @ store the new value, and increment the pointer
|
||||||
.endr
|
.endr
|
||||||
|
@ -47,13 +51,13 @@ modifications_fallback:
|
||||||
pop {{r4-r8}}
|
pop {{r4-r8}}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
same_modification:
|
3: @ same modification
|
||||||
@ check to see if this is a perfect power of 2
|
@ check to see if this is a perfect power of 2
|
||||||
@ r5 is a scratch register, r7 = r3 = amount to modify
|
@ r5 is a scratch register, r7 = r3 = amount to modify
|
||||||
sub r5, r7, #1
|
sub r5, r7, #1
|
||||||
ands r5, r5, r7
|
ands r5, r5, r7
|
||||||
|
|
||||||
bne modifications_fallback @ not 0 means we need to do the full modification
|
bne 4b @ not 0 means we need to do the full modification, jump to modification fallback
|
||||||
|
|
||||||
@ count leading zeros of r7 into r3
|
@ count leading zeros of r7 into r3
|
||||||
mov r3, #0
|
mov r3, #0
|
||||||
|
@ -74,11 +78,16 @@ same_modification:
|
||||||
ldrsb r6, [r4] @ load the current sound sample to r6
|
ldrsb r6, [r4] @ load the current sound sample to r6
|
||||||
add r5, r5, r2 @ calculate the position to read the next sample from
|
add r5, r5, r2 @ calculate the position to read the next sample from
|
||||||
|
|
||||||
ldr r4, [r1] @ read the current value
|
|
||||||
|
|
||||||
lsl r6, r6, #16
|
lsl r6, r6, #16
|
||||||
orr r6, r6, lsr #16
|
orr r6, r6, lsr #16
|
||||||
|
|
||||||
|
.ifc \is_first,true
|
||||||
|
mov r4, r6, lsl r3 @ r4 = r6 << r3
|
||||||
|
.else
|
||||||
|
ldr r4, [r1] @ read the current value
|
||||||
add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together)
|
add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together)
|
||||||
|
.endif
|
||||||
|
|
||||||
str r4, [r1], #4 @ store the new value, and increment the pointer
|
str r4, [r1], #4 @ store the new value, and increment the pointer
|
||||||
.endr
|
.endr
|
||||||
|
@ -89,9 +98,14 @@ same_modification:
|
||||||
pop {{r4-r8}}
|
pop {{r4-r8}}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
agb_arm_end agb_rs__mixer_add
|
agb_arm_end \fn_name
|
||||||
|
.endm
|
||||||
|
|
||||||
agb_arm_func agb_rs__mixer_add_stereo
|
mixer_add agb_rs__mixer_add false
|
||||||
|
mixer_add agb_rs__mixer_add_first true
|
||||||
|
|
||||||
|
.macro stereo_add_fn fn_name:req is_first:req
|
||||||
|
agb_arm_func \fn_name
|
||||||
@ Arguments
|
@ Arguments
|
||||||
@ r0 - pointer to the data to be copied (u8 array)
|
@ r0 - pointer to the data to be copied (u8 array)
|
||||||
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
|
@ r1 - pointer to the sound buffer (i16 array which will alternate left and right channels, 32-bit aligned)
|
||||||
|
@ -127,16 +141,24 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample
|
lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample
|
||||||
orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up
|
orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up
|
||||||
|
|
||||||
|
.ifc \is_first,true
|
||||||
|
mul \sample_reg, r6, r2
|
||||||
|
.else
|
||||||
mla \sample_reg, r6, r2, \sample_reg @ r4 += r6 * r2 (calculating both the left and right samples together)
|
mla \sample_reg, r6, r2, \sample_reg @ r4 += r6 * r2 (calculating both the left and right samples together)
|
||||||
|
.endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
1:
|
1:
|
||||||
|
.ifc \is_first,true
|
||||||
|
.else
|
||||||
ldmia r1, {{r9-r12}} @ read the current values
|
ldmia r1, {{r9-r12}} @ read the current values
|
||||||
|
.endif
|
||||||
|
|
||||||
add_stereo_sample r9
|
add_stereo_sample r9
|
||||||
add_stereo_sample r10
|
add_stereo_sample r10
|
||||||
add_stereo_sample r11
|
add_stereo_sample r11
|
||||||
add_stereo_sample r12
|
add_stereo_sample r12
|
||||||
|
.purgem add_stereo_sample
|
||||||
|
|
||||||
stmia r1!, {{r9-r12}} @ store the new value, and increment the pointer
|
stmia r1!, {{r9-r12}} @ store the new value, and increment the pointer
|
||||||
|
|
||||||
|
@ -146,7 +168,11 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
pop {{r4-r11}}
|
pop {{r4-r11}}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
agb_arm_end agb_rs__mixer_add_stereo
|
agb_arm_end \fn_name
|
||||||
|
.endm
|
||||||
|
|
||||||
|
stereo_add_fn agb_rs__mixer_add_stereo false
|
||||||
|
stereo_add_fn agb_rs__mixer_add_stereo_first true
|
||||||
|
|
||||||
agb_arm_func agb_rs__mixer_collapse
|
agb_arm_func agb_rs__mixer_collapse
|
||||||
@ Arguments:
|
@ Arguments:
|
||||||
|
|
|
@ -29,12 +29,26 @@ extern "C" {
|
||||||
right_amount: Num<i16, 4>,
|
right_amount: Num<i16, 4>,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
fn agb_rs__mixer_add_first(
|
||||||
|
sound_data: *const u8,
|
||||||
|
sound_buffer: *mut Num<i16, 4>,
|
||||||
|
playback_speed: Num<u32, 8>,
|
||||||
|
left_amount: Num<i16, 4>,
|
||||||
|
right_amount: Num<i16, 4>,
|
||||||
|
);
|
||||||
|
|
||||||
fn agb_rs__mixer_add_stereo(
|
fn agb_rs__mixer_add_stereo(
|
||||||
sound_data: *const u8,
|
sound_data: *const u8,
|
||||||
sound_buffer: *mut Num<i16, 4>,
|
sound_buffer: *mut Num<i16, 4>,
|
||||||
volume: Num<i16, 4>,
|
volume: Num<i16, 4>,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
fn agb_rs__mixer_add_stereo_first(
|
||||||
|
sound_data: *const u8,
|
||||||
|
sound_buffer: *mut Num<i16, 4>,
|
||||||
|
volume: Num<i16, 4>,
|
||||||
|
);
|
||||||
|
|
||||||
fn agb_rs__mixer_collapse(
|
fn agb_rs__mixer_collapse(
|
||||||
sound_buffer: *mut i8,
|
sound_buffer: *mut i8,
|
||||||
input_buffer: *const Num<i16, 4>,
|
input_buffer: *const Num<i16, 4>,
|
||||||
|
@ -406,13 +420,9 @@ impl MixerBuffer {
|
||||||
working_buffer: &mut [Num<i16, 4>],
|
working_buffer: &mut [Num<i16, 4>],
|
||||||
channels: impl Iterator<Item = &'a mut SoundChannel>,
|
channels: impl Iterator<Item = &'a mut SoundChannel>,
|
||||||
) {
|
) {
|
||||||
working_buffer.fill(0.into());
|
let mut channels = channels
|
||||||
|
.filter(|channel| !channel.is_done)
|
||||||
for channel in channels {
|
.filter_map(|channel| {
|
||||||
if channel.is_done {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let playback_speed = if channel.is_stereo {
|
let playback_speed = if channel.is_stereo {
|
||||||
2.into()
|
2.into()
|
||||||
} else {
|
} else {
|
||||||
|
@ -427,10 +437,47 @@ impl MixerBuffer {
|
||||||
channel.pos = 0.into();
|
channel.pos = 0.into();
|
||||||
} else {
|
} else {
|
||||||
channel.is_done = true;
|
channel.is_done = true;
|
||||||
continue;
|
return None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some((channel, playback_speed))
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Some((channel, playback_speed)) = channels.next() {
|
||||||
|
if channel.volume != 0.into() {
|
||||||
|
if channel.is_stereo {
|
||||||
|
unsafe {
|
||||||
|
agb_rs__mixer_add_stereo_first(
|
||||||
|
channel.data.as_ptr().add(channel.pos.floor() as usize),
|
||||||
|
working_buffer.as_mut_ptr(),
|
||||||
|
channel.volume,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let right_amount = ((channel.panning + 1) / 2) * channel.volume;
|
||||||
|
let left_amount = ((-channel.panning + 1) / 2) * channel.volume;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
agb_rs__mixer_add_first(
|
||||||
|
channel.data.as_ptr().add(channel.pos.floor() as usize),
|
||||||
|
working_buffer.as_mut_ptr(),
|
||||||
|
playback_speed,
|
||||||
|
left_amount,
|
||||||
|
right_amount,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
working_buffer.fill(0.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
channel.pos += playback_speed * self.frequency.buffer_size() as u32;
|
||||||
|
} else {
|
||||||
|
working_buffer.fill(0.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (channel, playback_speed) in channels {
|
||||||
if channel.volume != 0.into() {
|
if channel.volume != 0.into() {
|
||||||
if channel.is_stereo {
|
if channel.is_stereo {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
|
Loading…
Reference in a new issue