Merge pull request #95 from gwilymk/attempt-speedup-mixer-in-asm

Speed up the mixer by writing the core loops in ASM
This commit is contained in:
Gwilym Kuiper 2021-08-01 21:35:07 +01:00 committed by GitHub
commit de360f5dbc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 117 additions and 12 deletions

View file

@ -2,6 +2,7 @@ fn main() {
println!("cargo:rerun-if-changed=crt0.s"); println!("cargo:rerun-if-changed=crt0.s");
println!("cargo:rerun-if-changed=gba_mb.ld"); println!("cargo:rerun-if-changed=gba_mb.ld");
println!("cargo:rerun-if-changed=interrupt_simple.s"); println!("cargo:rerun-if-changed=interrupt_simple.s");
println!("cargo:rerun-if-changed=src/sound/mixer/mixer.s");
println!("cargo:rerun-if-changed=gfx/test_logo.png"); println!("cargo:rerun-if-changed=gfx/test_logo.png");
let out_file_name = "crt0.o"; let out_file_name = "crt0.o";

View file

@ -63,3 +63,4 @@ b .Initialise_mb
.pool .pool
.include "interrupt_simple.s" .include "interrupt_simple.s"
.include "src/sound/mixer/mixer.s"

View file

@ -3,10 +3,10 @@
extern crate agb; extern crate agb;
use agb::input::{Button, ButtonController, Tri};
use agb::number::Num;
use agb::sound::mixer::SoundChannel; use agb::sound::mixer::SoundChannel;
use agb::Gba; use agb::Gba;
use agb::input::{ButtonController, Tri, Button};
use agb::number::Num;
// Music - "I will not let you let me down" by Josh Woodward, free download at http://joshwoodward.com // Music - "I will not let you let me down" by Josh Woodward, free download at http://joshwoodward.com
const I_WILL_NOT_LET_YOU_LET_ME_DOWN: &[u8] = include_bytes!("i-will-not-let-you-let-me-down.raw"); const I_WILL_NOT_LET_YOU_LET_ME_DOWN: &[u8] = include_bytes!("i-will-not-let-you-let-me-down.raw");

View file

@ -91,6 +91,7 @@ fixed_width_unsigned_integer_impl!(usize);
fixed_width_signed_integer_impl!(i16); fixed_width_signed_integer_impl!(i16);
fixed_width_signed_integer_impl!(i32); fixed_width_signed_integer_impl!(i32);
#[repr(C)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Num<I: FixedWidthUnsignedInteger, const N: usize>(I); pub struct Num<I: FixedWidthUnsignedInteger, const N: usize>(I);

View file

@ -0,0 +1,86 @@
.arm
.global agb_rs__mixer_add
.section .iwram, "ax"
.align
agb_rs__mixer_add:
@ Mixes one channel into the stereo accumulation buffer: for each of the 176
@ output samples, reads one signed 8-bit sample from the input data, multiplies
@ it by the left/right amounts and adds the products onto the 16-bit values
@ already stored in the left and right halves of the sound buffer.
@
@ Arguments
@ r0 - pointer to the sample data to be mixed in (byte array, each byte is read as a signed sample via ldrsb)
@ r1 - pointer to the sound buffer (i16 array)
@ r2 - playback speed (usize fixnum with 8 bits)
@ r3 - amount to modify the left channel by (16-bit fixnum with 4 bits)
@ stack position 1 - amount to modify the right channel by (16-bit fixnum with 4 bits)
@
@ The sound buffer must hold SOUND_BUFFER_SIZE * 2 = 176 * 2 i16 entries:
@ the first 176 entries are the left channel, the following 176 the right channel.
@ r9 = amount to modify right channel by
push {r4-r10} @ 7 registers = 28 bytes pushed, so the first stack argument is now at sp + 28
ldr r9, [sp, #28] @ load the right channel modification amount into r9
mov r12, #0 @ current write offset (in bytes) into the resulting buffer
mov r8, #352 @ byte distance between a left channel entry and its right channel entry (176 entries * 2 bytes)
mov r5, #0 @ current read position as a fixnum with 8 fractional bits, relative to r0 (always starts at exactly r0)
@ kept between iterations:
@ r12 - current write offset in bytes into the output buffer (r1)
@ r9 - the amount to modify the right channel by
@ r8 - the constant 352
@ r5 - the current fixnum position in the input data we're reading from (r0)
@ the provided arguments are all unmodified
@ all other registers are temporary
1:
add r4, r0, r5, asr #8 @ address of the next sample to read: r0 + integer part of r5
ldrsb r10, [r4] @ load the current sample, sign-extended to 32 bits
add r5, r5, r2 @ advance the read position by the playback speed for the next iteration
mov r6, r1 @ r6 = start of the sound buffer
ldrh r4, [r6, r12]! @ load the current left buffer value at r1 + r12; writeback leaves r6 = r1 + r12
mla r7, r10, r3, r4 @ r7 = r10 * r3 + r4 = current sample * left amount + previous buffer value
strh r7, [r6], r8 @ *r6 = r7 (low 16 bits), then r6 += r8 where r8 = 352 = byte offset to the right channel entry
ldrh r7, [r6] @ same for the right hand side, r6 now points to the right hand side location
mla r4, r10, r9, r7 @ r4 = r10 * r9 + r7 = current sample * right amount + previous buffer value
strh r4, [r6] @ store the low 16 bits back to the right channel entry
add r12, r12, #2 @ advance the write offset by one i16 entry
cmp r12, #352 @ done once all 176 entries (352 bytes) of the left channel have been visited
bne 1b
pop {r4-r10}
bx lr
.pool
.arm
.global agb_rs__mixer_collapse
.section .iwram
.align
agb_rs__mixer_collapse:
@ Converts 352 accumulated 16-bit samples into 8-bit output samples by dropping
@ the 4 fractional bits and saturating the result to the i8 range [-128, 127].
@
@ Arguments:
@ r0 = target buffer (i8), 352 bytes are written
@ r1 = input buffer (i16) of fixnums with 4 bits of precision, 352 entries are read
mov r2, #352 @ r2 = number of samples left to convert
1:
@ r12 = *r1 (sign-extended); r1 += 2 bytes, i.e. advance to the next i16
ldrsh r12, [r1], #2
lsr r3, r12, #4 @ r3 = r12 >> 4; a logical shift is fine because only the low byte of r3 is stored below
cmn r12, #2048 @ compare r12 against -2048 (computes r12 + 2048)
mvnlt r3, #127 @ r3 = ~127 = -128 if r12 < -2048
cmp r12, #2048 @ compare r12 against 2048
movge r3, #127 @ r3 = 127 if r12 >= 2048
strb r3, [r0], #1 @ *r0 = low byte of r3; r0++
subs r2, r2, #1 @ r2 -= 1, setting the flags for the branch below
bne 1b @ loop if not 0
bx lr
.pool

View file

@ -3,6 +3,19 @@ use super::hw::LeftOrRight;
use super::{SoundChannel, SoundPriority}; use super::{SoundChannel, SoundPriority};
use crate::number::Num; use crate::number::Num;
// Hand-written ARM routines for the hot loops of the mixer.
// Defined in mixer.s
extern "C" {
// Mixes one channel into the accumulation buffer.
//
// Per mixer.s: 176 samples are read from `sound_data` as signed bytes,
// stepping by `playback_speed` per output sample, so the data must be readable
// up to the integer part of `175 * playback_speed` bytes past the pointer.
// `sound_buffer` must point to 352 entries; sample * `left_amount` is added to
// entries 0..176 and sample * `right_amount` to entries 176..352.
// NOTE(review): the `Num` arguments are passed by value in registers / on the
// stack; presumably this relies on the `#[repr(C)]` layout of `Num` - confirm.
fn agb_rs__mixer_add(
sound_data: *const u8,
sound_buffer: *mut Num<i16, 4>,
playback_speed: Num<usize, 8>,
left_amount: Num<i16, 4>,
right_amount: Num<i16, 4>,
);
// Converts the accumulation buffer into the 8-bit output buffer.
//
// Per mixer.s: reads 352 entries from `input_buffer`, drops the 4 fractional
// bits, saturates each value to the i8 range and writes 352 bytes to
// `sound_buffer`.
fn agb_rs__mixer_collapse(sound_buffer: *mut i8, input_buffer: *const Num<i16, 4>);
}
pub struct Mixer { pub struct Mixer {
buffer: MixerBuffer, buffer: MixerBuffer,
channels: [Option<SoundChannel>; 16], channels: [Option<SoundChannel>; 16],
@ -120,7 +133,7 @@ impl MixerBuffer {
let right_amount = ((channel.panning + 1) / 2) * channel.volume; let right_amount = ((channel.panning + 1) / 2) * channel.volume;
let left_amount = ((-channel.panning + 1) / 2) * channel.volume; let left_amount = ((-channel.panning + 1) / 2) * channel.volume;
if channel.pos + channel.playback_speed * SOUND_BUFFER_SIZE >= channel.data.len().into() if (channel.pos + channel.playback_speed * SOUND_BUFFER_SIZE).floor() >= channel.data.len()
{ {
// TODO: This should probably play what's left rather than skip the last bit // TODO: This should probably play what's left rather than skip the last bit
if channel.should_loop { if channel.should_loop {
@ -131,18 +144,21 @@ impl MixerBuffer {
} }
} }
for i in 0..SOUND_BUFFER_SIZE { unsafe {
let v = (channel.data[channel.pos.floor()] as i8) as i16; agb_rs__mixer_add(
channel.pos += channel.playback_speed; channel.data.as_ptr().add(channel.pos.floor()),
buffer.as_mut_ptr(),
buffer[i] += left_amount * v; channel.playback_speed,
buffer[i + SOUND_BUFFER_SIZE] += right_amount * v; left_amount,
right_amount,
);
} }
channel.pos += channel.playback_speed * SOUND_BUFFER_SIZE;
} }
let write_buffer = self.get_write_buffer(); let write_buffer = self.get_write_buffer();
for i in 0..SOUND_BUFFER_SIZE * 2 { unsafe {
write_buffer[i] = buffer[i].floor().clamp(i8::MIN as i16, i8::MAX as i16) as i8 agb_rs__mixer_collapse(write_buffer.as_mut_ptr(), buffer.as_ptr());
} }
} }