From f5d771d3497e724f9edcd09e1c88327503539f23 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Sat, 31 Jul 2021 21:47:05 +0100 Subject: [PATCH] Use asm for the tight loops in the sound mixer --- agb/build.rs | 1 + agb/crt0.s | 1 + agb/examples/mixer_basic.rs | 6 +- agb/src/number.rs | 1 + agb/src/sound/mixer/mixer.s | 98 +++++++++++++++++++++++++++++++++ agb/src/sound/mixer/sw_mixer.rs | 32 ++++++++--- 6 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 agb/src/sound/mixer/mixer.s diff --git a/agb/build.rs b/agb/build.rs index e0b833cc..ee0cb111 100644 --- a/agb/build.rs +++ b/agb/build.rs @@ -4,6 +4,7 @@ fn main() { println!("cargo:rerun-if-changed=crt0.s"); println!("cargo:rerun-if-changed=gba_mb.ld"); println!("cargo:rerun-if-changed=interrupt_simple.s"); + println!("cargo:rerun-if-changed=src/sound/mixer/mixer.s"); println!("cargo:rerun-if-changed=gfx/test_logo.png"); let out_file_name = "crt0.o"; diff --git a/agb/crt0.s b/agb/crt0.s index a71b819b..1b51ec83 100644 --- a/agb/crt0.s +++ b/agb/crt0.s @@ -63,3 +63,4 @@ b .Initialise_mb .pool .include "interrupt_simple.s" +.include "src/sound/mixer/mixer.s" diff --git a/agb/examples/mixer_basic.rs b/agb/examples/mixer_basic.rs index 7b288633..c234642f 100644 --- a/agb/examples/mixer_basic.rs +++ b/agb/examples/mixer_basic.rs @@ -3,10 +3,10 @@ extern crate agb; +use agb::input::{Button, ButtonController, Tri}; +use agb::number::Num; use agb::sound::mixer::SoundChannel; use agb::Gba; -use agb::input::{ButtonController, Tri, Button}; -use agb::number::Num; // Music - "I will not let you let me down" by Josh Woodward, free download at http://joshwoodward.com const I_WILL_NOT_LET_YOU_LET_ME_DOWN: &[u8] = include_bytes!("i-will-not-let-you-let-me-down.raw"); @@ -35,7 +35,7 @@ pub fn main() -> ! 
{ Tri::Zero => channel.panning(0.into()), Tri::Positive => channel.panning(half), }; - + match input.y_tri() { Tri::Negative => channel.playback(half_usize.change_base() + 1), Tri::Zero => channel.playback(1.into()), diff --git a/agb/src/number.rs b/agb/src/number.rs index 250b0a50..bf9c57f3 100644 --- a/agb/src/number.rs +++ b/agb/src/number.rs @@ -91,6 +91,7 @@ fixed_width_unsigned_integer_impl!(usize); fixed_width_signed_integer_impl!(i16); fixed_width_signed_integer_impl!(i32); +#[repr(C)] #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Num(I); diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s new file mode 100644 index 00000000..60d10eab --- /dev/null +++ b/agb/src/sound/mixer/mixer.s @@ -0,0 +1,98 @@ +.arm +.global agb_rs__mixer_add +.section .iwram, "ax" +.align +agb_rs__mixer_add: + @ Arguments + @ r0 - pointer to the data to be copied (u8 array) + @ r1 - pointer to the sound buffer (i16 array) + @ r2 - playback speed (usize fixnum with 8 bits) + @ r3 - amount to modify the left channel by (u16 fixnum with 4 bits) + @ stack position 1 - amount to modify the right channel by (u16 fixnum with 4 bits) + @ + @ The sound buffer must be SOUND_BUFFER_SIZE * 2 in size = 176 * 2 + + @ lr = amount to modify right channel by + + push {r4-r10, lr} + + @ load the right channel modification amount into lr + ldr lr, [sp, #32] + + @ current write offset into the resulting buffer + mov r12, #0 + mov r8, #352 + + @ current index + mov r5, #0 + +1: + @ load the current sound buffer location + mov r6, r1 + + @ calculate the address of the next read from the sound data + add r4, r0, r5, asr #8 + + @ calculate the position to read the next step from + add r5, r5, r2 + + @ load the current buffer value (r6 being the current location, r12 being the offset) + @ but pre-increment r6 by r12 + ldrh r9, [r6, r12]! 
+ + @ load the current value we want to read + ldrsb r10, [r4] + + @ increment the current write offset in the resulting buffer + add r12, r12, #2 + + @ check if we're done + cmp r12, #352 + + @ r7 = r10 * r3 + r9 = current sound value * left amount + previous buffer value + mla r7, r10, r3, r9 + @ *r6 = r7, then r6 += r8 (r8 = 352 = offset for the right hand side) + strh r7, [r6], r8 + + @ same for the right hand side, using the right channel amount in lr (this is what was generated) + ldrh r7, [r6] + mla r4, r10, lr, r7 + strh r4, [r6] + + bne 1b + + pop {r4-r10, lr} + bx lr +.pool + +.arm +.global agb_rs__mixer_collapse +.section .iwram +.align +agb_rs__mixer_collapse: + @ Arguments: + @ r0 = target buffer (i8) + @ r1 = input buffer (i16) of fixnums with 4 bits of precision + + mov r2, #0 + +1: + @ r12 = *r1; r1++ + ldrsh r12, [r1], #2 + + lsr r3, r12, #4 @ r3 = r12 >> 4 + + cmn r12, #2048 @ compare r12 against -2048 + mvnlt r3, #127 @ r3 = -128 if r12 < -2048 + + cmp r12, #2048 @ compare r12 against 2048 + movge r3, #127 @ r3 = 127 if r12 >= 2048 + + strb r3, [r0, -r2] @ r2 counts down, so need a negative offset + + sub r2, r2, #1 @ r2 -= 1 + cmn r2, #352 @ compare r2 against -352 + + bne 1b @ loop if not equal + + bx lr diff --git a/agb/src/sound/mixer/sw_mixer.rs b/agb/src/sound/mixer/sw_mixer.rs index accc4168..54ad67c0 100644 --- a/agb/src/sound/mixer/sw_mixer.rs +++ b/agb/src/sound/mixer/sw_mixer.rs @@ -3,6 +3,19 @@ use super::hw::LeftOrRight; use super::{SoundChannel, SoundPriority}; use crate::number::Num; +// Defined in mixer.s +extern "C" { + fn agb_rs__mixer_add( + sound_data: *const u8, + sound_buffer: *mut Num, + playback_speed: Num, + left_amount: Num, + right_amount: Num, + ); + + fn agb_rs__mixer_collapse(sound_buffer: *mut i8, input_buffer: *const Num); +} + pub struct Mixer { buffer: MixerBuffer, channels: [Option; 16], @@ -131,18 +144,21 @@ impl MixerBuffer { } } - for i in 0..SOUND_BUFFER_SIZE { - let v = (channel.data[channel.pos.floor()] as i8) as i16; - 
channel.pos += channel.playback_speed; - - buffer[i] += left_amount * v; - buffer[i + SOUND_BUFFER_SIZE] += right_amount * v; + unsafe { + agb_rs__mixer_add( + channel.data.as_ptr().offset(channel.pos.floor() as isize), + buffer.as_mut_ptr(), + channel.playback_speed, + left_amount, + right_amount, + ); } + channel.pos += channel.playback_speed * SOUND_BUFFER_SIZE; } let write_buffer = self.get_write_buffer(); - for i in 0..SOUND_BUFFER_SIZE * 2 { - write_buffer[i] = buffer[i].floor().clamp(i8::MIN as i16, i8::MAX as i16) as i8 + unsafe { + agb_rs__mixer_collapse(write_buffer.as_mut_ptr(), buffer.as_ptr()); } }