From 6cf3ecaf365b2e70b48e5d1e787d9a3eb1194286 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 22:11:44 +0100 Subject: [PATCH 01/11] Don't need to save r9 --- agb/src/sound/mixer/mixer.s | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 04210d2a..fe46d650 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -100,7 +100,6 @@ agb_arm_func agb_rs__mixer_add_stereo @ The sound buffer must be SOUND_BUFFER_SIZE * 2 in size = 176 * 2 push {{r4-r9}} - mov r9, r2 ldr r5, =0x00000FFF ldr r8, =agb_rs__buffer_size @@ -130,7 +129,7 @@ agb_arm_func agb_rs__mixer_add_stereo lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up - mla r4, r6, r9, r4 @ r4 += r6 * r9 (calculating both the left and right samples together) + mla r4, r6, r2, r4 @ r4 += r6 * r2 (calculating both the left and right samples together) str r4, [r1], #4 @ store the new value, and increment the pointer .endr From 9e08303e627e769ae979030f7faea3872f9b5cf0 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 22:15:45 +0100 Subject: [PATCH 02/11] Extract a macro for this --- agb/src/sound/mixer/mixer.s | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index fe46d650..80ed60e8 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -98,17 +98,15 @@ agb_arm_func agb_rs__mixer_add_stereo @ r2 - volume to play the sound at @ @ The sound buffer must be SOUND_BUFFER_SIZE * 2 in size = 176 * 2 - push {{r4-r9}} + push {{r4-r12}} ldr r5, =0x00000FFF ldr r8, =agb_rs__buffer_size ldr r8, [r8] -1: -.rept 4 - ldrsh r6, [r0], #2 @ load the current sound sample to r6 - ldr r4, [r1] @ read the current value +.macro add_stereo_sample sample_reg:req + ldrsh r6, [r0], #2 @ load the current sound sample to r6 @ This is slightly convoluted, but is mainly done for performance reasons. It is better @ to hit ROM just once and then do 3 really simple instructions then do 2 ldrsbs however annoying @@ -129,7 +127,14 @@ agb_arm_func agb_rs__mixer_add_stereo lsl r6, r6, #24 @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | now we have it perfectly set up - mla r4, r6, r2, r4 @ r4 += r6 * r2 (calculating both the left and right samples together) + mla \sample_reg, r6, r2, \sample_reg @ r4 += r6 * r2 (calculating both the left and right samples together) +.endm + +1: +.rept 4 + ldr r4, [r1] @ read the current value + + add_stereo_sample r4 str r4, [r1], #4 @ store the new value, and increment the pointer .endr @@ -137,7 +142,7 @@ agb_arm_func agb_rs__mixer_add_stereo subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop - pop {{r4-r9}} + pop {{r4-r12}} bx lr agb_arm_end agb_rs__mixer_add_stereo From 334e70c66481cfcd43f9eb9ce191a19754254076 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 22:19:56 +0100 Subject: [PATCH 03/11] Do multiple loads at once --- agb/src/sound/mixer/mixer.s | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 80ed60e8..5dbb84c0 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -131,13 +131,14 @@ agb_arm_func agb_rs__mixer_add_stereo .endm 1: -.rept 4 - ldr r4, [r1] @ read the current value + ldmia r1, {{r9-r12}} @ read the current values - add_stereo_sample r4 + add_stereo_sample r9 + add_stereo_sample r10 + add_stereo_sample r11 + add_stereo_sample r12 - str r4, [r1], #4 @ store the new value, and increment the pointer -.endr + stmia r1!, {{r9-r12}} @ store the new value, and increment the pointer subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop From ceb57eb00294fc9705c791007a5d98896bf84031 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 22:22:03 +0100 Subject: [PATCH 04/11] r12 is a scratch register --- agb/src/sound/mixer/mixer.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 5dbb84c0..e19e682b 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -98,7 +98,7 @@ agb_arm_func agb_rs__mixer_add_stereo @ r2 - volume to play the sound at @ @ The sound buffer must be SOUND_BUFFER_SIZE * 2 in size = 176 * 2 - push {{r4-r12}} + push {{r4-r11}} ldr r5, =0x00000FFF @@ -143,7 +143,7 @@ agb_arm_func agb_rs__mixer_add_stereo subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop - pop {{r4-r12}} + pop {{r4-r11}} bx lr agb_arm_end agb_rs__mixer_add_stereo From e72de28961d6d3cfd997bba282923da40813e304 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 23:41:15 +0100 Subject: [PATCH 05/11] Can I use movne here instead? --- agb/src/sound/mixer/mixer.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index e19e682b..d7386ae8 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -190,7 +190,7 @@ SWAP_SIGN .req r11 @ So (-1 logical >> 24) gives 11111111 and (1 logical >> 24) gives 00000000 so register is clamped between these two values. .macro clamp_s8 reg:req subs TEMP, CONST_0, \reg, asr #8 - andne \reg, CONST_FF, TEMP, lsr #24 + movne \reg, TEMP, lsr #24 .endm .macro load_sample left_reg:req right_reg:req From ba18a0bf4ae29146080182c653423088a2bc411e Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Fri, 16 Jun 2023 23:43:37 +0100 Subject: [PATCH 06/11] ldmia again --- agb/src/sound/mixer/mixer.s | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index d7386ae8..28d58e58 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -153,16 +153,14 @@ agb_arm_func agb_rs__mixer_collapse @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {{r4-r11}} + push {{r4-r11,lr}} CONST_0 .req r7 -CONST_FF .req r8 -CONST_127 .req r9 +CONST_127 .req r8 TEMP .req r10 SWAP_SIGN .req r11 ldr CONST_0, =0 - ldr CONST_FF, =0xff ldr CONST_127, =127 ldr SWAP_SIGN, =0x80808080 @@ -176,7 +174,7 @@ SWAP_SIGN .req r11 @ The register should be 127 bigger then what you actually want, and we'll correct for that later. Hence the @ add instructions in `load_sample`. @ -@ The idea behind this is in the bit patters of -128 and 127 which are 10000000 and 01111111 respectively, +@ The idea behind this is in the bit patters of -128 and 127 which are 10000000 and 01111111 respectively, -x = !x + 1 => !x = -x-1 @ and we want to clamp the value between them. @ @ The first instruction calculates `-((sample + 128) >> 8)`. If sample is between -128 and 127, then @@ -194,9 +192,6 @@ SWAP_SIGN .req r11 .endm .macro load_sample left_reg:req right_reg:req - @ left_reg = *r1; r1++ - ldr \left_reg, [r1], #4 - mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first add \right_reg, CONST_127, \right_reg, asr #20 @ move right sample back to being the correct value add \left_reg, CONST_127, \left_reg, asr #20 @ now we only have the left sample @@ -207,19 +202,21 @@ SWAP_SIGN .req r11 1: .rept 4 + ldmia r1!, {{r3,r5,r6,r9}} + load_sample r3, r12 - load_sample r5, r6 + load_sample r5, lr orr r3, r3, r5, lsl #8 - orr r12, r12, r6, lsl #8 + orr r12, r12, lr, lsl #8 - load_sample r5, r6 - orr r3, r3, r5, lsl #16 - orr r12, r12, r6, lsl #16 + load_sample r6, lr + orr r3, r3, r6, lsl #16 + orr r12, r12, lr, lsl #16 - load_sample r5, r6 - orr r3, r3, r5, lsl #24 - orr r12, r12, r6, lsl #24 + load_sample r9, lr + orr r3, r3, r9, lsl #24 + orr r12, r12, lr, lsl #24 eor r3, r3, SWAP_SIGN eor r12, r12, SWAP_SIGN @@ -231,6 +228,6 @@ SWAP_SIGN .req r11 subs r2, r2, #16 @ r2 -= 16 bne 1b @ loop if not 0 - pop {{r4-r11}} + pop {{r4-r11,lr}} bx lr agb_arm_end agb_rs__mixer_collapse From b2dcd8c8547a743cb94f27e60ce684275a7f6f23 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Sun, 18 Jun 2023 16:02:14 +0100 Subject: [PATCH 07/11] Pass buffer size as an argument --- agb/src/sound/mixer/mixer.s | 3 +-- agb/src/sound/mixer/sw_mixer.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 28d58e58..66bec925 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -152,6 +152,7 @@ agb_arm_func agb_rs__mixer_collapse @ Arguments: @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) + @ r2 = loop counter push {{r4-r11,lr}} @@ -164,8 +165,6 @@ SWAP_SIGN .req r11 ldr CONST_127, =127 ldr SWAP_SIGN, =0x80808080 - ldr r2, =agb_rs__buffer_size @ loop counter - ldr r2, [r2] mov r4, r2 @ The idea for this solution came from pimpmobile: diff --git a/agb/src/sound/mixer/sw_mixer.rs b/agb/src/sound/mixer/sw_mixer.rs index 1962fef9..5c01181b 100644 --- a/agb/src/sound/mixer/sw_mixer.rs +++ b/agb/src/sound/mixer/sw_mixer.rs @@ -35,7 +35,11 @@ extern "C" { volume: Num, ); - fn agb_rs__mixer_collapse(sound_buffer: *mut i8, input_buffer: *const Num); + fn agb_rs__mixer_collapse( + sound_buffer: *mut i8, + input_buffer: *const Num, + num_samples: usize, + ); } /// The main software mixer struct. @@ -458,7 +462,11 @@ impl MixerBuffer { let write_buffer = free(|cs| self.state.borrow(cs).borrow_mut().active_advanced()); unsafe { - agb_rs__mixer_collapse(write_buffer, working_buffer.as_ptr()); + agb_rs__mixer_collapse( + write_buffer, + working_buffer.as_ptr(), + self.frequency.buffer_size(), + ); } } } From 5a374ba4f0cbef8c196afac2099285dcc424958a Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Sun, 18 Jun 2023 16:28:32 +0100 Subject: [PATCH 08/11] Add collapse test and fix bug uncovered by it --- agb/src/sound/mixer/mixer.s | 8 ++--- agb/src/sound/mixer/sw_mixer.rs | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 66bec925..2299a22b 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -157,12 +157,12 @@ agb_arm_func agb_rs__mixer_collapse push {{r4-r11,lr}} CONST_0 .req r7 -CONST_127 .req r8 +CONST_128 .req r8 TEMP .req r10 SWAP_SIGN .req r11 ldr CONST_0, =0 - ldr CONST_127, =127 + ldr CONST_128, =128 ldr SWAP_SIGN, =0x80808080 mov r4, r2 @@ -192,8 +192,8 @@ SWAP_SIGN .req r11 .macro load_sample left_reg:req right_reg:req mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first - add \right_reg, CONST_127, \right_reg, asr #20 @ move right sample back to being the correct value - add \left_reg, CONST_127, \left_reg, asr #20 @ now we only have the left sample + add \right_reg, CONST_128, \right_reg, asr #20 @ move right sample back to being the correct value + add \left_reg, CONST_128, \left_reg, asr #20 @ now we only have the left sample clamp_s8 \left_reg @ clamp the audio to 8 bit values clamp_s8 \right_reg diff --git a/agb/src/sound/mixer/sw_mixer.rs b/agb/src/sound/mixer/sw_mixer.rs index 5c01181b..2b75734e 100644 --- a/agb/src/sound/mixer/sw_mixer.rs +++ b/agb/src/sound/mixer/sw_mixer.rs @@ -470,3 +470,64 @@ impl MixerBuffer { } } } + +#[cfg(test)] +mod test { + use crate::fixnum::num; + use alloc::vec; + + use super::*; + + #[test_case] + fn collapse_should_correctly_reduce_size_of_input(_: &mut crate::Gba) { + let input: &[Num] = &[ + num!(10.0), + num!(10.0), + num!(5.0), + num!(5.0), + num!(-10.0), + num!(-10.5), + num!(-5.9), + num!(-5.2), + num!(0.0), + num!(1.1), + num!(2.2), + num!(3.3), + num!(155.4), + num!(-230.5), + num!(400.6), + num!(-700.7), + num!(10.0), + num!(10.0), + num!(5.0), + num!(5.0), + num!(-10.0), + num!(-10.5), + num!(-5.9), + num!(-5.2), + num!(0.0), + num!(1.1), + num!(2.2), + num!(3.3), + num!(155.4), + num!(-230.5), + num!(400.6), + num!(-700.7), + ]; + + let mut output_buffer = vec![0i8; input.len()]; + + unsafe { + agb_rs__mixer_collapse(output_buffer.as_mut_ptr(), input.as_ptr(), input.len() / 2); + } + + // output will be unzipped, so input is LRLRLRLRLRLRLR... and output is LLLLLLRRRRRR + assert_eq!( + output_buffer, + &[ + 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -11, -6, 1, + 3, -128, -128, 10, 5, -11, -6, 1, 3, -128, -128 + ] + ); + } +} From 0e3fe2c49a36f91fd9a931821467af104dc86481 Mon Sep 17 00:00:00 2001 From: Gwilym Inzani Date: Sun, 18 Jun 2023 16:43:00 +0100 Subject: [PATCH 09/11] add a changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5850034..daa6a377 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Changed the default template game. - `DynamicSprite` has a new API which changes the constructor and adds a `set_pixel` method. - You no longer need to install arm-none-eabi-binutils. In order to write games using `agb`, you now only need to install rust nightly. +- 10% performance improvement with the software mixer. ## [0.15.0] - 2023/04/25 From 389e3ecadbd4faf4d8916e2ec027c78c27f5b89d Mon Sep 17 00:00:00 2001 From: Corwin Date: Tue, 20 Jun 2023 21:03:54 +0100 Subject: [PATCH 10/11] make test not reliant on coincidence alignment --- agb/src/sound/mixer/sw_mixer.rs | 92 ++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 41 deletions(-) diff --git a/agb/src/sound/mixer/sw_mixer.rs b/agb/src/sound/mixer/sw_mixer.rs index 2b75734e..b665a775 100644 --- a/agb/src/sound/mixer/sw_mixer.rs +++ b/agb/src/sound/mixer/sw_mixer.rs @@ -480,54 +480,64 @@ mod test { #[test_case] fn collapse_should_correctly_reduce_size_of_input(_: &mut crate::Gba) { - let input: &[Num] = &[ - num!(10.0), - num!(10.0), - num!(5.0), - num!(5.0), - num!(-10.0), - num!(-10.5), - num!(-5.9), - num!(-5.2), - num!(0.0), - num!(1.1), - num!(2.2), - num!(3.3), - num!(155.4), - num!(-230.5), - num!(400.6), - num!(-700.7), - num!(10.0), - num!(10.0), - num!(5.0), - num!(5.0), - num!(-10.0), - num!(-10.5), - num!(-5.9), - num!(-5.2), - num!(0.0), - num!(1.1), - num!(2.2), - num!(3.3), - num!(155.4), - num!(-230.5), - num!(400.6), - num!(-700.7), - ]; + #[repr(align(4))] + struct AlignedNumbers([Num; N]); - let mut output_buffer = vec![0i8; input.len()]; + let input = &AlignedNumbers([ + num!(10.0), + num!(10.0), + num!(5.0), + num!(5.0), + num!(-10.0), + num!(-10.5), + num!(-5.9), + num!(-5.2), + num!(0.0), + num!(1.1), + num!(2.2), + num!(3.3), + num!(155.4), + num!(-230.5), + num!(400.6), + num!(-700.7), + num!(10.0), + num!(10.0), + num!(5.0), + num!(5.0), + num!(-10.0), + num!(-10.5), + num!(-5.9), + num!(-5.2), + num!(0.0), + num!(1.1), + num!(2.2), + num!(3.3), + num!(155.4), + num!(-230.5), + num!(400.6), + num!(-700.7), + ]); + + let input = &input.0; + + let mut output_buffer = vec![0i32; input.len() / 4]; unsafe { - agb_rs__mixer_collapse(output_buffer.as_mut_ptr(), input.as_ptr(), input.len() / 2); + agb_rs__mixer_collapse( + output_buffer.as_mut_ptr().cast(), + input.as_ptr(), + input.len() / 2, + ); } // output will be unzipped, so input is LRLRLRLRLRLRLR... and output is LLLLLLRRRRRR - assert_eq!( - output_buffer, - &[ + assert!(output_buffer + .iter() + .flat_map(|x| x.to_le_bytes()) + .map(|x| x as i8) + .eq([ 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -11, -6, 1, 3, -128, -128, 10, 5, -11, -6, 1, 3, -128, -128 - ] - ); + ])); } } From dbf7715e67c134b583fcba14df55d7260c387d61 Mon Sep 17 00:00:00 2001 From: Corwin Date: Tue, 20 Jun 2023 21:08:00 +0100 Subject: [PATCH 11/11] use assert_eq and collect --- agb/src/sound/mixer/sw_mixer.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/agb/src/sound/mixer/sw_mixer.rs b/agb/src/sound/mixer/sw_mixer.rs index b665a775..387c67fb 100644 --- a/agb/src/sound/mixer/sw_mixer.rs +++ b/agb/src/sound/mixer/sw_mixer.rs @@ -531,13 +531,16 @@ mod test { } // output will be unzipped, so input is LRLRLRLRLRLRLR... and output is LLLLLLRRRRRR - assert!(output_buffer - .iter() - .flat_map(|x| x.to_le_bytes()) - .map(|x| x as i8) - .eq([ + assert_eq!( + output_buffer + .iter() + .flat_map(|x| x.to_le_bytes()) + .map(|x| x as i8) + .collect::>(), + &[ 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -10, -6, 0, 2, 127, 127, 10, 5, -11, -6, 1, 3, -128, -128, 10, 5, -11, -6, 1, 3, -128, -128 - ])); + ] + ); } }