From 73b85302400ded4213f33d88729ebfc799695854 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 16:58:46 +0100 Subject: [PATCH 01/11] Reduce register usage --- agb/src/sound/mixer/mixer.s | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 887fec1a..89f2b428 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -157,13 +157,15 @@ agb_arm_end agb_rs__mixer_add_stereo cmp \reg, #127 movgt \reg, #127 + + and \reg, \reg, #255 .endm agb_arm_func agb_rs__mixer_collapse @ Arguments: @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {r4, r5, r6, r7, r8, r9, r10} + push {r4, r5, r6} ldr r2, agb_rs__buffer_size @ loop counter mov r4, r2 @@ -182,29 +184,18 @@ agb_arm_func agb_rs__mixer_collapse .endm load_sample r3, r12 + load_sample r5, r6 - load_sample r7, r8 - load_sample r9, r10 - - @ combine the four samples so we can store in 32-bit chunks - @ need to ensure that we don't overwrite the extra bit of the sample - and r3, r3, #255 - and r12, r12, #255 - and r5, r5, #255 - and r6, r6, #255 - and r7, r7, #255 - and r8, r8, #255 - and r9, r9, #255 - and r10, r10, #255 - - @ combine all of the samples orr r3, r3, r5, lsl #8 - orr r3, r3, r7, lsl #16 - orr r3, r3, r9, lsl #24 - orr r12, r12, r6, lsl #8 - orr r12, r12, r8, lsl #16 - orr r12, r12, r10, lsl #24 + + load_sample r5, r6 + orr r3, r3, r5, lsl #16 + orr r12, r12, r6, lsl #16 + + load_sample r5, r6 + orr r3, r3, r5, lsl #24 + orr r12, r12, r6, lsl #24 str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3 str r12, [r0], #4 @ *r0 = r12; r0 += 4 @@ -212,6 +203,6 @@ agb_arm_func agb_rs__mixer_collapse subs r2, r2, #4 @ r2 -= 4 bne 1b @ loop if not 0 - pop {r4, r5, r6, r7, r8, r9, r10} + pop {r4, r5, r6} bx lr agb_arm_end agb_rs__mixer_collapse From 0229b95d7caf4ccaddb64de2ec8da3744c1a5d7e Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 16:59:17 +0100 Subject: [PATCH 02/11] Move macro definition --- agb/src/sound/mixer/mixer.s | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 89f2b428..ba4796b6 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -151,6 +151,15 @@ agb_arm_func agb_rs__mixer_add_stereo agb_arm_end agb_rs__mixer_add_stereo +agb_arm_func agb_rs__mixer_collapse + @ Arguments: + @ r0 = target buffer (i8) + @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) + push {r4, r5, r6} + + ldr r2, agb_rs__buffer_size @ loop counter + mov r4, r2 + .macro clamp_s8 reg:req cmn \reg, #128 mvnlt \reg, #128 @@ -161,16 +170,6 @@ agb_arm_end agb_rs__mixer_add_stereo and \reg, \reg, #255 .endm -agb_arm_func agb_rs__mixer_collapse - @ Arguments: - @ r0 = target buffer (i8) - @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {r4, r5, r6} - - ldr r2, agb_rs__buffer_size @ loop counter - mov r4, r2 - -1: .macro load_sample left_reg:req right_reg:req @ left_reg = *r1; r1++ ldr \left_reg, [r1], #4 @@ -183,6 +182,7 @@ agb_arm_func agb_rs__mixer_collapse clamp_s8 \right_reg .endm +1: load_sample r3, r12 load_sample r5, r6 From 3fea9aada05567178f0c7b7a33d1f5b157a9625e Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 17:58:24 +0100 Subject: [PATCH 03/11] Somehow reduce clamp_s8 by 1 instruction --- agb/src/sound/mixer/mixer.s | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index ba4796b6..667607a1 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -155,19 +155,24 @@ agb_arm_func agb_rs__mixer_collapse @ Arguments: @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {r4, r5, r6} + push {r4, r5, r6, r7, r8, r9, r10} + +CONST_0 .req r7 +CONST_FF .req r8 +CONST_127 .req r9 +TEMP .req r10 + + ldr CONST_0, =0 + ldr CONST_FF, =0xff + ldr CONST_127, =127 ldr r2, agb_rs__buffer_size @ loop counter mov r4, r2 .macro clamp_s8 reg:req - cmn \reg, #128 - mvnlt \reg, #128 - - cmp \reg, #127 - movgt \reg, #127 - - and \reg, \reg, #255 + add \reg, \reg, #127 + subs r10, r7, \reg, asr #8 + andne \reg, r8, r10, lsr #24 .endm .macro load_sample left_reg:req right_reg:req @@ -197,12 +202,16 @@ agb_arm_func agb_rs__mixer_collapse orr r3, r3, r5, lsl #24 orr r12, r12, r6, lsl #24 + ldr r5, =0x80808080 + eor r3, r3, r5 + eor r12, r12, r5 + str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3 str r12, [r0], #4 @ *r0 = r12; r0 += 4 subs r2, r2, #4 @ r2 -= 4 bne 1b @ loop if not 0 - pop {r4, r5, r6} + pop {r4, r5, r6, r7, r8, r9, r10} bx lr agb_arm_end agb_rs__mixer_collapse From b02cdd9233d48a3f53c33be192970582d1af7f2c Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 18:05:54 +0100 Subject: [PATCH 04/11] Calculate 127 + sample earlier --- agb/src/sound/mixer/mixer.s | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 667607a1..4df3d6b6 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -170,7 +170,6 @@ TEMP .req r10 mov r4, r2 .macro clamp_s8 reg:req - add \reg, \reg, #127 subs r10, r7, \reg, asr #8 andne \reg, r8, r10, lsr #24 .endm @@ -180,8 +179,8 @@ TEMP .req r10 ldr \left_reg, [r1], #4 lsl \right_reg, \left_reg, #16 @ push the sample 16 bits first - asr \right_reg, \right_reg, #20 @ move right sample back to being the correct value - mov \left_reg, \left_reg, asr #20 @ now we only have the left sample + add \right_reg, r9, \right_reg, asr #20 @ move right sample back to being the correct value + add \left_reg, r9, \left_reg, asr #20 @ now we only have the left sample clamp_s8 \left_reg @ clamp the audio to 8 bit values clamp_s8 \right_reg From 8ec839f181d59eae8ed3a8bfd74e8ec4a2d16e7e Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 18:07:01 +0100 Subject: [PATCH 05/11] Be more consistent with instruction use --- agb/src/sound/mixer/mixer.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 4df3d6b6..84262d8d 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -178,7 +178,7 @@ TEMP .req r10 @ left_reg = *r1; r1++ ldr \left_reg, [r1], #4 - lsl \right_reg, \left_reg, #16 @ push the sample 16 bits first + mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first add \right_reg, r9, \right_reg, asr #20 @ move right sample back to being the correct value add \left_reg, r9, \left_reg, asr #20 @ now we only have the left sample From 863abe1d429be0a09d33ff69eb8c1f953f815b14 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 18:08:15 +0100 Subject: [PATCH 06/11] Only load SWAP_SIGN once --- agb/src/sound/mixer/mixer.s | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 84262d8d..2cbd4f36 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -155,16 +155,18 @@ agb_arm_func agb_rs__mixer_collapse @ Arguments: @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {r4, r5, r6, r7, r8, r9, r10} + push {r4, r5, r6, r7, r8, r9, r10, r11} CONST_0 .req r7 CONST_FF .req r8 CONST_127 .req r9 TEMP .req r10 +SWAP_SIGN .req r11 ldr CONST_0, =0 ldr CONST_FF, =0xff ldr CONST_127, =127 + ldr SWAP_SIGN, =0x80808080 ldr r2, agb_rs__buffer_size @ loop counter mov r4, r2 @@ -201,9 +203,8 @@ TEMP .req r10 orr r3, r3, r5, lsl #24 orr r12, r12, r6, lsl #24 - ldr r5, =0x80808080 - eor r3, r3, r5 - eor r12, r12, r5 + eor r3, r3, SWAP_SIGN + eor r12, r12, SWAP_SIGN str r3, [r0, r4] @ *(r0 + (r4 = SOUND_BUFFER_SIZE)) = r3 str r12, [r0], #4 @ *r0 = r12; r0 += 4 @@ -211,6 +212,6 @@ TEMP .req r10 subs r2, r2, #4 @ r2 -= 4 bne 1b @ loop if not 0 - pop {r4, r5, r6, r7, r8, r9, r10} + pop {r4, r5, r6, r7, r8, r9, r10, r11} bx lr agb_arm_end agb_rs__mixer_collapse From a0b28176c6288b8c12bb42cacdd3adcf6e52135e Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 20:25:23 +0100 Subject: [PATCH 07/11] Use .rept rather than defining a macro --- agb/src/sound/mixer/mixer.s | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 2cbd4f36..7af98fce 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -28,7 +28,7 @@ modifications_fallback: 1: -.macro mixer_add_loop +.rept 4 add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer ldrsb r6, [r4] @ load the current sound sample to r6 add r5, r5, r2 @ calculate the position to read the next sample from @@ -38,12 +38,7 @@ modifications_fallback: mla r4, r6, r7, r4 @ r4 += r6 * r7 (calculating both the left and right samples together) str r4, [r1], #4 @ store the new value, and increment the pointer -.endm - - mixer_add_loop - mixer_add_loop - mixer_add_loop - mixer_add_loop +.endr subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop @@ -69,7 +64,8 @@ same_modification: mov r5, #0 @ current index we're reading from ldr r8, agb_rs__buffer_size @ the number of steps left -.macro mixer_add_loop_simple +1: +.rept 4 add r4, r0, r5, asr #8 @ calculate the address of the next read from the sound buffer ldrsb r6, [r4] @ load the current sound sample to r6 add r5, r5, r2 @ calculate the position to read the next sample from @@ -81,13 +77,7 @@ same_modification: add r4, r4, r6, lsl r3 @ r4 += r6 << r3 (calculating both the left and right samples together) str r4, [r1], #4 @ store the new value, and increment the pointer -.endm - -1: - mixer_add_loop_simple - mixer_add_loop_simple - mixer_add_loop_simple - mixer_add_loop_simple +.endr subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop @@ -107,7 +97,9 @@ agb_arm_func agb_rs__mixer_add_stereo ldr r5, =0x00000FFF -.macro mixer_add_loop_simple_stereo + ldr r8, agb_rs__buffer_size +1: +.rept 4 ldrsh r6, [r0], #2 @ load the current sound sample to r6 ldr r4, [r1] @ read the current value @@ -134,14 +126,7 @@ agb_arm_func agb_rs__mixer_add_stereo add r4, r4, r6, lsl #4 @ r4 += r6 << 4 (calculating both the left and right samples together) str r4, [r1], #4 @ store the new value, and increment the pointer -.endm - - ldr r8, agb_rs__buffer_size -1: - mixer_add_loop_simple_stereo - mixer_add_loop_simple_stereo - mixer_add_loop_simple_stereo - mixer_add_loop_simple_stereo +.endr subs r8, r8, #4 @ loop counter bne 1b @ jump back if we're done with the loop From 0ec3c499cbe5892026fa9cdb9494b96626b63d92 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 20:27:41 +0100 Subject: [PATCH 08/11] Just use range push and pop --- agb/src/sound/mixer/mixer.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 7af98fce..98a953e5 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -140,7 +140,7 @@ agb_arm_func agb_rs__mixer_collapse @ Arguments: @ r0 = target buffer (i8) @ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32) - push {r4, r5, r6, r7, r8, r9, r10, r11} + push {r4-r11} CONST_0 .req r7 CONST_FF .req r8 @@ -197,6 +197,6 @@ SWAP_SIGN .req r11 subs r2, r2, #4 @ r2 -= 4 bne 1b @ loop if not 0 - pop {r4, r5, r6, r7, r8, r9, r10, r11} + pop {r4-r11} bx lr agb_arm_end agb_rs__mixer_collapse From abfbf6a454b57a505668283c23ea7e7b58a17436 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 20:36:06 +0100 Subject: [PATCH 09/11] Use the renamed register names --- agb/src/sound/mixer/mixer.s | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 98a953e5..5e4964b2 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -157,19 +157,19 @@ SWAP_SIGN .req r11 mov r4, r2 .macro clamp_s8 reg:req - subs r10, r7, \reg, asr #8 - andne \reg, r8, r10, lsr #24 + subs TEMP, CONST_0, \reg, asr #8 + andne \reg, CONST_FF, TEMP, lsr #24 .endm .macro load_sample left_reg:req right_reg:req @ left_reg = *r1; r1++ ldr \left_reg, [r1], #4 - mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first - add \right_reg, r9, \right_reg, asr #20 @ move right sample back to being the correct value - add \left_reg, r9, \left_reg, asr #20 @ now we only have the left sample + mov \right_reg, \left_reg, lsl #16 @ push the sample 16 bits first + add \right_reg, CONST_127, \right_reg, asr #20 @ move right sample back to being the correct value + add \left_reg, CONST_127, \left_reg, asr #20 @ now we only have the left sample - clamp_s8 \left_reg @ clamp the audio to 8 bit values + clamp_s8 \left_reg @ clamp the audio to 8 bit values clamp_s8 \right_reg .endm From 4a06acba6ba5e23d331d7a980b66154a039300c5 Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 20:42:32 +0100 Subject: [PATCH 10/11] Allow volumes to be a lot higher and check that clipping is handled --- agb/examples/mixer_basic.rs | 2 ++ agb/src/sound/mixer/mod.rs | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/agb/examples/mixer_basic.rs b/agb/examples/mixer_basic.rs index 0dbdb630..a5a982b1 100644 --- a/agb/examples/mixer_basic.rs +++ b/agb/examples/mixer_basic.rs @@ -42,6 +42,8 @@ fn main(mut gba: Gba) -> ! { if input.is_pressed(Button::L) { channel.volume(half); + } else if input.is_pressed(Button::R) { + channel.volume(20.into()); // intentionally introduce clipping } else { channel.volume(1.into()); } diff --git a/agb/src/sound/mixer/mod.rs b/agb/src/sound/mixer/mod.rs index d537d3cc..9ff2db63 100644 --- a/agb/src/sound/mixer/mod.rs +++ b/agb/src/sound/mixer/mod.rs @@ -96,7 +96,6 @@ impl SoundChannel { #[inline(always)] pub fn volume(&mut self, volume: Num) -> &mut Self { - assert!(volume <= Num::new(1), "volume must be <= 1"); assert!(volume >= Num::new(0), "volume must be >= 0"); self.volume = volume; From 4d2ad8859b529dbd8fdde48e115f2cb736bd330d Mon Sep 17 00:00:00 2001 From: Gwilym Kuiper Date: Wed, 4 May 2022 20:56:39 +0100 Subject: [PATCH 11/11] Add a comment explaining the improvement --- agb/src/sound/mixer/mixer.s | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s index 5e4964b2..fc40820b 100644 --- a/agb/src/sound/mixer/mixer.s +++ b/agb/src/sound/mixer/mixer.s @@ -156,6 +156,24 @@ SWAP_SIGN .req r11 ldr r2, agb_rs__buffer_size @ loop counter mov r4, r2 +@ The idea for this solution came from pimpmobile: +@ https://github.com/kusma/pimpmobile/blob/f2b2be49e806ca2a0d99cf91b3838d6d10f86b7d/src/pimp_mixer_clip_arm.S +@ +@ The register should be 127 bigger then what you actually want, and we'll correct for that later. Hence the +@ add instructions in `load_sample`. +@ +@ The idea behind this is in the bit patters of -128 and 127 which are 10000000 and 01111111 respectively, +@ and we want to clamp the value between them. +@ +@ The first instruction calculates `-((sample + 128) >> 8)`. If sample is between -128 and 127, then +@ 0 <= sample + 128 <= 255 which means that shifting that right by 8 is 0. Hence the zero flag will be set, so +@ the `andne` instruction won't execute. +@ +@ If the sample is outside of a signed 8 bit value, then `sample >> 8` will either be -1 or 1 (we assume that samples) +@ don't go too high, but the idea still works, so you can generalise this further if you want. This value is stored in TEMP +@ +@ -1 has binary expansion (as a 32-bit integer) of all 1s and 1 of all zeros and then a 1. +@ So (-1 logical >> 24) gives 11111111 and (1 logical >> 24) gives 00000000 so register is clamped between these two values. .macro clamp_s8 reg:req subs TEMP, CONST_0, \reg, asr #8 andne \reg, CONST_FF, TEMP, lsr #24