From 9524b4b3c31d5249f63d947a0c18572fcf042802 Mon Sep 17 00:00:00 2001
From: Gwilym Kuiper <gw@ilym.me>
Date: Tue, 26 Oct 2021 22:25:35 +0100
Subject: [PATCH] Loads of comments to explain what all the steps are doing

---
 agb/src/sound/mixer/mixer.s | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/agb/src/sound/mixer/mixer.s b/agb/src/sound/mixer/mixer.s
index 440753b6..f4a5850f 100644
--- a/agb/src/sound/mixer/mixer.s
+++ b/agb/src/sound/mixer/mixer.s
@@ -101,17 +101,31 @@ agb_arm_func agb_rs__mixer_add_stereo
     @ The sound buffer must be SOUND_BUFFER_SIZE * 2 in size = 176 * 2
     push {r4-r8}
 
-    ldr r5, =0x0000FFFF
+    ldr r5, =0x00000FFF
 
 .macro mixer_add_loop_simple_stereo
     ldrsh r6, [r0], #2        @ load the current sound sample to r6
 
     ldr r4, [r1]             @ read the current value
 
-    mov r7, r6, asr #8
-    and r7, r7, r5
-    lsl r6, r6, #24
-    orr r6, r7, r6, asr #8
+    @ This is slightly convoluted, but is mainly done for performance reasons. It is better
+    @ to hit ROM just once and then do 4 really simple instructions than to do 2 ldrsbs, however annoying
+    @ this is. Also, since all this code is in IWRAM and we never hit ROM otherwise, all accesses
+    @ are sequential and exactly the size of the bus to ROM (16 bits), so hopefully this will be super fast.
+    @
+    @ The next 4 instructions set up the current value in r6 to be in the expected format
+    @ 1 = sign-extension bits (all 1s if the sample is negative, all 0s otherwise)
+    @ L = the left sample
+    @ R = the right sample
+    @ 0 = all zeros
+    @ Split into bytes
+    @
+    @ At this point
+    @                        r6 = | 1 | 1 | L | R | where the upper bytes are 1s if L is negative; the sign of R is irrelevant here
+    mov r7, r6, asr #8     @ r7 = | 1 | 1 | 1 | L | drop R off the right hand side
+    and r7, r7, r5         @ r7 = | 0 | 0 | 1 | L | mask 0x0FFF keeps L plus only 4 sign bits above it (byte 1 = 0000ssss), so the later lsl #4 fills the low halfword without spilling into R's half
+    lsl r6, r6, #24        @ r6 = | R | 0 | 0 | 0 | drop everything except the right sample
+    orr r6, r7, r6, asr #8 @ r6 = | 1 | R | 1 | L | byte 3 = sign extension of R, byte 1 = low-nibble sign bits of L; ready for the lsl #4 in the add below
 
     add r4, r4, r6, lsl #4  @ r4 += r6 << 4 (calculating both the left and right samples together)