mirror of
https://github.com/italicsjenga/agb.git
synced 2025-01-26 09:06:33 +11:00
Merge pull request #234 from gwilymk/speed-up-sound-attempt-209
Speed up sound attempt 209
This commit is contained in:
commit
3c7d97e7f4
3 changed files with 59 additions and 30 deletions
|
@ -136,10 +136,39 @@ agb_arm_func agb_rs__mixer_add_stereo
|
||||||
|
|
||||||
agb_arm_end agb_rs__mixer_add_stereo
|
agb_arm_end agb_rs__mixer_add_stereo
|
||||||
|
|
||||||
|
.section .iwram
|
||||||
|
.balign 4
|
||||||
|
constant_zero:
|
||||||
|
.rept 4
|
||||||
|
.word 0
|
||||||
|
.endr
|
||||||
|
|
||||||
|
agb_arm_func agb_rs__init_buffer
|
||||||
|
@ arguments:
|
||||||
|
@ r0 = target buffer
|
||||||
|
@ r1 = size in bytes (must be a multiple of 16)
|
||||||
|
push {r4-r5}
|
||||||
|
|
||||||
|
@ zero registers r3-r5
|
||||||
|
ldr r2, =constant_zero
|
||||||
|
ldm r2, {r3-r5,r12}
|
||||||
|
|
||||||
|
1:
|
||||||
|
@ zero 4 words worth of the buffer
|
||||||
|
stmia r0!, {r3-r5,r12}
|
||||||
|
subs r1, r1, #(4 * 4)
|
||||||
|
@ loop if we haven't zeroed everything
|
||||||
|
bne 1b
|
||||||
|
|
||||||
|
pop {r4-r5}
|
||||||
|
bx lr
|
||||||
|
agb_arm_end agb_rs__init_buffer
|
||||||
|
|
||||||
agb_arm_func agb_rs__mixer_collapse
|
agb_arm_func agb_rs__mixer_collapse
|
||||||
@ Arguments:
|
@ Arguments:
|
||||||
@ r0 = target buffer (i8)
|
@ r0 = target buffer (i8)
|
||||||
@ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32)
|
@ r1 = input buffer (i16) of fixnums with 4 bits of precision (read in sets of i16 in an i32)
|
||||||
|
|
||||||
push {r4-r11}
|
push {r4-r11}
|
||||||
|
|
||||||
CONST_0 .req r7
|
CONST_0 .req r7
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
use core::cell::RefCell;
|
use core::cell::RefCell;
|
||||||
|
use core::mem;
|
||||||
|
use core::mem::MaybeUninit;
|
||||||
|
|
||||||
use bare_metal::{CriticalSection, Mutex};
|
use bare_metal::{CriticalSection, Mutex};
|
||||||
|
|
||||||
use super::hw;
|
use super::hw;
|
||||||
use super::hw::LeftOrRight;
|
use super::hw::LeftOrRight;
|
||||||
use super::{SoundChannel, SoundPriority};
|
use super::{SoundChannel, SoundPriority};
|
||||||
use crate::syscall::cpu_fast_fill_i8;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
fixnum::Num,
|
fixnum::Num,
|
||||||
interrupt::free,
|
interrupt::free,
|
||||||
|
@ -27,6 +29,8 @@ extern "C" {
|
||||||
fn agb_rs__mixer_add_stereo(sound_data: *const u8, sound_buffer: *mut Num<i16, 4>);
|
fn agb_rs__mixer_add_stereo(sound_data: *const u8, sound_buffer: *mut Num<i16, 4>);
|
||||||
|
|
||||||
fn agb_rs__mixer_collapse(sound_buffer: *mut i8, input_buffer: *const Num<i16, 4>);
|
fn agb_rs__mixer_collapse(sound_buffer: *mut i8, input_buffer: *const Num<i16, 4>);
|
||||||
|
|
||||||
|
fn agb_rs__init_buffer(buffer: *mut MaybeUninit<Num<i16, 4>>, size_in_bytes: usize);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Mixer {
|
pub struct Mixer {
|
||||||
|
@ -231,8 +235,28 @@ impl MixerBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_channels<'a>(&mut self, channels: impl Iterator<Item = &'a mut SoundChannel>) {
|
fn write_channels<'a>(&mut self, channels: impl Iterator<Item = &'a mut SoundChannel>) {
|
||||||
let mut buffer: [Num<i16, 4>; constants::SOUND_BUFFER_SIZE * 2] =
|
// This code is equivalent to:
|
||||||
[Num::new(0); constants::SOUND_BUFFER_SIZE * 2];
|
// let mut buffer: [Num<i16, 4>; constants::SOUND_BUFFER_SIZE * 2] =
|
||||||
|
// [Num::new(0); constants::SOUND_BUFFER_SIZE * 2];
|
||||||
|
// but the above uses approximately 7% of the CPU time if running at 32kHz
|
||||||
|
let mut buffer: [Num<i16, 4>; constants::SOUND_BUFFER_SIZE * 2] = {
|
||||||
|
// Create an uninitialized array of `MaybeUninit`. The `assume_init` is
|
||||||
|
// safe because the type we are claiming to have initialized here is a
|
||||||
|
// bunch of `MaybeUninit`s, which do not require initialization.
|
||||||
|
let mut data: [MaybeUninit<Num<i16, 4>>; constants::SOUND_BUFFER_SIZE * 2] =
|
||||||
|
unsafe { MaybeUninit::uninit().assume_init() };
|
||||||
|
|
||||||
|
// Actually init the array (by filling it with zeros) and then transmute it (which is safe because
|
||||||
|
// we have now zeroed everything)
|
||||||
|
unsafe {
|
||||||
|
agb_rs__init_buffer(
|
||||||
|
data.as_mut_ptr(),
|
||||||
|
mem::size_of::<Num<i16, 4>>() * data.len(),
|
||||||
|
);
|
||||||
|
|
||||||
|
mem::transmute(data)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
for channel in channels {
|
for channel in channels {
|
||||||
if channel.is_done {
|
if channel.is_done {
|
||||||
|
@ -245,9 +269,6 @@ impl MixerBuffer {
|
||||||
channel.playback_speed
|
channel.playback_speed
|
||||||
};
|
};
|
||||||
|
|
||||||
let right_amount = ((channel.panning + 1) / 2) * channel.volume;
|
|
||||||
let left_amount = ((-channel.panning + 1) / 2) * channel.volume;
|
|
||||||
|
|
||||||
if (channel.pos + playback_speed * constants::SOUND_BUFFER_SIZE).floor()
|
if (channel.pos + playback_speed * constants::SOUND_BUFFER_SIZE).floor()
|
||||||
>= channel.data.len()
|
>= channel.data.len()
|
||||||
{
|
{
|
||||||
|
@ -268,6 +289,9 @@ impl MixerBuffer {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
let right_amount = ((channel.panning + 1) / 2) * channel.volume;
|
||||||
|
let left_amount = ((-channel.panning + 1) / 2) * channel.volume;
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
agb_rs__mixer_add(
|
agb_rs__mixer_add(
|
||||||
channel.data.as_ptr().add(channel.pos.floor()),
|
channel.data.as_ptr().add(channel.pos.floor()),
|
||||||
|
@ -285,7 +309,6 @@ impl MixerBuffer {
|
||||||
let write_buffer_index = free(|cs| self.state.borrow(cs).borrow_mut().active_advanced());
|
let write_buffer_index = free(|cs| self.state.borrow(cs).borrow_mut().active_advanced());
|
||||||
|
|
||||||
let write_buffer = &mut self.buffers[write_buffer_index].0;
|
let write_buffer = &mut self.buffers[write_buffer_index].0;
|
||||||
cpu_fast_fill_i8(write_buffer, 0);
|
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
agb_rs__mixer_collapse(write_buffer.as_mut_ptr(), buffer.as_ptr());
|
agb_rs__mixer_collapse(write_buffer.as_mut_ptr(), buffer.as_ptr());
|
||||||
|
|
|
@ -120,29 +120,6 @@ pub fn arc_tan2(x: i16, y: i32) -> i16 {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn cpu_fast_fill_i8(input: &mut [i8], new_content: i32) {
|
|
||||||
assert_eq!(
|
|
||||||
input.len() % (4 * 8),
|
|
||||||
0,
|
|
||||||
"Input length must be divisible by 32"
|
|
||||||
);
|
|
||||||
|
|
||||||
let input_ptr = [new_content].as_ptr();
|
|
||||||
let output_ptr = input.as_mut_ptr();
|
|
||||||
let length_mode = (1 << 24) | // copy
|
|
||||||
(input.len() / 4);
|
|
||||||
|
|
||||||
unsafe {
|
|
||||||
asm!(
|
|
||||||
"swi 0x0c",
|
|
||||||
in("r0") input_ptr,
|
|
||||||
in("r1") output_ptr,
|
|
||||||
in("r2") length_mode,
|
|
||||||
lateout("r3") _,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pub fn affine_matrix(
|
// pub fn affine_matrix(
|
||||||
// x_scale: Num<i16, 8>,
|
// x_scale: Num<i16, 8>,
|
||||||
// y_scale: Num<i16, 8>,
|
// y_scale: Num<i16, 8>,
|
||||||
|
|
Loading…
Add table
Reference in a new issue