diff --git a/.gitignore b/.gitignore index b180dad..f4ea00d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,8 @@ Cargo.lock crt0.o # Don't track VSCode Workspace settings -/.vscode \ No newline at end of file +/.vscode + +# Don't track IntelliJ Workspaces +/.idea +/*.ida diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 9f6fad4..727c1f6 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -52,8 +52,8 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { let this_frame_keys = read_key_input(); // adjust game state and wait for vblank - px = px.wrapping_add(2 * this_frame_keys.x_tribool() as usize); - py = py.wrapping_add(2 * this_frame_keys.y_tribool() as usize); + px = px.wrapping_add((2 * this_frame_keys.x_tribool() as i32) as usize); + py = py.wrapping_add((2 * this_frame_keys.y_tribool() as i32) as usize); if this_frame_keys.l() { color = Color(color.0.rotate_left(5)); } diff --git a/make_example.sh b/make_example.sh new file mode 100755 index 0000000..0704c80 --- /dev/null +++ b/make_example.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +if [ "$1" = "" ]; then + echo "Usage: $0 [example to build]" + exit 1 +fi + +cargo build --example $1 --release || exit 1 +arm-none-eabi-objcopy -O binary target/thumbv4-none-agb/release/examples/$1 target/$1.gba || exit 1 +gbafix target/$1.gba || exit 1 +echo "ROM built successfully!" diff --git a/src/bios.rs b/src/bios.rs index fb5ffc6..326b9e4 100644 --- a/src/bios.rs +++ b/src/bios.rs @@ -8,7 +8,7 @@ //! whatever value is necessary for that function). Some functions also perform //! necessary checks to save you from yourself, such as not dividing by zero. -#![cfg_attr(not(all(target_vendor = "nintendo", target_env = "agb")), allow(unused_variables))] +#![cfg_attr(not(target_arch = "arm"), allow(unused_variables))] use core::mem; use super::*; @@ -55,11 +55,11 @@ use io::irq::IrqFlags; /// perform UB, but such a scenario might exist. #[inline(always)] pub unsafe fn soft_reset() -> ! 
{ - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { asm!("swi 0x00", options(noreturn)) } @@ -92,11 +92,11 @@ pub unsafe fn soft_reset() -> ! { /// that. If you do then you return to nothing and have a bad time. #[inline(always)] pub unsafe fn register_ram_reset(flags: RegisterRAMResetFlags) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { asm!("swi 0x01", in("r0") flags.0); } @@ -126,11 +126,11 @@ impl RegisterRAMResetFlags { /// any enabled interrupt triggers. #[inline(always)] pub fn halt() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x02"); @@ -148,11 +148,11 @@ pub fn halt() { /// optional externals such as rumble and infra-red. #[inline(always)] pub fn stop() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x03"); @@ -175,11 +175,11 @@ pub fn stop() { /// the usual interrupt acknowledgement. 
#[inline(always)] pub fn interrupt_wait(ignore_current_flags: bool, target_flags: IrqFlags) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!( @@ -198,11 +198,11 @@ pub fn interrupt_wait(ignore_current_flags: bool, target_flags: IrqFlags) { /// [`interrupt_wait`](interrupt_wait) outlines. #[inline(always)] pub fn vblank_interrupt_wait() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!( @@ -222,11 +222,11 @@ pub fn vblank_interrupt_wait() { #[inline(always)] pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { assert!(denominator != 0); - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { (numerator / denominator, numerator % denominator) } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let div_out: i32; let rem_out: i32; @@ -265,11 +265,11 @@ pub fn rem(numerator: i32, denominator: i32) -> i32 { /// by `2n` bits to get `n` more bits of fractional precision in your output. #[inline(always)] pub fn sqrt(val: u32) -> u16 { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { 0 // TODO: simulate this properly when not on GBA } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let out: u32; unsafe { @@ -293,11 +293,11 @@ pub fn sqrt(val: u32) -> u16 { /// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`. 
#[inline(always)] pub fn atan(theta: i16) -> i16 { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { 0 // TODO: simulate this properly when not on GBA } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let out: i16; unsafe { @@ -322,11 +322,11 @@ pub fn atan(theta: i16) -> i16 { /// integral, 14 bits for fractional. #[inline(always)] pub fn atan2(y: i16, x: i16) -> u16 { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { 0 // TODO: simulate this properly when not on GBA } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let out: u16; unsafe { @@ -353,11 +353,11 @@ pub fn atan2(y: i16, x: i16) -> u16 { /// * Both pointers must be aligned #[inline(always)] pub unsafe fn cpu_set16(src: *const u16, dest: *mut u16, count: u32, fixed_source: bool) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let control = count + ((fixed_source as u32) << 24); asm!( @@ -380,11 +380,11 @@ pub unsafe fn cpu_set16(src: *const u16, dest: *mut u16, count: u32, fixed_sourc /// * Both pointers must be aligned #[inline(always)] pub unsafe fn cpu_set32(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let control = count + ((fixed_source as u32) << 24) + (1 << 26); asm!( @@ -408,11 +408,11 @@ pub unsafe fn cpu_set32(src: *const u32, dest: *mut u32, count: u32, 
fixed_sourc /// * Both pointers must be aligned #[inline(always)] pub unsafe fn cpu_fast_set(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let control = count + ((fixed_source as u32) << 24); asm!( @@ -434,11 +434,11 @@ pub unsafe fn cpu_fast_set(src: *const u32, dest: *mut u32, count: u32, fixed_so /// some other value I guess you're probably running on an emulator that just /// broke the fourth wall. pub fn get_bios_checksum() -> u32 { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { let out: u32; unsafe { @@ -473,11 +473,11 @@ pub fn get_bios_checksum() -> u32 { /// /// The final sound level setting will be `level` * `0x200`. 
pub fn sound_bias(level: u32) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x19", in("r0") level); @@ -513,11 +513,11 @@ pub fn sound_bias(level: u32) { /// * 10: 40137 /// * 11: 42048 pub fn sound_driver_mode(mode: u32) { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x1B", in("r0") mode); @@ -535,11 +535,11 @@ pub fn sound_driver_mode(mode: u32) { /// executed." --what? #[inline(always)] pub fn sound_driver_main() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x1C"); @@ -553,11 +553,11 @@ pub fn sound_driver_main() { /// vblank interrupt (every 1/60th of a second). #[inline(always)] pub fn sound_driver_vsync() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x1D"); @@ -573,11 +573,11 @@ pub fn sound_driver_vsync() { /// --what? 
#[inline(always)] pub fn sound_channel_clear() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x1E"); @@ -596,11 +596,11 @@ pub fn sound_channel_clear() { /// noise. #[inline(always)] pub fn sound_driver_vsync_off() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x28"); @@ -615,11 +615,11 @@ pub fn sound_driver_vsync_off() { /// interrupt followed by a `sound_driver_vsync` within 2/60th of a second. #[inline(always)] pub fn sound_driver_vsync_on() { - #[cfg(not(all(target_vendor = "nintendo", target_env = "agb")))] + #[cfg(not(target_arch = "arm"))] { - unimplemented!() + unimplemented!("This function is not supported on this target.") } - #[cfg(all(target_vendor = "nintendo", target_env = "agb"))] + #[cfg(target_arch = "arm")] { unsafe { asm!("swi 0x29"); diff --git a/src/io/dma.rs b/src/io/dma.rs index 0da6c8e..4507c07 100644 --- a/src/io/dma.rs +++ b/src/io/dma.rs @@ -143,7 +143,8 @@ impl DMA0 { /// /// The source pointer must be aligned and valid to read from. pub unsafe fn set_source(src: *const u32) { - Self::DMA0SAD.write(src) + crate::sync::memory_read_hint(src); + Self::DMA0SAD.write(src); } /// Assigns the destination register. @@ -154,7 +155,8 @@ impl DMA0 { /// /// The source pointer must be aligned and valid to write to. pub unsafe fn set_dest(dest: *mut u32) { - Self::DMA0DAD.write(dest) + Self::DMA0DAD.write(dest); + crate::sync::memory_write_hint(dest); } /// Assigns the count register. 
@@ -204,7 +206,8 @@ impl DMA1 { /// /// The source pointer must be aligned and valid to read from. pub unsafe fn set_source(src: *const u32) { - Self::DMA1SAD.write(src) + crate::sync::memory_read_hint(src); + Self::DMA1SAD.write(src); } /// Assigns the destination register. @@ -215,7 +218,8 @@ impl DMA1 { /// /// The source pointer must be aligned and valid to write to. pub unsafe fn set_dest(dest: *mut u32) { - Self::DMA1DAD.write(dest) + Self::DMA1DAD.write(dest); + crate::sync::memory_write_hint(dest); } /// Assigns the count register. @@ -265,7 +269,8 @@ impl DMA2 { /// /// The source pointer must be aligned and valid to read from. pub unsafe fn set_source(src: *const u32) { - Self::DMA2SAD.write(src) + crate::sync::memory_read_hint(src); + Self::DMA2SAD.write(src); } /// Assigns the destination register. @@ -276,7 +281,8 @@ impl DMA2 { /// /// The source pointer must be aligned and valid to write to. pub unsafe fn set_dest(dest: *mut u32) { - Self::DMA2DAD.write(dest) + Self::DMA2DAD.write(dest); + crate::sync::memory_write_hint(dest); } /// Assigns the count register. @@ -327,7 +333,8 @@ impl DMA3 { /// /// The source pointer must be aligned and valid to read from. pub unsafe fn set_source(src: *const u32) { - Self::DMA3SAD.write(src) + crate::sync::memory_read_hint(src); + Self::DMA3SAD.write(src); } /// Assigns the destination register. @@ -338,7 +345,8 @@ impl DMA3 { /// /// The source pointer must be aligned and valid to write to. pub unsafe fn set_dest(dest: *mut u32) { - Self::DMA3DAD.write(dest) + Self::DMA3DAD.write(dest); + crate::sync::memory_write_hint(dest); } /// Assigns the count register. 
@@ -380,18 +388,24 @@ impl DMA3 { .with_use_32bit(true) .with_enabled(true); // TODO: destination checking against SRAM + crate::sync::memory_read_hint(src); Self::DMA3SAD.write(src); Self::DMA3DAD.write(dest); Self::DMA3CNT_L.write(count); Self::DMA3CNT_H.write(FILL_CONTROL); + crate::sync::memory_write_hint(dest); + // Note(Lokathor): Once DMA is set to activate it takes 2 cycles for it to // kick in. You can do any non-DMA thing you like before that, but since // it's only two cycles we just insert two NOP instructions to ensure that // successive calls to `fill32` or other DMA methods don't interfere with // each other. - asm!(" + asm!( + " NOP NOP - ", options(nomem, nostack)); + ", + options(nomem, nostack) + ); } } diff --git a/src/lib.rs b/src/lib.rs index 8589008..217b946 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ #![cfg_attr(not(test), no_std)] -#![feature(asm)] +#![feature(asm, isa_attribute)] #![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] #![warn(missing_docs)] @@ -44,6 +44,8 @@ pub mod sram; pub mod mgba; +pub mod sync; + extern "C" { /// This marks the end of the `.data` and `.bss` sections in IWRAM. /// diff --git a/src/sync.rs b/src/sync.rs new file mode 100644 index 0000000..239b8f0 --- /dev/null +++ b/src/sync.rs @@ -0,0 +1,63 @@ +//! A module containing functions and utilities useful for synchronizing state. + +use crate::io::irq::{IrqEnableSetting, IME}; + +mod locks; +mod statics; + +pub use locks::*; +pub use statics::*; + +/// Marks that a pointer is read without actually reading from it. +/// +/// This uses an [`asm!`] instruction that marks the parameter as being read, +/// requiring the compiler to treat this function as if anything could be +/// done to it. +#[inline(always)] +pub fn memory_read_hint(val: *const T) { + unsafe { asm!("/* {0} */", in(reg) val, options(readonly, nostack)) } +} + +/// Marks that a pointer is read or written to without actually writing to it.
+/// +/// This uses an [`asm!`] instruction that marks the parameter as being read +/// and written, requiring the compiler to treat this function as if anything +/// could be done to it. +#[inline(always)] +pub fn memory_write_hint(val: *mut T) { + unsafe { asm!("/* {0} */", in(reg) val, options(nostack)) } +} + +/// An internal function used as a temporary hack to get `compiler_fence` +/// working. While this call is not properly inlined, working is better than +/// not working at all. +/// +/// This seems to be a problem caused by Rust issue #62256: +/// <https://github.com/rust-lang/rust/issues/62256> +/// +/// Not public API, obviously. +#[doc(hidden)] +#[deprecated] +#[allow(dead_code)] +#[no_mangle] +#[inline(always)] +pub unsafe extern "C" fn __sync_synchronize() {} + +/// Runs a function with IRQs disabled. +/// +/// This should not be done without good reason, as IRQs are usually important +/// for game functionality. +pub fn with_irqs_disabled(mut func: impl FnOnce() -> T) -> T { + let current_ime = IME.read(); + IME.write(IrqEnableSetting::IRQ_NO); + // prevents the contents of the function from being reordered before IME is disabled. + memory_write_hint(&mut func); + + let mut result = func(); + + // prevents the contents of the function from being reordered after IME is reenabled. + memory_write_hint(&mut result); + IME.write(current_ime); + + result +} diff --git a/src/sync/locks.rs b/src/sync/locks.rs new file mode 100644 index 0000000..921e788 --- /dev/null +++ b/src/sync/locks.rs @@ -0,0 +1,202 @@ +use super::*; +use core::{ + cell::UnsafeCell, + mem::MaybeUninit, + ops::{Deref, DerefMut}, + ptr, + sync::atomic::{compiler_fence, Ordering}, +}; + +#[inline(never)] +fn already_locked() -> ! { + panic!("This lock has already been locked by another thread.") +} + +/// A mutex that prevents code from running in both an IRQ and normal code at +/// the same time. +/// +/// Note that this does not support blocking like a typical mutex, and instead +/// mainly exists for memory safety reasons.
+pub struct RawMutex(Static); +impl RawMutex { + /// Creates a new lock. + pub const fn new() -> Self { + RawMutex(Static::new(false)) + } + + /// Locks the mutex and returns whether a lock was successfully acquired. + fn raw_lock(&self) -> bool { + if self.0.replace(true) { + // value was already true, oops. + false + } else { + // prevent any weird reordering, and continue + compiler_fence(Ordering::Acquire); + true + } + } + + /// Unlocks the mutex. + fn raw_unlock(&self) { + compiler_fence(Ordering::Release); + if !self.0.replace(false) { + panic!("Internal error: Attempt to unlock a `RawMutex` which is not locked.") + } + } + + /// Returns a guard for this lock, or panics if there is another lock active. + pub fn lock(&self) -> RawMutexGuard<'_> { + self.try_lock().unwrap_or_else(|| already_locked()) + } + + /// Returns a guard for this lock, or `None` if there is another lock active. + pub fn try_lock(&self) -> Option> { + if self.raw_lock() { + Some(RawMutexGuard(self)) + } else { + None + } + } +} +unsafe impl Send for RawMutex {} +unsafe impl Sync for RawMutex {} + +/// A guard representing an active lock on a [`RawMutex`]. +pub struct RawMutexGuard<'a>(&'a RawMutex); +impl<'a> Drop for RawMutexGuard<'a> { + fn drop(&mut self) { + self.0.raw_unlock(); + } +} + +/// A mutex that protects an object from being accessed in both an IRQ and +/// normal code at once. +/// +/// Note that this does not support blocking like a typical mutex, and instead +/// mainly exists for memory safety reasons. +pub struct Mutex { + raw: RawMutex, + data: UnsafeCell, +} +impl Mutex { + /// Creates a new lock containing a given value. + pub const fn new(t: T) -> Self { + Mutex { raw: RawMutex::new(), data: UnsafeCell::new(t) } + } + + /// Returns a guard for this lock, or panics if there is another lock active.
+ pub fn lock(&self) -> MutexGuard<'_, T> { + self.try_lock().unwrap_or_else(|| already_locked()) + } + + /// Returns a guard for this lock, or `None` if there is another lock active. + pub fn try_lock(&self) -> Option> { + if self.raw.raw_lock() { + Some(MutexGuard { underlying: self, ptr: self.data.get() }) + } else { + None + } + } +} +unsafe impl Send for Mutex {} +unsafe impl Sync for Mutex {} + +/// A guard representing an active lock on a [`Mutex`]. +pub struct MutexGuard<'a, T> { + underlying: &'a Mutex, + ptr: *mut T, +} +impl<'a, T> Drop for MutexGuard<'a, T> { + fn drop(&mut self) { + self.underlying.raw.raw_unlock(); + } +} +impl<'a, T> Deref for MutexGuard<'a, T> { + type Target = T; + fn deref(&self) -> &Self::Target { + unsafe { &*self.ptr } + } +} +impl<'a, T> DerefMut for MutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.ptr } + } +} + +enum Void {} + +/// A helper type that ensures a particular value is only initialized once. +pub struct InitOnce { + is_initialized: Static, + value: UnsafeCell>, +} +impl InitOnce { + /// Creates a new uninitialized object. + pub const fn new() -> Self { + InitOnce { + is_initialized: Static::new(false), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + } + + /// Gets the contents of this state, or initializes it if it has not already + /// been initialized. + /// + /// The initializer function is guaranteed to only be called once. + /// + /// This function disables IRQs while it is initializing the inner value. + /// While this can cause audio skipping and other similar issues, it is + /// not normally a problem as interrupts will only be disabled once per + /// `InitOnce` during the life cycle of the program.
+ pub fn get(&self, initializer: impl FnOnce() -> T) -> &T { + match self.try_get(|| -> Result { Ok(initializer()) }) { + Ok(v) => v, + _ => unimplemented!(), + } + } + + /// Gets the contents of this state, or initializes it if it has not already + /// been initialized. + /// + /// The initializer function is guaranteed to only be called once if it + /// returns `Ok`. If it returns `Err`, it will be called again in the + /// future until an attempt at initialization succeeds. + /// + /// This function disables IRQs while it is initializing the inner value. + /// While this can cause audio skipping and other similar issues, it is + /// not normally a problem as interrupts will only be disabled once per + /// `InitOnce` during the life cycle of the program. + pub fn try_get(&self, initializer: impl FnOnce() -> Result) -> Result<&T, E> { + unsafe { + if !self.is_initialized.read() { + // We disable interrupts to make this simpler, since this is likely to + // only occur once in a program anyway. + with_irqs_disabled(|| -> Result<(), E> { + // We check again to make sure this function wasn't called in an + // interrupt between the first check and when interrupts were + // actually disabled. + if !self.is_initialized.read() { + // Do the actual initialization. 
+ ptr::write_volatile((*self.value.get()).as_mut_ptr(), initializer()?); + self.is_initialized.write(true); + } + Ok(()) + })?; + } + compiler_fence(Ordering::Acquire); + Ok(&*(*self.value.get()).as_mut_ptr()) + } + } +} +impl Drop for InitOnce { + fn drop(&mut self) { + if self.is_initialized.read() { + // drop the value inside the `MaybeUninit` + unsafe { + ptr::read((*self.value.get()).as_ptr()); + } + } + } +} +unsafe impl Send for InitOnce {} +unsafe impl Sync for InitOnce {} diff --git a/src/sync/statics.rs b/src/sync/statics.rs new file mode 100644 index 0000000..d85a1e1 --- /dev/null +++ b/src/sync/statics.rs @@ -0,0 +1,261 @@ +#![cfg_attr(not(target_arch = "arm"), allow(unused_variables))] + +use super::*; +use core::{cell::UnsafeCell, mem, mem::MaybeUninit, ptr}; + +/// The internal function for replacing a `Copy` (really `!Drop`) value in a +/// [`Static`]. This uses assembly to use an `stmia` instruction to ensure +/// an IRQ cannot occur during the write operation. +#[cfg(target_arch = "arm")] +unsafe fn transfer(dst: *mut T, src: *const T) { + let align = mem::align_of::(); + let size = mem::size_of::(); + if size == 0 { + // Do nothing with ZSTs. Obviously. + } else if size <= 16 && align % 4 == 0 { + // We can do a 4-byte aligned transfer up to 16 bytes. + transfer_align4_thumb(dst, src); + } else if size <= 36 && align % 4 == 0 { + // We can do the same up to 36 bytes, but we need to switch to ARM. + transfer_align4_arm(dst, src); + } else if size <= 2 && align % 2 == 0 { + // We can do a 2-byte aligned transfer up to 2 bytes. + asm!( + "ldrh {2},[{0}]", + "strh {2},[{1}]", + in(reg) src, in(reg) dst, out(reg) _, + ) + } else if size == 1 { + // We can do a simple byte copy. + asm!( + "ldrb {2},[{0}]", + "strb {2},[{1}]", + in(reg) src, in(reg) dst, out(reg) _, + ) + } else { + // When we don't have an optimized path, we just disable IRQs.
+ with_irqs_disabled(|| ptr::write_volatile(dst, ptr::read_volatile(src))); + } +} + +#[cfg(target_arch = "arm")] +#[allow(unused_assignments)] +unsafe fn transfer_align4_thumb(mut dst: *mut T, mut src: *const T) { + let size = mem::size_of::(); + if size <= 4 { + // We use assembly here regardless to just do the word aligned copy. This + // ensures it's done with a single ldr/str instruction. + asm!( + "ldr {2},[{0}]", + "str {2},[{1}]", + inout(reg) src, in(reg) dst, out(reg) _, + ) + } else if size <= 8 { + // Starting at size == 5, we begin using ldmia/stmia to load/save multiple + // words in one instruction, avoiding IRQs from interrupting our operation. + asm!( + "ldmia {0}!, {{r2-r3}}", + "stmia {1}!, {{r2-r3}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, + ) + } else if size <= 12 { + asm!( + "ldmia {0}!, {{r2-r4}}", + "stmia {1}!, {{r2-r4}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, + ) + } else if size <= 16 { + asm!( + "ldmia {0}!, {{r2-r5}}", + "stmia {1}!, {{r2-r5}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, + ) + } else { + unimplemented!("This should be done via transfer_arm."); + } +} + +#[cfg(target_arch = "arm")] +#[instruction_set(arm::a32)] +#[allow(unused_assignments)] +unsafe fn transfer_align4_arm(mut dst: *mut T, mut src: *const T) { + let size = mem::size_of::(); + if size <= 16 { + unimplemented!("This should be done via transfer_thumb."); + } else if size <= 20 { + // Starting at size == 20, we have to switch to ARM due to lack of + // accessible registers in THUMB mode. 
+ asm!( + "ldmia {0}!, {{r2-r5,r8}}", + "stmia {1}!, {{r2-r5,r8}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, out("r8") _, + ) + } else if size <= 24 { + asm!( + "ldmia {0}!, {{r2-r5,r8-r9}}", + "stmia {1}!, {{r2-r5,r8-r9}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, out("r8") _, out("r9") _, + ) + } else if size <= 28 { + asm!( + "ldmia {0}!, {{r2-r5,r8-r10}}", + "stmia {1}!, {{r2-r5,r8-r10}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, out("r8") _, out("r9") _, + out("r10") _, + ) + } else if size <= 32 { + asm!( + "ldmia {0}!, {{r2-r5,r8-r10,r12}}", + "stmia {1}!, {{r2-r5,r8-r10,r12}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, out("r8") _, out("r9") _, + out("r10") _, out("r12") _, + ) + } else if size <= 36 { + asm!( + "ldmia {0}!, {{r2-r5,r8-r10,r12,r14}}", + "stmia {1}!, {{r2-r5,r8-r10,r12,r14}}", + inout(reg) src, inout(reg) dst, + out("r2") _, out("r3") _, out("r4") _, out("r5") _, out("r8") _, out("r9") _, + out("r10") _, out("r12") _, out("r14") _, + ) + } else { + unimplemented!("Copy too large for use of ldmia/stmia."); + } +} + +/// The internal function for swapping the current value of a [`Static`] with +/// another value. +#[cfg(target_arch = "arm")] +unsafe fn exchange(dst: *mut T, src: *const T) -> T { + let align = mem::align_of::(); + let size = mem::size_of::(); + if size == 0 { + // Do nothing with ZSTs. + ptr::read(dst) + } else if size <= 4 && align % 4 == 0 { + // Swap a single word with the SWP instruction. + let val = ptr::read(src as *const u32); + let new_val = exchange_align4_arm(dst, val); + ptr::read(&new_val as *const _ as *const T) + } else if size == 1 { + // Swap a byte with the SWPB instruction. 
+ let val = ptr::read(src as *const u8); + let new_val = exchange_align1_arm(dst, val); + ptr::read(&new_val as *const _ as *const T) + } else { + // fallback + with_irqs_disabled(|| { + let cur = ptr::read_volatile(dst); + ptr::write_volatile(dst, ptr::read_volatile(src)); + cur + }) + } +} + +#[cfg(target_arch = "arm")] +#[instruction_set(arm::a32)] +unsafe fn exchange_align4_arm(dst: *mut T, i: u32) -> u32 { + let out; + asm!("swp {2}, {1}, [{0}]", in(reg) dst, in(reg) i, lateout(reg) out); + out +} + +#[cfg(target_arch = "arm")] +#[instruction_set(arm::a32)] +unsafe fn exchange_align1_arm(dst: *mut T, i: u8) -> u8 { + let out; + asm!("swpb {2}, {1}, [{0}]", in(reg) dst, in(reg) i, lateout(reg) out); + out +} + +#[cfg(not(target_arch = "arm"))] +unsafe fn exchange(dst: *mut T, src: *const T) -> T { + unimplemented!("This function is not supported on this target.") +} + +#[cfg(not(target_arch = "arm"))] +unsafe fn transfer(dst: *mut T, src: *const T) { + unimplemented!("This function is not supported on this target.") +} + +/// A helper that implements static variables. +/// +/// It ensures that even if you use the same static variable in both an IRQ +/// and normal code, the IRQ will never observe an invalid value of the +/// variable. +/// +/// This type only works with owned values. If you need to work with borrows, +/// consider using [`Mutex`] instead. +/// +/// ## Performance +/// +/// Writing or reading from a static variable is efficient under the following +/// conditions: +/// +/// * The type is aligned to 4 bytes and can be stored in 36 bytes or less. +/// * The type is aligned to 2 bytes and can be stored in 2 bytes. +/// * The type can be stored in a single byte. +/// +/// Replacing the current value of the static variable is efficient under the +/// following conditions: +/// +/// * The type is aligned to 4 bytes and can be stored in 4 bytes or less. +/// * The type can be stored in a single byte.
+/// +/// When these conditions are not met, static variables are handled using a +/// fallback routine that disables IRQs and does a normal copy. This can be +/// dangerous as disabling IRQs can cause your program to miss out on important +/// interrupts such as V-Blank. +/// +/// Consider using [`Mutex`] instead if you need to use a large amount of +/// operations that would cause IRQs to be disabled. Also consider using +/// `#[repr(align(4))]` to force proper alignment for your type. +pub struct Static { + data: UnsafeCell, +} +impl Static { + /// Creates a new static variable. + pub const fn new(val: T) -> Self { + Static { data: UnsafeCell::new(val) } + } + + /// Replaces the current value of the static variable with another, and + /// returns the old value. + pub fn replace(&self, val: T) -> T { + unsafe { exchange(self.data.get(), &val) } + } + + /// Extracts the interior value of the static variable. + pub fn into_inner(self) -> T { + self.data.into_inner() + } +} +impl Static { + /// Writes a new value into this static variable. + pub fn write(&self, val: T) { + unsafe { transfer(self.data.get(), &val) } + } + + /// Reads a value from this static variable. + pub fn read(&self) -> T { + unsafe { + let mut out: MaybeUninit = MaybeUninit::uninit(); + transfer(out.as_mut_ptr(), self.data.get()); + out.assume_init() + } + } +} +impl Default for Static { + fn default() -> Self { + Static::new(T::default()) + } +} +unsafe impl Send for Static {} +unsafe impl Sync for Static {}