From ec0ca7d804a2618a6f8ead8b174b2a2a8052ed2e Mon Sep 17 00:00:00 2001 From: Hpmason Date: Mon, 17 May 2021 14:22:51 -0400 Subject: [PATCH] Add RNG (#142) * Add random * Add GBA random example * switch macro_export to pub(crate) * rename u32_to_u32 macro * remove useless pub use * Comment out extra u32 to u16 macro * Comment random_color example * formatting * More commenting * Add/fix some docs * Fix some doc links * Remove use of u64 - Halve bit widths where u64 was used * Add Gen32::next_color() --- examples/random_color.rs | 69 +++++++++++++++++++ src/lib.rs | 4 ++ src/random.rs | 51 ++++++++++++++ src/random/algorithms.rs | 30 +++++++++ src/random/bounded_rand.rs | 73 ++++++++++++++++++++ src/random/gen32.rs | 134 +++++++++++++++++++++++++++++++++++++ src/random/pcg32.rs | 108 ++++++++++++++++++++++++++++++ src/save.rs | 4 +- src/save/utils.rs | 2 +- src/sync/statics.rs | 4 +- 10 files changed, 474 insertions(+), 5 deletions(-) create mode 100644 examples/random_color.rs create mode 100644 src/random.rs create mode 100644 src/random/algorithms.rs create mode 100644 src/random/bounded_rand.rs create mode 100644 src/random/gen32.rs create mode 100644 src/random/pcg32.rs diff --git a/examples/random_color.rs b/examples/random_color.rs new file mode 100644 index 0000000..6dc2c0a --- /dev/null +++ b/examples/random_color.rs @@ -0,0 +1,69 @@ +#![no_std] +#![no_main] + +use gba::prelude::*; + +#[panic_handler] +#[allow(unused)] +fn panic(info: &core::panic::PanicInfo) -> ! { + // This kills the emulation with a message if we're running inside an + // emulator we support (mGBA or NO$GBA), or just crashes the game if we + // aren't. + //fatal!("{}", info); + + loop { + DISPCNT.read(); + } +} + +/// Performs a busy loop until VBlank starts. +/// +/// This is very inefficient, and please keep following the lessons until we +/// cover how interrupts work! +pub fn spin_until_vblank() { + while VCOUNT.read() < 160 {} +} + +/// Performs a busy loop until VDraw starts. 
+/// +/// This is very inefficient, and please keep following the lessons until we +/// cover how interrupts work! +pub fn spin_until_vdraw() { + while VCOUNT.read() >= 160 {} +} + +#[no_mangle] +pub fn main() -> ! { + const SETTING: DisplayControl = DisplayControl::new().with_display_mode(3).with_display_bg2(true); + DISPCNT.write(SETTING); + // Create default RNG + let mut rng = RNG::default(); + + let mut px: usize = 0; + let mut py: usize = 0; + let mut color; + + loop { + // Generate color from RNG + color = rng.next_color(); + // now we wait + spin_until_vblank(); + // Draw pixels to screen + mode3::bitmap_xy(px, py).write(color); + mode3::bitmap_xy(px, py + 1).write(color); + mode3::bitmap_xy(px + 1, py).write(color); + mode3::bitmap_xy(px + 1, py + 1).write(color); + // Increment x and y, wrap as needed + px += 2; + if px >= mode3::WIDTH { + px = 0; + py += 2; + if py >= mode3::HEIGHT { + py = 0; + } + } + + // now we wait again + spin_until_vdraw(); + } +} diff --git a/src/lib.rs b/src/lib.rs index bb2443f..40087ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,8 @@ pub mod prelude { pub use crate::save::*; #[cfg(target_arch = "arm")] pub use crate::sync::*; + #[cfg(target_arch = "arm")] + pub use crate::random::*; } pub mod mmio_types; @@ -58,6 +60,8 @@ pub mod save; #[cfg(target_arch = "arm")] pub mod debugging; +#[cfg(target_arch = "arm")] +pub mod random; /* extern "C" { /// This marks the end of the `.data` and `.bss` sections in IWRAM. diff --git a/src/random.rs b/src/random.rs new file mode 100644 index 0000000..4a2b3e8 --- /dev/null +++ b/src/random.rs @@ -0,0 +1,51 @@ + + +//! Module for random number generation +//! +//! This module provides functions and utilities for randomly generated values +//! # Usage +//! ```rust +//! use gba::random::RNG; +//! +//! let mut rng = RNG::seed(123, 321); +//! let x = rng.next_u32(); +//! ``` +//! +//! +//! + +macro_rules! 
make_jump_lcgX { + ($(#[$attr:meta])* $f:ident, $u:ty) => { + $(#[$attr])* + /// Gives the state `delta` steps from now in `log(delta)` time. + #[must_use] + #[inline(always)] + const fn $f(mut delta: $u, state: $u, mult: $u, inc: $u) -> $u { + let mut cur_mult: $u = mult; + let mut cur_plus: $u = inc; + let mut acc_mult: $u = 1; + let mut acc_plus: $u = 0; + while delta > 0 { + if (delta & 1) > 0 { + acc_mult = acc_mult.wrapping_mul(cur_mult); + acc_plus = acc_plus.wrapping_mul(cur_mult).wrapping_add(cur_plus); + } + cur_plus = cur_mult.wrapping_add(1).wrapping_mul(cur_plus); + cur_mult = cur_mult.wrapping_mul(cur_mult); + delta /= 2; + } + acc_mult.wrapping_mul(state).wrapping_add(acc_plus) + } + }; +} + +mod gen32; +pub use gen32::*; + +mod pcg32; +pub use pcg32::*; + +mod bounded_rand; +pub use bounded_rand::*; + +mod algorithms; \ No newline at end of file diff --git a/src/random/algorithms.rs b/src/random/algorithms.rs new file mode 100644 index 0000000..7e42bd0 --- /dev/null +++ b/src/random/algorithms.rs @@ -0,0 +1,30 @@ +/// Advances a PCG with 32 bits of state. +macro_rules! pcg_core_state32 { + ($state:expr, $inc:expr) => { + $state.wrapping_mul(PCG_MULTIPLIER_32).wrapping_add($inc) + }; +} +pub(crate) use pcg_core_state32; +/// Generates u32 from u32 state +macro_rules! pcg_rxs_m_xs_u32_to_u32 { + ($state: expr) => {{ + $state ^= ($state >> (4 + ($state >> 28) as u32)).wrapping_mul(277803737u32); + $state ^ ($state >> 22) + }}; +} +pub(crate) use pcg_rxs_m_xs_u32_to_u32; + +// Alternative for u32 to u16 +// macro_rules! pcg_xsh_rr_u32_to_u16 { +// ($state: expr) => { +// ((($state ^ ($state >> 18)) >> 11) as u16).rotate_right($state >> 27) as u16 +// }; +// } +// pub(crate) use pcg_xsh_rr_u32_to_u16; +/// Generates u16 from u32 state +macro_rules! 
pcg_xsh_rs_u32_to_u16 { + ($state: expr) => { + (($state ^ ($state >> 6)) >> (6 + ($state >> 29))) as u16 + }; +} +pub(crate) use pcg_xsh_rs_u32_to_u16; diff --git a/src/random/bounded_rand.rs b/src/random/bounded_rand.rs new file mode 100644 index 0000000..b413d44 --- /dev/null +++ b/src/random/bounded_rand.rs @@ -0,0 +1,73 @@ +use super::*; + +/// Stores the values to sample a number in `0 .. N` +/// +/// Making one of these performs a division operation. In comparison, +/// [`Gen32::next_bounded`] will avoid needing to do a division much of the +/// time. Thus, unless you need to sample repeatedly from a specific bounded +/// range, simply calling `next_bounded` directly might be more efficient. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct BoundedRandU16 { + /// number of possible outputs. outputs will be in `0 .. count` + count: u16, + /// Multiplication threshold thing. + /// + /// + threshold: u16, +} +impl BoundedRandU16 { + /// Constructs a new `BoundedRandU16`. + /// + /// ## Panics + /// If the count is 0. + #[inline] + pub const fn new(count: u16) -> Self { + let threshold = count.wrapping_neg() % count; + Self { count, threshold } + } + + /// Constructs a new `BoundedRandU16`, or `None` on failure. + /// + /// ## Failure + /// If the count is 0. + #[inline] + pub const fn try_new(count: u16) -> Option { + if count > 0 { + Some(Self::new(count)) + } else { + None + } + } + + /// The number of possible outputs. + #[inline] + pub const fn count(self) -> u16 { + self.count + } + + /// Given a `u16`, place it into this bounded range. + /// + /// ## Failure + /// * If the value is such that it doesn't fit evenly it is rejected. 
+ #[inline] + pub const fn place_in_range(self, val: u16) -> Option { + let mul: u32 = (val as u32).wrapping_mul(self.count as u32); + let low_part: u16 = mul as u16; + if low_part < self.threshold { + None + } else { + //debug_assert!(((mul >> 16) as u16) < self.count()); + Some((mul >> 16) as u16) + } + } + + /// Given a gen, sample from the gen until `place_in_range` succeeds. + #[inline] + pub fn sample(self, gen: &mut G) -> u16 { + loop { + if let Some(output) = self.place_in_range(gen.next_u16()) { + return output; + } + } + } +} diff --git a/src/random/gen32.rs b/src/random/gen32.rs new file mode 100644 index 0000000..b6f1062 --- /dev/null +++ b/src/random/gen32.rs @@ -0,0 +1,134 @@ +use core::convert::{TryFrom, TryInto}; + +use crate::mmio_types::Color; + +/// A Generator with 32 bits of output per step. +pub trait Gen32 { + /// Generates the next 32 bits of output. + fn next_u32(&mut self) -> u32; + /// Generates the next 16-bits of output + fn next_u16(&mut self) -> u16; + + /// Produces a `Color` + fn next_color(&mut self) -> Color { + Color(self.next_u16() & 0b0111111111111111) + } + /// Produce a `bool` + #[inline(always)] + fn next_bool(&mut self) -> bool { + (self.next_u32() as i32) < 0 + } + + /// Produce a `u8` + #[inline(always)] + fn next_u8(&mut self) -> u8 { + (self.next_u16() >> 8) as u8 + } + + /// Produce a `u64` + #[inline(always)] + fn next_u64(&mut self) -> u64 { + let l = self.next_u32() as u64; + let h = self.next_u32() as u64; + h << 32 | l + } + + /// Gives a value within `0 .. B` + /// + /// This is often more efficient than making a + /// [`BoundedRandU16`](crate::random::BoundedRandU16) if you don't need to use a + /// specific bound value more than once. + /// + /// ## Panics + /// * If the input is 0. 
+ #[inline] + fn next_bounded(&mut self, b: u16) -> u16 { + assert!(b != 0, "Gen32::next_bounded> Bound must be non-zero."); + let mut x = self.next_u16() as u32; + let mut mul = (b as u32).wrapping_mul(x); + let mut low = mul as u16; + if low < b { + let threshold = b.wrapping_neg() % b; + while low < threshold { + x = self.next_u32() as u32; + mul = (b as u32).wrapping_mul(x); + low = mul as u16; + } + } + let high = (mul >> 16) as u16; + high + } + + /// Gets a value out of the slice given (by copy). + /// + /// * The default impl will not pick past index `u16::MAX`. + #[inline(always)] + fn pick(&mut self, buf: &[T]) -> T + where + Self: Sized, + T: Copy, + { + let end: u16 = saturating_usize_as_u16(buf.len()); + buf[usize::try_from(self.next_bounded(end)).unwrap()] + } + + /// Gets a value out of the slice given (by shared ref). + /// + /// * The default impl will not pick past index `u16::MAX`. + #[inline(always)] + fn pick_ref<'b, T>(&mut self, buf: &'b [T]) -> &'b T + where + Self: Sized, + { + let end: u16 = saturating_usize_as_u16(buf.len()); + &buf[usize::try_from(self.next_bounded(end)).unwrap()] + } + + /// Gets a value out of the slice given (by unique ref). + /// + /// * The default impl will not pick past index `u16::MAX`. + #[inline(always)] + fn pick_mut<'b, T>(&mut self, buf: &'b mut [T]) -> &'b mut T + where + Self: Sized, + { + let end: u16 = saturating_usize_as_u16(buf.len()); + &mut buf[usize::try_from(self.next_bounded(end)).unwrap()] + } + + /// Shuffles a slice in `O(len)` time. + /// + /// * The default impl shuffles only the first `u16::MAX` elements. + #[inline] + fn shuffle(&mut self, buf: &mut [T]) + where + Self: Sized, + { + // Note(Lokathor): The "standard" Fisher-Yates shuffle goes backward from + // the end of the slice, but this version allows us to access memory forward + // from the start to the end, so that we play more nicely with the + // fetch-ahead of most modern CPUs. 
+ let mut possibility_count: u16 = buf.len().try_into().unwrap_or(u16::max_value()); + let mut this_index: usize = 0; + let end = buf.len() - 1; + while this_index < end { + let offset = self.next_bounded(possibility_count) as usize; + buf.swap(this_index, this_index + offset); + possibility_count -= 1; + this_index += 1; + } + } +} + +// Asserts that `Gen32` is an object-safe trait. +const _: [&mut dyn Gen32; 0] = []; + +/// Converts the `usize` into a `u16`, or gives `u16::MAX` if that wouldn't fit. +#[inline(always)] +const fn saturating_usize_as_u16(val: usize) -> u16 { + if val <= u16::MAX as usize { + val as u16 + } else { + u16::MAX + } +} diff --git a/src/random/pcg32.rs b/src/random/pcg32.rs new file mode 100644 index 0000000..78e8e30 --- /dev/null +++ b/src/random/pcg32.rs @@ -0,0 +1,108 @@ +use super::{algorithms::*, Gen32}; + +/// A default seed for any PCG. +/// +/// Truncate to fit, as necessary. +pub const DEFAULT_PCG_SEED: u128 = 201526561274146932589719779721328219291; + +/// A default `inc` for any PCG. +/// +/// Truncate to fit, as necessary. +pub const DEFAULT_PCG_INC: u128 = 34172814569070222299; + +// Other multipliers: 0xffffffff0e703b65 0xf2fc5985 +const PCG_MULTIPLIER_32: u32 = 0xf13283ad; + +make_jump_lcgX!(jump_lcg32, u32); + +/// A [permuted congruential +/// generator](https://en.wikipedia.org/wiki/Permuted_congruential_generator) +/// with 32 bits of output per step. +/// +/// * Generally you should create new generator values with the +/// [`seed`](Self::seed) constructor. This will shuffle around the inputs +/// somewhat, so it will work alright even with "boring" input values like +/// `seed(0,0)` or whatever. +/// * If you want to exactly save/restore a generator use the `Into` and `From` +/// impls to convert the generator into and from a `[u32; 2]`. +/// * The methods on this type are quite minimal. You're expected to use the +/// [`Gen32`] trait to provide most of the useful operations. 
+/// +/// Full list of methods can be found in the [`Gen32`] trait +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct RNG { + /// 32 bit state, which is used to generate the output + state: u32, + /// `inc` used when advancing the state forward + inc: u32, +} + +impl RNG { + /// Seed a new generator. + /// Used to create a new random number generator. + pub const fn seed(seed: u32, inc: u32) -> Self { + let inc = (inc << 1) | 1; + let mut state = pcg_core_state32!(0_u32, inc); + state = state.wrapping_add(seed); + state = pcg_core_state32!(state, inc); + Self { state, inc } + } + + /// Gets the next 32-bits of output. + #[inline] + pub fn next_u32(&mut self) -> u32 { + // LLVM do the instruction-level parallelism plz ;_; + let out = pcg_rxs_m_xs_u32_to_u32!(self.state); + self.state = pcg_core_state32!(self.state, self.inc); + out + } + /// Gets the next 16-bits of output. + #[inline] + pub fn next_u16(&mut self) -> u16 { + let out = pcg_xsh_rs_u32_to_u16!(self.state); + self.state = pcg_core_state32!(self.state, self.inc); + out + } + /// Jumps the generator by `delta` steps forward. + /// + /// The generator sequence loops, so if you want to go "backwards" you can + /// just subtract the number of steps you want to go back from `u32::MAX` and + /// jump by that amount. 
+ #[inline] + pub fn jump(&mut self, delta: u32) { + self.state = jump_lcg32(delta, self.state, PCG_MULTIPLIER_32, self.inc); + } +} + +impl Gen32 for RNG { + #[inline(always)] + fn next_u32(&mut self) -> u32 { + RNG::next_u32(self) + } + + #[inline(always)] + fn next_u16(&mut self) -> u16 { + RNG::next_u16(self) + } +} + +impl Default for RNG { + fn default() -> Self { + const THE_DEFAULT: RNG = RNG::seed(DEFAULT_PCG_SEED as _, DEFAULT_PCG_INC as _); + THE_DEFAULT + } +} + +impl From<[u32; 2]> for RNG { + fn from([state, inc]: [u32; 2]) -> Self { + Self { state, inc } + } +} + +impl From for [u32; 2] { + fn from(pcg: RNG) -> Self { + [pcg.state, pcg.inc] + } +} + + diff --git a/src/save.rs b/src/save.rs index b45a3a2..130dc35 100644 --- a/src/save.rs +++ b/src/save.rs @@ -38,7 +38,7 @@ //! Then, call [`set_timer_for_timeout`] to set the timer you intend to use to //! track the timeout that prevents errors with the save media from hanging your //! game. For more information on GBA timers, see the -//! [`timers`](`crate::io::timers`) module's documentation. +//! [`timers`](`crate::mmio_types::TimerControl`) module's documentation. //! //! ```rust //! # use gba::save; @@ -276,7 +276,7 @@ impl SaveAccess { self.access.verify(offset, buffer) } - /// Returns whether this save media requires the use of [`prepare_write`]. + /// Returns whether this save media requires the use of [`SaveAccess::prepare_write`]. pub fn requires_prepare_write(&self) -> bool { self.info.requires_prepare_write } diff --git a/src/save/utils.rs b/src/save/utils.rs index c255095..ca4bde6 100644 --- a/src/save/utils.rs +++ b/src/save/utils.rs @@ -92,7 +92,7 @@ impl Timeout { } /// Returns whether a number of milliseconds has passed since the last call - /// to [`start`]. + /// to [`Timeout::start()`]. 
pub fn is_timeout_met(&self, check_ms: u16) -> bool { self.active && check_ms * 17 < self.timer_l.read() } diff --git a/src/sync/statics.rs b/src/sync/statics.rs index a986472..56cfbd3 100644 --- a/src/sync/statics.rs +++ b/src/sync/statics.rs @@ -196,7 +196,7 @@ unsafe fn transfer(dst: *mut T, src: *const T) { /// variable. /// /// This type only works with owned values. If you need to work with borrows, -/// consider using [`Mutex`] instead. +/// consider using [`super::Mutex`] instead. /// /// ## Performance /// @@ -218,7 +218,7 @@ unsafe fn transfer(dst: *mut T, src: *const T) { /// dangerous as disabling IRQs can cause your program to miss out on important /// interrupts such as V-Blank. /// -/// Consider using [`Mutex`] instead if you need to use a large amount of +/// Consider using [`super::Mutex`] instead if you need to use a large amount of /// operations that would cause IRQs to be disabled. Also consider using /// `#[repr(align(4))]` to force proper alignment for your type. pub struct Static {