From a6daaf9fa33715c9a311a612591687791334b7e7 Mon Sep 17 00:00:00 2001
From: Derek Hageman <hageman@inthat.cloud>
Date: Sat, 29 Jan 2022 17:43:06 -0700
Subject: [PATCH 1/2] Standardize ROM function access

Make all ROM functions (normal and floating point) provide both a direct
call that does the operation and a module with a ptr() function to get
the function pointer.
---
 boards/rp-pico/examples/pico_ws2812_led.rs |   2 +-
 rp2040-hal/examples/rom_funcs.rs           |   2 +-
 rp2040-hal/src/rom_data.rs                 | 298 ++++++++++++---------
 3 files changed, 175 insertions(+), 127 deletions(-)

diff --git a/boards/rp-pico/examples/pico_ws2812_led.rs b/boards/rp-pico/examples/pico_ws2812_led.rs
index ab100ae..cba725b 100644
--- a/boards/rp-pico/examples/pico_ws2812_led.rs
+++ b/boards/rp-pico/examples/pico_ws2812_led.rs
@@ -119,7 +119,7 @@ fn main() -> ! {
 
     // Import the `sin` function for a smooth hue animation from the
     // Pico rp2040 ROM:
-    let sin = rp_pico::hal::rom_data::float_funcs::fsin();
+    let sin = rp_pico::hal::rom_data::float_funcs::fsin::ptr();
 
     // Create a count down timer for the Ws2812 instance:
     let timer = Timer::new(pac.TIMER, &mut pac.RESETS);
diff --git a/rp2040-hal/examples/rom_funcs.rs b/rp2040-hal/examples/rom_funcs.rs
index 0d38233..2509ac8 100644
--- a/rp2040-hal/examples/rom_funcs.rs
+++ b/rp2040-hal/examples/rom_funcs.rs
@@ -138,7 +138,7 @@ fn main() -> ! {
     // Some functions require a look-up in a table. First we do the lookup and
     // find the function pointer in ROM (you only want to do this once per
     // function).
-    let fmul = hal::rom_data::float_funcs::fmul();
+    let fmul = hal::rom_data::float_funcs::fmul::ptr();
 
     // Then we can call the function whenever we want
     let start_rom = cortex_m::peripheral::SYST::get_current();
diff --git a/rp2040-hal/src/rom_data.rs b/rp2040-hal/src/rom_data.rs
index 02aa014..95981d8 100644
--- a/rp2040-hal/src/rom_data.rs
+++ b/rp2040-hal/src/rom_data.rs
@@ -47,56 +47,106 @@ unsafe fn rom_hword_as_ptr(rom_address: *const u16) -> *const u32 {
     ptr as *const u32
 }
 
-macro_rules! rom_funcs {
+macro_rules! declare_rom_function {
     (
-        $(
-            $(#[$outer:meta])*
-            $c:literal $name:ident (
-                $( $aname:ident : $aty:ty ),*
-            ) -> $ret:ty ;
-        )*
+        $(#[$outer:meta])*
+        fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
+        $lookup:block
     ) => {
-        $(
-            $(#[$outer])*
-            pub fn $name($( $aname:$aty ),*) -> $ret{
-                let func:  extern "C" fn( $( $aty ),* ) -> $ret = rom_table_lookup(FUNC_TABLE, *$c);
-                func($( $aname ),*)
+        #[doc = r"Additional access for the `"]
+        #[doc = stringify!($name)]
+        #[doc = r"` ROM function."]
+        pub mod $name {
+            /// Retrieve a function pointer.
+            pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
+                let p: *const u32 = $lookup;
+                unsafe {
+                    let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
+                    func
+                }
             }
-        )*
-    }
+        }
+
+        $(#[$outer])*
+        pub extern "C" fn $name( $($argname: $ty),* ) -> $ret {
+            $name::ptr()($($argname),*)
+        }
+    };
+
+    (
+        $(#[$outer:meta])*
+        unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty
+        $lookup:block
+    ) => {
+        #[doc = r"Additional access for the `"]
+        #[doc = stringify!($name)]
+        #[doc = r"` ROM function."]
+        pub mod $name {
+            /// Retrieve a function pointer.
+            pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
+                let p: *const u32 = $lookup;
+                unsafe {
+                    let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
+                    func
+                }
+            }
+        }
+
+        $(#[$outer])*
+        pub unsafe extern "C" fn $name( $($argname: $ty),* ) -> $ret {
+            $name::ptr()($($argname),*)
+        }
+    };
 }
 
-macro_rules! rom_funcs_unsafe {
+macro_rules! rom_functions {
+    () => {};
+
     (
-        $(
-            $(#[$outer:meta])*
-            $c:literal $name:ident (
-                $( $aname:ident : $aty:ty ),*
-            ) -> $ret:ty ;
-        )*
+        $(#[$outer:meta])*
+        $c:literal fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty;
+
+        $($rest:tt)*
     ) => {
-        $(
+        declare_rom_function! {
             $(#[$outer])*
-            pub unsafe fn $name($( $aname:$aty ),*) -> $ret{
-                let func:  extern "C" fn( $( $aty ),* ) -> $ret = rom_table_lookup(FUNC_TABLE, *$c);
-                func($( $aname ),*)
+            fn $name( $($argname: $ty),* ) -> $ret {
+                $crate::rom_data::rom_table_lookup($crate::rom_data::FUNC_TABLE, *$c)
             }
-        )*
-    }
+        }
+
+        rom_functions!($($rest)*);
+    };
+
+    (
+        $(#[$outer:meta])*
+        $c:literal unsafe fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty;
+
+        $($rest:tt)*
+    ) => {
+        declare_rom_function! {
+            $(#[$outer])*
+            unsafe fn $name( $($argname: $ty),* ) -> $ret {
+                $crate::rom_data::rom_table_lookup($crate::rom_data::FUNC_TABLE, *$c)
+            }
+        }
+
+        rom_functions!($($rest)*);
+    };
 }
 
-rom_funcs! {
+rom_functions! {
     /// Return a count of the number of 1 bits in value.
-    b"P3" popcount32(value: u32) -> u32;
+    b"P3" fn popcount32(value: u32) -> u32;
 
     /// Return the bits of value in the reverse order.
-    b"R3" reverse32(value: u32) -> u32;
+    b"R3" fn reverse32(value: u32) -> u32;
 
     /// Return the number of consecutive high order 0 bits of value. If value is zero, returns 32.
-    b"L3" clz32(value: u32) -> u32;
+    b"L3" fn clz32(value: u32) -> u32;
 
     /// Return the number of consecutive low order 0 bits of value. If value is zero, returns 32.
-    b"T3" ctz32(value: u32) -> u32;
+    b"T3" fn ctz32(value: u32) -> u32;
 
     /// Resets the RP2040 and uses the watchdog facility to re-start in BOOTSEL mode:
     ///   * gpio_activity_pin_mask is provided to enable an 'activity light' via GPIO attached LED
@@ -108,68 +158,66 @@ rom_funcs! {
     ///    * 0 To enable both interfaces (as per cold boot).
     ///    * 1 To disable the USB Mass Storage Interface.
     ///    * 2 to Disable the USB PICOBOOT Interface.
-    b"UB" reset_to_usb_boot(gpio_activity_pin_mask: u32, disable_interface_mask: u32) -> ();
-}
+    b"UB" fn reset_to_usb_boot(gpio_activity_pin_mask: u32, disable_interface_mask: u32) -> ();
 
-rom_funcs_unsafe! {
     /// Sets n bytes start at ptr to the value c and returns ptr
-    b"MS" memset(ptr: *mut u8, c: u8, n: u8) -> *mut u8;
+    b"MS" unsafe fn memset(ptr: *mut u8, c: u8, n: u8) -> *mut u8;
 
     /// Sets n bytes start at ptr to the value c and returns ptr.
     ///
     /// Note this is a slightly more efficient variant of _memset that may only
     /// be used if ptr is word aligned.
-    b"M4" memset4(ptr: *mut u32, c: u8, n: u32) -> *mut u32;
+    b"M4" unsafe fn memset4(ptr: *mut u32, c: u8, n: u32) -> *mut u32;
 
     /// Copies n bytes starting at src to dest and returns dest. The results are undefined if the
     /// regions overlap.
-    b"MC" memcpy(dest: *mut u8, src: *mut u8, n: u32) -> u8;
+    b"MC" unsafe fn memcpy(dest: *mut u8, src: *mut u8, n: u32) -> u8;
 
     /// Copies n bytes starting at src to dest and returns dest. The results are undefined if the
     /// regions overlap.
     ///
     /// Note this is a slightly more efficient variant of _memcpy that may only be
     /// used if dest and src are word aligned.
-    b"C4" memcpy44(dest: *mut u32, src: *mut u32, n: u32) -> *mut u8;
+    b"C4" unsafe fn memcpy44(dest: *mut u32, src: *mut u32, n: u32) -> *mut u8;
 
     /// Restore all QSPI pad controls to their default state, and connect the SSI to the QSPI pads.
-    b"IF" connect_internal_flash() -> ();
+    b"IF" unsafe fn connect_internal_flash() -> ();
 
     /// First set up the SSI for serial-mode operations, then issue the fixed XIP exit sequence.
     ///
     /// Note that the bootrom code uses the IO forcing logic to drive the CS pin, which must be
     /// cleared before returning the SSI to XIP mode (e.g. by a call to _flash_flush_cache). This
     /// function configures the SSI with a fixed SCK clock divisor of /6.
-    b"EX" flash_exit_xip() -> ();
+    b"EX" unsafe fn flash_exit_xip() -> ();
 
     /// Erase a count bytes, starting at addr (offset from start of flash). Optionally, pass a
     /// block erase command e.g. D8h block erase, and the size of the block erased by this
     /// command — this function will use the larger block erase where possible, for much higher
     /// erase speed. addr must be aligned to a 4096-byte sector, and count must be a multiple of
     /// 4096 bytes.
-    b"RE" flash_range_erase(addr: u32, count: usize, block_size: u32, block_cmd: u8) -> ();
+    b"RE" unsafe fn flash_range_erase(addr: u32, count: usize, block_size: u32, block_cmd: u8) -> ();
 
     /// Program data to a range of flash addresses starting at `addr` (and
     /// offset from the start of flash) and `count` bytes in size. The value
     /// `addr` must be aligned to a 256-byte boundary, and `count` must be a
     /// multiple of 256.
-    b"RP" flash_range_program(addr: u32, data: *const u8, count: usize) -> ();
+    b"RP" unsafe fn flash_range_program(addr: u32, data: *const u8, count: usize) -> ();
 
     /// Flush and enable the XIP cache. Also clears the IO forcing on QSPI CSn, so that the SSI can
     /// drive the flashchip select as normal.
-    b"FC" flash_flush_cache() -> ();
+    b"FC" unsafe fn flash_flush_cache() -> ();
 
     /// Configure the SSI to generate a standard 03h serial read command, with 24 address bits,
     /// upon each XIP access. This is a very slow XIP configuration, but is very widely supported.
     /// The debugger calls this function after performing a flash erase/programming operation, so
     /// that the freshly-programmed code and data is visible to the debug host, without having to
     /// know exactly what kind of flash device is connected.
-    b"CX" flash_enter_cmd_xip() -> ();
+    b"CX" unsafe fn flash_enter_cmd_xip() -> ();
 
     /// This is the method that is entered by core 1 on reset to wait to be launched by core 0.
     /// There are few cases where you should call this method (resetting core 1 is much better).
     /// This method does not return and should only ever be called on core 1.
-    b"WV" wait_for_vector() -> !;
+    b"WV" unsafe fn wait_for_vector() -> !;
 }
 
 unsafe fn convert_str(s: *const u8) -> &'static str {
@@ -230,18 +278,18 @@ pub mod float_funcs {
             )*
         ) => {
             $(
-                $(#[$outer])*
-                pub fn $name() -> extern "C" fn( $( $aname : $aty ),* ) -> $ret {
-                    let table: *const usize = $crate::rom_data::soft_float_table() as *const usize;
-                    unsafe {
-                        // This is the entry in the table. Our offset is given as a
-                        // byte offset, but we want the table index (each pointer in
-                        // the table is 4 bytes long)
-                        let entry: *const usize = table.offset($offset / 4);
-                        // Read the pointer from the table
-                        let ptr: usize = core::ptr::read(entry);
-                        // Convert the pointer we read into a function
-                        core::mem::transmute_copy(&ptr)
+                declare_rom_function! {
+                    $(#[$outer])*
+                    fn $name( $( $aname : $aty ),* ) -> $ret {
+                        let table: *const usize = $crate::rom_data::soft_float_table();
+                        unsafe {
+                            // This is the entry in the table. Our offset is given as a
+                            // byte offset, but we want the table index (each pointer in
+                            // the table is 4 bytes long)
+                            let entry: *const usize = table.offset($offset / 4);
+                            // Read the pointer from the table
+                            core::ptr::read(entry) as *const u32
+                        }
                     }
                 }
             )*
@@ -249,97 +297,97 @@ pub mod float_funcs {
     }
 
     make_functions! {
-        /// Returns a function that will calculate `a + b`
+        /// Calculates `a + b`
         0x00 fadd(a: f32, b: f32) -> f32;
-        /// Returns a function that will calculate `a - b`
+        /// Calculates `a - b`
         0x04 fsub(a: f32, b: f32) -> f32;
-        /// Returns a function that will calculate `a * b`
+        /// Calculates `a * b`
         0x08 fmul(a: f32, b: f32) -> f32;
-        /// Returns a function that will calculate `a / b`
+        /// Calculates `a / b`
         0x0c fdiv(a: f32, b: f32) -> f32;
 
         // 0x10 and 0x14 are deprecated
 
-        /// Returns a function that will calculate `sqrt(v)` (or return -Infinity if v is negative)
+        /// Calculates `sqrt(v)` (or returns -Infinity if `v` is negative)
         0x18 fsqrt(v: f32) -> f32;
-        /// Returns a function that will convert an f32 to a signed integer,
+        /// Converts an f32 to a signed integer,
         /// rounding towards -Infinity, and clamping the result to lie within the
         /// range `-0x80000000` to `0x7FFFFFFF`
         0x1c float_to_int(v: f32) -> i32;
-        /// Returns a function that will convert an f32 to an signed fixed point
+        /// Converts an f32 to a signed fixed point
         /// integer representation where n specifies the position of the binary
         /// point in the resulting fixed point representation, e.g.
         /// `f(0.5f, 16) == 0x8000`. This method rounds towards -Infinity,
         /// and clamps the resulting integer to lie within the range `0x00000000` to
         /// `0xFFFFFFFF`
         0x20 float_to_fix(v: f32, n: i32) -> i32;
-        /// Returns a function that will convert an f32 to an unsigned integer,
+        /// Converts an f32 to an unsigned integer,
         /// rounding towards -Infinity, and clamping the result to lie within the
         /// range `0x00000000` to `0xFFFFFFFF`
         0x24 float_to_uint(v: f32) -> u32;
-        /// Returns a function that will convert an f32 to an unsigned fixed point
+        /// Converts an f32 to an unsigned fixed point
         /// integer representation where n specifies the position of the binary
         /// point in the resulting fixed point representation, e.g.
         /// `f(0.5f, 16) == 0x8000`. This method rounds towards -Infinity,
         /// and clamps the resulting integer to lie within the range `0x00000000` to
         /// `0xFFFFFFFF`
         0x28 float_to_ufix(v: f32, n: i32) -> u32;
-        /// Returns a function that will convert a signed integer to the nearest
+        /// Converts a signed integer to the nearest
         /// f32 value, rounding to even on tie
         0x2c int_to_float(v: i32) -> f32;
-        /// Returns a function that will convert a signed fixed point integer
+        /// Converts a signed fixed point integer
         /// representation to the nearest f32 value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so `f =
         /// nearest(v/(2^n))`
         0x30 fix_to_float(v: i32, n: i32) -> f32;
-        /// Returns a function that will convert an unsigned integer to the nearest
+        /// Converts an unsigned integer to the nearest
         /// f32 value, rounding to even on tie
         0x34 uint_to_float(v: u32) -> f32;
-        /// Returns a function that will convert an unsigned fixed point integer
+        /// Converts an unsigned fixed point integer
         /// representation to the nearest f32 value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so `f =
         /// nearest(v/(2^n))`
         0x38 ufix_to_float(v: u32, n: i32) -> f32;
-        /// Returns a function that will calculate the cosine of `angle`. The value
+        /// Calculates the cosine of `angle`. The value
         /// of `angle` is in radians, and must be in the range `-1024` to `1024`
         0x3c fcos(angle: f32) -> f32;
-        /// Returns a function that will calculate the sine of `angle`. The value of
+        /// Calculates the sine of `angle`. The value of
         /// `angle` is in radians, and must be in the range `-1024` to `1024`
         0x40 fsin(angle: f32) -> f32;
-        /// Returns a function that will calculate the tangent of `angle`. The value
+        /// Calculates the tangent of `angle`. The value
         /// of `angle` is in radians, and must be in the range `-1024` to `1024`
         0x44 ftan(angle: f32) -> f32;
 
         // 0x48 is deprecated
 
-        /// Returns a function that will calculate the exponential value of `v`,
+        /// Calculates the exponential value of `v`,
         /// i.e. `e ** v`
         0x4c fexp(v: f32) -> f32;
-        /// Returns a function that will calculate the natural logarithm of `v`. If `v <= 0` return -Infinity
+        /// Calculates the natural logarithm of `v`. If `v <= 0` returns -Infinity
         0x50 fln(v: f32) -> f32;
 
         // These are only on BootROM v2 or higher
 
-        /// Returns a function that will compare two floating point numbers, returning:
+        /// Compares two floating point numbers, returning:
         ///     • 0 if a == b
         ///     • -1 if a < b
         ///     • 1 if a > b
         0x54 fcmp(a: f32, b: f32) -> i32;
-        /// Returns a function that will compute the arc tangent of `y/x` using the
+        /// Computes the arc tangent of `y/x` using the
         /// signs of arguments to determine the correct quadrant
         0x58 fatan2(y: f32, x: f32) -> f32;
-        /// Returns a function that will convert a signed 64-bit integer to the
+        /// Converts a signed 64-bit integer to the
         /// nearest f32 value, rounding to even on tie
         0x5c int64_to_float(v: i64) -> f32;
-        /// Returns a function that will convert a signed fixed point 64-bit integer
+        /// Converts a signed fixed point 64-bit integer
         /// representation to the nearest f32 value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so `f =
         /// nearest(v/(2^n))`
         0x60 fix64_to_float(v: i64, n: i32) -> f32;
-        /// Returns a function that will convert an unsigned 64-bit integer to the
+        /// Converts an unsigned 64-bit integer to the
         /// nearest f32 value, rounding to even on tie
         0x64 uint64_to_float(v: u64) -> f32;
-        /// Returns a function that will convert an unsigned fixed point 64-bit
+        /// Converts an unsigned fixed point 64-bit
         /// integer representation to the nearest f32 value, rounding to even on
         /// tie. `n` specifies the position of the binary point in fixed point, so
         /// `f = nearest(v/(2^n))`
@@ -348,18 +396,18 @@ pub mod float_funcs {
         /// and clamping the result to lie within the range `-0x8000000000000000` to
         /// `0x7FFFFFFFFFFFFFFF`
         0x6c float_to_int64(v: f32) -> i64;
-        /// Returns a function that will convert an f32 to a signed fixed point
+        /// Converts an f32 to a signed fixed point
         /// 64-bit integer representation where n specifies the position of the
         /// binary point in the resulting fixed point representation - e.g. `f(0.5f,
         /// 16) == 0x8000`. This method rounds towards -Infinity, and clamps the
         /// resulting integer to lie within the range `-0x8000000000000000` to
         /// `0x7FFFFFFFFFFFFFFF`
         0x70 float_to_fix64(v: f32, n: i32) -> f32;
-        /// Returns a function that will convert an f32 to an unsigned 64-bit
+        /// Converts an f32 to an unsigned 64-bit
         /// integer, rounding towards -Infinity, and clamping the result to lie
         /// within the range `0x0000000000000000` to `0xFFFFFFFFFFFFFFFF`
         0x74 float_to_uint64(v: f32) -> u64;
-        /// Returns a function that will convert an f32 to an unsigned fixed point
+        /// Converts an f32 to an unsigned fixed point
         /// 64-bit integer representation where n specifies the position of the
         /// binary point in the resulting fixed point representation, e.g. `f(0.5f,
         /// 16) == 0x8000`. This method rounds towards -Infinity, and clamps the
@@ -384,18 +432,18 @@ pub mod double_funcs {
             )*
         ) => {
             $(
-                $(#[$outer])*
-                pub fn $name() -> extern "C" fn( $( $aname : $aty ),* ) -> $ret {
-                    let table: *const usize = $crate::rom_data::soft_double_table() as *const usize;
-                    unsafe {
-                        // This is the entry in the table. Our offset is given as a
-                        // byte offset, but we want the table index (each pointer in
-                        // the table is 4 bytes long)
-                        let entry: *const usize = table.offset($offset / 4);
-                        // Read the pointer from the table
-                        let ptr: usize = core::ptr::read(entry);
-                        // Convert the pointer we read into a function
-                        core::mem::transmute_copy(&ptr)
+                declare_rom_function! {
+                    $(#[$outer])*
+                    fn $name( $( $aname : $aty ),* ) -> $ret {
+                        let table: *const usize = $crate::rom_data::soft_double_table();
+                        unsafe {
+                            // This is the entry in the table. Our offset is given as a
+                            // byte offset, but we want the table index (each pointer in
+                            // the table is 4 bytes long)
+                            let entry: *const usize = table.offset($offset / 4);
+                            // Read the pointer from the table
+                            core::ptr::read(entry) as *const u32
+                        }
                     }
                 }
             )*
@@ -403,97 +451,97 @@ pub mod double_funcs {
     }
 
     make_double_funcs! {
-        /// Returns a function that will calculate `a + b`
+        /// Calculates `a + b`
         0x00 dadd(a: f64, b: f64) -> f64;
-        /// Returns a function that will calculate `a - b`
+        /// Calculates `a - b`
         0x04 dsub(a: f64, b: f64) -> f64;
-        /// Returns a function that will calculate `a * b`
+        /// Calculates `a * b`
         0x08 dmul(a: f64, b: f64) -> f64;
-        /// Returns a function that will calculate `a / b`
+        /// Calculates `a / b`
         0x0c ddiv(a: f64, b: f64) -> f64;
 
         // 0x10 and 0x14 are deprecated
 
-        /// Returns a function that will calculate `sqrt(v)` (or return -Infinity if v is negative)
+        /// Calculates `sqrt(v)` (or returns -Infinity if `v` is negative)
         0x18 dsqrt(v: f64) -> f64;
-        /// Returns a function that will convert an f64 to a signed integer,
+        /// Converts an f64 to a signed integer,
         /// rounding towards -Infinity, and clamping the result to lie within the
         /// range `-0x80000000` to `0x7FFFFFFF`
         0x1c double_to_int(v: f64) -> i32;
-        /// Returns a function that will convert an f64 to an signed fixed point
+        /// Converts an f64 to a signed fixed point
         /// integer representation where n specifies the position of the binary
         /// point in the resulting fixed point representation, e.g.
         /// `f(0.5f, 16) == 0x8000`. This method rounds towards -Infinity,
         /// and clamps the resulting integer to lie within the range `0x00000000` to
         /// `0xFFFFFFFF`
         0x20 double_to_fix(v: f64, n: i32) -> i32;
-        /// Returns a function that will convert an f64 to an unsigned integer,
+        /// Converts an f64 to an unsigned integer,
         /// rounding towards -Infinity, and clamping the result to lie within the
         /// range `0x00000000` to `0xFFFFFFFF`
         0x24 double_to_uint(v: f64) -> u32;
-        /// Returns a function that will convert an f64 to an unsigned fixed point
+        /// Converts an f64 to an unsigned fixed point
         /// integer representation where n specifies the position of the binary
         /// point in the resulting fixed point representation, e.g.
         /// `f(0.5f, 16) == 0x8000`. This method rounds towards -Infinity,
         /// and clamps the resulting integer to lie within the range `0x00000000` to
         /// `0xFFFFFFFF`
         0x28 double_to_ufix(v: f64, n: i32) -> u32;
-        /// Returns a function that will convert a signed integer to the nearest
+        /// Converts a signed integer to the nearest
         /// double value, rounding to even on tie
         0x2c int_to_double(v: i32) -> f64;
-        /// Returns a function that will convert a signed fixed point integer
+        /// Converts a signed fixed point integer
         /// representation to the nearest double value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so `f =
         /// nearest(v/(2^n))`
         0x30 fix_to_double(v: i32, n: i32) -> f64;
-        /// Returns a function that will convert an unsigned integer to the nearest
+        /// Converts an unsigned integer to the nearest
         /// double value, rounding to even on tie
         0x34 uint_to_double(v: u32) -> f64;
-        /// Returns a function that will convert an unsigned fixed point integer
+        /// Converts an unsigned fixed point integer
         /// representation to the nearest double value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so f =
         /// nearest(v/(2^n))
         0x38 ufix_to_double(v: u32, n: i32) -> f64;
-        /// Returns a function that will calculate the cosine of `angle`. The value
+        /// Calculates the cosine of `angle`. The value
         /// of `angle` is in radians, and must be in the range `-1024` to `1024`
         0x3c dcos(angle: f64) -> f64;
-        /// Returns a function that will calculate the sine of `angle`. The value of
+        /// Calculates the sine of `angle`. The value of
         /// `angle` is in radians, and must be in the range `-1024` to `1024`
         0x40 dsin(angle: f64) -> f64;
-        /// Returns a function that will calculate the tangent of `angle`. The value
+        /// Calculates the tangent of `angle`. The value
         /// of `angle` is in radians, and must be in the range `-1024` to `1024`
         0x44 dtan(angle: f64) -> f64;
 
         // 0x48 is deprecated
 
-        /// Returns a function that will calculate the exponential value of `v`,
+        /// Calculates the exponential value of `v`,
         /// i.e. `e ** v`
         0x4c dexp(v: f64) -> f64;
-        /// Returns a function that will calculate the natural logarithm of v. If v <= 0 return -Infinity
+        /// Calculates the natural logarithm of `v`. If `v <= 0` returns -Infinity
         0x50 dln(v: f64) -> f64;
 
         // These are only on BootROM v2 or higher
 
-        /// Returns a function that will compare two floating point numbers, returning:
+        /// Compares two floating point numbers, returning:
         ///     • 0 if a == b
         ///     • -1 if a < b
         ///     • 1 if a > b
         0x54 dcmp(a: f64, b: f64) -> i32;
-        /// Returns a function that will compute the arc tangent of `y/x` using the
+        /// Computes the arc tangent of `y/x` using the
         /// signs of arguments to determine the correct quadrant
         0x58 datan2(y: f64, x: f64) -> f64;
-        /// Returns a function that will convert a signed 64-bit integer to the
+        /// Converts a signed 64-bit integer to the
         /// nearest double value, rounding to even on tie
         0x5c int64_to_double(v: i64) -> f64;
-        /// Returns a function that will convert a signed fixed point 64-bit integer
+        /// Converts a signed fixed point 64-bit integer
         /// representation to the nearest double value, rounding to even on tie. `n`
         /// specifies the position of the binary point in fixed point, so `f =
         /// nearest(v/(2^n))`
         0x60 fix64_to_doubl(v: i64, n: i32) -> f64;
-        /// Returns a function that will convert an unsigned 64-bit integer to the
+        /// Converts an unsigned 64-bit integer to the
         /// nearest double value, rounding to even on tie
         0x64 uint64_to_double(v: u64) -> f64;
-        /// Returns a function that will convert an unsigned fixed point 64-bit
+        /// Converts an unsigned fixed point 64-bit
         /// integer representation to the nearest double value, rounding to even on
         /// tie. `n` specifies the position of the binary point in fixed point, so
         /// `f = nearest(v/(2^n))`
@@ -502,25 +550,25 @@ pub mod double_funcs {
         /// and clamping the result to lie within the range `-0x8000000000000000` to
         /// `0x7FFFFFFFFFFFFFFF`
         0x6c double_to_int64(v: f64) -> i64;
-        /// Returns a function that will convert an f64 to a signed fixed point
+        /// Converts an f64 to a signed fixed point
         /// 64-bit integer representation where n specifies the position of the
         /// binary point in the resulting fixed point representation - e.g. `f(0.5f,
         /// 16) == 0x8000`. This method rounds towards -Infinity, and clamps the
         /// resulting integer to lie within the range `-0x8000000000000000` to
         /// `0x7FFFFFFFFFFFFFFF`
         0x70 double_to_fix64(v: f64, n: i32) -> i64;
-        /// Returns a function that will convert an f64 to an unsigned 64-bit
+        /// Converts an f64 to an unsigned 64-bit
         /// integer, rounding towards -Infinity, and clamping the result to lie
         /// within the range `0x0000000000000000` to `0xFFFFFFFFFFFFFFFF`
         0x74 double_to_uint64(v: f64) -> u64;
-        /// Returns a function that will convert an f64 to an unsigned fixed point
+        /// Converts an f64 to an unsigned fixed point
         /// 64-bit integer representation where n specifies the position of the
         /// binary point in the resulting fixed point representation, e.g. `f(0.5f,
         /// 16) == 0x8000`. This method rounds towards -Infinity, and clamps the
         /// resulting integer to lie within the range `0x0000000000000000` to
         /// `0xFFFFFFFFFFFFFFFF`
         0x78 double_to_ufix64(v: f64, n: i32) -> u64;
-        /// Returns a function that will convert an f64 to an f32
+        /// Converts an f64 to an f32
         0x7c double_to_float(v: f64) -> f32;
     }
 }

From 98fd6c1724e3fbea60940d67e57567e2e98df598 Mon Sep 17 00:00:00 2001
From: Derek Hageman <hageman@inthat.cloud>
Date: Sat, 29 Jan 2022 17:46:45 -0700
Subject: [PATCH 2/2] Add ROM function caching

Add a feature to enable automatic caching of the result of ROM table
function lookups.
---
 rp2040-hal/Cargo.toml      |  1 +
 rp2040-hal/src/rom_data.rs | 54 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/rp2040-hal/Cargo.toml b/rp2040-hal/Cargo.toml
index 2b323a5..342b76c 100644
--- a/rp2040-hal/Cargo.toml
+++ b/rp2040-hal/Cargo.toml
@@ -53,3 +53,4 @@ rt = ["rp2040-pac/rt"]
 #
 # embassy-traits = ["embassy_traits", "futures"]
 alloc = []
+rom-func-cache = []
diff --git a/rp2040-hal/src/rom_data.rs b/rp2040-hal/src/rom_data.rs
index 95981d8..d53d78f 100644
--- a/rp2040-hal/src/rom_data.rs
+++ b/rp2040-hal/src/rom_data.rs
@@ -58,6 +58,7 @@ macro_rules! declare_rom_function {
         #[doc = r"` ROM function."]
         pub mod $name {
             /// Retrieve a function pointer.
+            #[cfg(not(feature = "rom-func-cache"))]
             pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
                 let p: *const u32 = $lookup;
                 unsafe {
@@ -65,6 +66,32 @@ macro_rules! declare_rom_function {
                     func
                 }
             }
+
+            /// Retrieve a function pointer.
+            #[cfg(feature = "rom-func-cache")]
+            pub fn ptr() -> extern "C" fn( $($argname: $ty),* ) -> $ret {
+                use core::sync::atomic::{AtomicU16, Ordering};
+
+                // All pointers in the ROM fit in 16 bits, so we don't need a
+                // full width word to store the cached value.
+                static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
+                // This is safe because the lookup will always resolve
+                // to the same value.  So even if an interrupt or another
+                // core starts at the same time, it just repeats some
+                // work and eventually writes back the correct value.
+                let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
+                    0 => {
+                        let raw: *const u32 = $lookup;
+                        CACHED_PTR.store(raw as u16, Ordering::Relaxed);
+                        raw
+                    },
+                    val => val as *const u32,
+                };
+                unsafe {
+                    let func : extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
+                    func
+                }
+            }
         }
 
         $(#[$outer])*
@@ -83,6 +110,7 @@ macro_rules! declare_rom_function {
         #[doc = r"` ROM function."]
         pub mod $name {
             /// Retrieve a function pointer.
+            #[cfg(not(feature = "rom-func-cache"))]
             pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
                 let p: *const u32 = $lookup;
                 unsafe {
@@ -90,6 +118,32 @@ macro_rules! declare_rom_function {
                     func
                 }
             }
+
+            /// Retrieve a function pointer.
+            #[cfg(feature = "rom-func-cache")]
+            pub fn ptr() -> unsafe extern "C" fn( $($argname: $ty),* ) -> $ret {
+                use core::sync::atomic::{AtomicU16, Ordering};
+
+                // All pointers in the ROM fit in 16 bits, so we don't need a
+                // full width word to store the cached value.
+                static CACHED_PTR: AtomicU16 = AtomicU16::new(0);
+                // This is safe because the lookup will always resolve
+                // to the same value.  So even if an interrupt or another
+                // core starts at the same time, it just repeats some
+                // work and eventually writes back the correct value.
+                let p: *const u32 = match CACHED_PTR.load(Ordering::Relaxed) {
+                    0 => {
+                        let raw: *const u32 = $lookup;
+                        CACHED_PTR.store(raw as u16, Ordering::Relaxed);
+                        raw
+                    },
+                    val => val as *const u32,
+                };
+                unsafe {
+                    let func : unsafe extern "C" fn( $($argname: $ty),* ) -> $ret = core::mem::transmute(p);
+                    func
+                }
+            }
         }
 
         $(#[$outer])*