From 80c84b13bad87c0015d17132ae8656ea0cbfcded Mon Sep 17 00:00:00 2001 From: Derek Hageman Date: Tue, 1 Feb 2022 20:58:37 -0700 Subject: [PATCH] Add ROM floating point math library Hook up the ROM functions to floating point intrinsics. --- rp2040-hal/src/float/add_sub.rs | 89 +++++++++++ rp2040-hal/src/float/cmp.rs | 198 +++++++++++++++++++++++++ rp2040-hal/src/float/conv.rs | 154 +++++++++++++++++++ rp2040-hal/src/float/div.rs | 71 +++++++++ rp2040-hal/src/float/functions.rs | 236 ++++++++++++++++++++++++++++++ rp2040-hal/src/float/mod.rs | 146 ++++++++++++++++++ rp2040-hal/src/float/mul.rs | 67 +++++++++ rp2040-hal/src/lib.rs | 1 + rp2040-hal/src/sio.rs | 2 +- 9 files changed, 963 insertions(+), 1 deletion(-) create mode 100644 rp2040-hal/src/float/add_sub.rs create mode 100644 rp2040-hal/src/float/cmp.rs create mode 100644 rp2040-hal/src/float/conv.rs create mode 100644 rp2040-hal/src/float/div.rs create mode 100644 rp2040-hal/src/float/functions.rs create mode 100644 rp2040-hal/src/float/mod.rs create mode 100644 rp2040-hal/src/float/mul.rs diff --git a/rp2040-hal/src/float/add_sub.rs b/rp2040-hal/src/float/add_sub.rs new file mode 100644 index 0000000..06c987a --- /dev/null +++ b/rp2040-hal/src/float/add_sub.rs @@ -0,0 +1,89 @@ +use super::{Float, Int}; +use crate::rom_data; + +trait ROMAdd { + fn rom_add(self, b: Self) -> Self; +} + +impl ROMAdd for f32 { + fn rom_add(self, b: Self) -> Self { + rom_data::float_funcs::fadd(self, b) + } +} + +impl ROMAdd for f64 { + fn rom_add(self, b: Self) -> Self { + rom_data::double_funcs::dadd(self, b) + } +} + +fn add(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + let class_a = a.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + let class_b = b.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + + if class_a == F::Int::ZERO && class_b == F::Int::ZERO { + // inf + inf = inf + return a; + } + if class_a == F::SIGN_MASK && class_b == F::SIGN_MASK { + // -inf + (-inf) = -inf + return a; + } + + // Sign mismatch, or either is NaN already + return F::NAN; + } + + // [-]inf/NaN + X = [-]inf/NaN + return a; + } + + if b.is_not_finite() { + // X + [-]inf/NaN = [-]inf/NaN + return b; + } + + a.rom_add(b) +} + +intrinsics! { + #[alias = __addsf3vfp] + #[aeabi = __aeabi_fadd] + extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[bootrom_v2] + #[alias = __adddf3vfp] + #[aeabi = __aeabi_dadd] + extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + // The ROM just implements subtraction the same way, so just do it here + // and save the work of implementing more complicated NaN/inf handling. + + #[alias = __subsf3vfp] + #[aeabi = __aeabi_fsub] + extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + add(a, -b) + } + + #[bootrom_v2] + #[alias = __subdf3vfp] + #[aeabi = __aeabi_dsub] + extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + add(a, -b) + } + + extern "aapcs" fn __aeabi_frsub(a: f32, b: f32) -> f32 { + add(b, -a) + } + + #[bootrom_v2] + extern "aapcs" fn __aeabi_drsub(a: f64, b: f64) -> f64 { + add(b, -a) + } +} diff --git a/rp2040-hal/src/float/cmp.rs b/rp2040-hal/src/float/cmp.rs new file mode 100644 index 0000000..f3b16de --- /dev/null +++ b/rp2040-hal/src/float/cmp.rs @@ -0,0 +1,198 @@ +use super::Float; +use crate::rom_data; + +trait ROMCmp { + fn rom_cmp(self, b: Self) -> i32; +} + +impl ROMCmp for f32 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::float_funcs::fcmp(self, b) + } +} + +impl ROMCmp for f64 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::double_funcs::dcmp(self, b) + } +} + +fn le_abi(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + 1 + } else { + a.rom_cmp(b) + } +} + +fn ge_abi(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + -1 + } else { + a.rom_cmp(b) + } +} + +intrinsics! { + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqsf2, __ltsf2, __nesf2] + extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqdf2, __ltdf2, __nedf2] + extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtsf2] + extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + ge_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtdf2] + extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + ge_abi(a, b) + } + + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gesf2vfp(a: f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } +} diff --git a/rp2040-hal/src/float/conv.rs b/rp2040-hal/src/float/conv.rs new file mode 100644 index 0000000..40678fc --- /dev/null +++ b/rp2040-hal/src/float/conv.rs @@ -0,0 +1,154 @@ +use super::Float; +use crate::rom_data; + +// Some of these are also not connected in the Pico SDK. This is probably +// because the ROM version actually does a fixed point conversion, just with +// the fractional width set to zero. + +intrinsics! { + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2f] + extern "C" fn __floatsisf(i: i32) -> f32 { + rom_data::float_funcs::int_to_float(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2d] + extern "C" fn __floatsidf(i: i32) -> f64 { + rom_data::double_funcs::int_to_double(i) + } + + // Questionable gain + #[aeabi = __aeabi_l2f] + extern "C" fn __floatdisf(i: i64) -> f32 { + rom_data::float_funcs::int64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_l2d] + extern "C" fn __floatdidf(i: i64) -> f64 { + rom_data::double_funcs::int64_to_double(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_ui2f] + extern "C" fn __floatunsisf(i: u32) -> f32 { + rom_data::float_funcs::uint_to_float(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ui2d] + extern "C" fn __floatunsidf(i: u32) -> f64 { + rom_data::double_funcs::uint_to_double(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ul2f] + extern "C" fn __floatundisf(i: u64) -> f32 { + rom_data::float_funcs::uint64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_ul2d] + extern "C" fn __floatundidf(i: u64) -> f64 { + rom_data::double_funcs::uint64_to_double(i) + } + + + // The Pico SDK does some optimization here (e.x. fast paths for zero and + // one), but we can just directly connect it. + #[aeabi = __aeabi_f2iz] + extern "C" fn __fixsfsi(f: f32) -> i32 { + rom_data::float_funcs::float_to_int(f) + } + + #[bootrom_v2] + #[aeabi = __aeabi_f2lz] + extern "C" fn __fixsfdi(f: f32) -> i64 { + rom_data::float_funcs::float_to_int64(f) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2iz] + extern "C" fn __fixdfsi(f: f64) -> i32 { + rom_data::double_funcs::double_to_int(f) + } + + // Like with the 32 bit version, there's optimization that we just + // skip. + #[bootrom_v2] + #[aeabi = __aeabi_d2lz] + extern "C" fn __fixdfdi(f: f64) -> i64 { + rom_data::double_funcs::double_to_int64(f) + } + + #[slower_than_default] + #[aeabi = __aeabi_f2uiz] + extern "C" fn __fixunssfsi(f: f32) -> u32 { + rom_data::float_funcs::float_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_f2ulz] + extern "C" fn __fixunssfdi(f: f32) -> u64 { + rom_data::float_funcs::float_to_uint64(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2uiz] + extern "C" fn __fixunsdfsi(f: f64) -> u32 { + rom_data::double_funcs::double_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2ulz] + extern "C" fn __fixunsdfdi(f: f64) -> u64 { + rom_data::double_funcs::double_to_uint64(f) + } + + #[bootrom_v2] + #[alias = __extendsfdf2vfp] + #[aeabi = __aeabi_f2d] + extern "C" fn __extendsfdf2(f: f32) -> f64 { + if f.is_not_finite() { + return f64::from_repr( + // Not finite + f64::EXPONENT_MASK | + // Preserve NaN or inf + ((f.repr() & f32::SIGNIFICAND_MASK) as u64) | + // Preserve sign + ((f.repr() & f32::SIGN_MASK) as u64) << (f64::BITS-f32::BITS) + ); + } + rom_data::float_funcs::float_to_double(f) + } + + #[bootrom_v2] + #[alias = __truncdfsf2vfp] + #[aeabi = __aeabi_d2f] + extern "C" fn __truncdfsf2(f: f64) -> f32 { + if f.is_not_finite() { + let mut repr: u32 = + // Not finite + f32::EXPONENT_MASK | + // Preserve sign + ((f.repr() & f64::SIGN_MASK) >> (f64::BITS-f32::BITS)) as u32; + // Set NaN + if (f.repr() & f64::SIGNIFICAND_MASK) != 0 { + repr |= 1; + } + return f32::from_repr(repr); + } + rom_data::double_funcs::double_to_float(f) + } +} diff --git a/rp2040-hal/src/float/div.rs b/rp2040-hal/src/float/div.rs new file mode 100644 index 0000000..d0d1e2f --- /dev/null +++ b/rp2040-hal/src/float/div.rs @@ -0,0 +1,71 @@ +use super::Float; +use crate::rom_data; +use crate::sio::save_divider; + +trait ROMDiv { + fn rom_div(self, b: Self) -> Self; +} + +impl ROMDiv for f32 { + fn rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|_sio| rom_data::float_funcs::fdiv(self, b)) + } +} + +impl ROMDiv for f64 { + fn rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|_sio| rom_data::double_funcs::ddiv(self, b)) + } +} + +fn div(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + // inf/NaN / inf/NaN = NaN + return F::NAN; + } + + if b.is_zero() { + // inf/NaN / 0 = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf/NaN / (-X) = [-/+]inf/NaN + a.negate() + } else { + // [-]inf/NaN / X = [-]inf/NaN + a + }; + } + + if b.is_nan() { + // X / NaN = NaN + return b; + } + + // ROM handles X / 0 = [-]inf and X / [-]inf = [-]0, so we only + // need to catch 0 / 0 + if b.is_zero() && a.is_zero() { + return F::NAN; + } + + a.rom_div(b) +} + +intrinsics! { + #[alias = __divsf3vfp] + #[aeabi = __aeabi_fdiv] + extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div(a, b) + } + + #[bootrom_v2] + #[alias = __divdf3vfp] + #[aeabi = __aeabi_ddiv] + extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div(a, b) + } +} diff --git a/rp2040-hal/src/float/functions.rs b/rp2040-hal/src/float/functions.rs new file mode 100644 index 0000000..4ed8333 --- /dev/null +++ b/rp2040-hal/src/float/functions.rs @@ -0,0 +1,236 @@ +use crate::float::{Float, Int}; +use crate::rom_data; + +trait ROMFunctions { + fn sqrt(self) -> Self; + fn ln(self) -> Self; + fn exp(self) -> Self; + fn sin(self) -> Self; + fn cos(self) -> Self; + fn tan(self) -> Self; + fn atan2(self, y: Self) -> Self; + + fn to_trig_range(self) -> Self; +} + +impl ROMFunctions for f32 { + fn sqrt(self) -> Self { + rom_data::float_funcs::fsqrt(self) + } + + fn ln(self) -> Self { + rom_data::float_funcs::fln(self) + } + + fn exp(self) -> Self { + rom_data::float_funcs::fexp(self) + } + + fn sin(self) -> Self { + rom_data::float_funcs::fsin(self) + } + + fn cos(self) -> Self { + rom_data::float_funcs::fcos(self) + } + + fn tan(self) -> Self { + rom_data::float_funcs::ftan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::float_funcs::fatan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -128 < X < 128, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 134 { + self + } else { + self % (core::f32::consts::PI * 2.0) + } + } +} + +impl ROMFunctions for f64 { + fn sqrt(self) -> Self { + rom_data::double_funcs::dsqrt(self) + } + + fn ln(self) -> Self { + rom_data::double_funcs::dln(self) + } + + fn exp(self) -> Self { + rom_data::double_funcs::dexp(self) + } + + fn sin(self) -> Self { + rom_data::double_funcs::dsin(self) + } + + fn cos(self) -> Self { + rom_data::double_funcs::dcos(self) + } + fn tan(self) -> Self { + rom_data::double_funcs::dtan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::double_funcs::datan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -1024 < X < 1024, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 1033 { + self + } else { + self % (core::f64::consts::PI * 2.0) + } + } +} + +fn is_negative_nonzero_or_nan(f: F) -> bool { + let repr = f.repr(); + if (repr & F::SIGN_MASK) != F::Int::ZERO { + // Negative, so anything other than exactly zero + return (repr & (!F::SIGN_MASK)) != F::Int::ZERO; + } + // NaN + (repr & (F::EXPONENT_MASK | F::SIGNIFICAND_MASK)) > F::EXPONENT_MASK +} + +fn sqrt(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.sqrt() + } +} + +fn ln(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.ln() + } +} + +fn exp(f: F) -> F { + if f.is_nan() { + F::NAN + } else { + f.exp() + } +} + +fn sin(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().sin() + } +} + +fn cos(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().cos() + } +} + +fn tan(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().tan() + } +} + +fn atan2(x: F, y: F) -> F { + if x.is_nan() || y.is_nan() { + F::NAN + } else { + x.to_trig_range().atan2(y) + } +} + +// Name collisions +mod intrinsics { + intrinsics! { + extern "C" fn sqrtf(f: f32) -> f32 { + super::sqrt(f) + } + + #[bootrom_v2] + extern "C" fn sqrt(f: f64) -> f64 { + super::sqrt(f) + } + + extern "C" fn logf(f: f32) -> f32 { + super::ln(f) + } + + #[bootrom_v2] + extern "C" fn log(f: f64) -> f64 { + super::ln(f) + } + + extern "C" fn expf(f: f32) -> f32 { + super::exp(f) + } + + #[bootrom_v2] + extern "C" fn exp(f: f64) -> f64 { + super::exp(f) + } + + #[slower_than_default] + extern "C" fn sinf(f: f32) -> f32 { + super::sin(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn sin(f: f64) -> f64 { + super::sin(f) + } + + #[slower_than_default] + extern "C" fn cosf(f: f32) -> f32 { + super::cos(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn cos(f: f64) -> f64 { + super::cos(f) + } + + #[slower_than_default] + extern "C" fn tanf(f: f32) -> f32 { + super::tan(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn tan(f: f64) -> f64 { + super::tan(f) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2f(a: f32, b: f32) -> f32 { + super::atan2(a, b) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2(a: f64, b: f64) -> f64 { + super::atan2(a, b) + } + } +} diff --git a/rp2040-hal/src/float/mod.rs b/rp2040-hal/src/float/mod.rs new file mode 100644 index 0000000..2fe07a0 --- /dev/null +++ b/rp2040-hal/src/float/mod.rs @@ -0,0 +1,146 @@ +use core::ops; + +// Borrowed and simplified from compiler-builtins so we can use bit ops +// on floating point without macro soup. +pub trait Int: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Shl + + ops::Shr + + ops::BitOr + + ops::BitXor + + ops::BitAnd + + ops::Not +{ + const ZERO: Self; +} + +macro_rules! int_impl { + ($ty:ty) => { + impl Int for $ty { + const ZERO: Self = 0; + } + }; +} + +int_impl!(u32); +int_impl!(u64); + +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same with as the float + type Int: Int; + + /// NaN representation for the float + const NAN: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Return a sign swapped `self` + fn negate(self) -> Self; + + /// Returns true if `self` is either NaN or infinity + fn is_not_finite(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::EXPONENT_MASK + } + + /// Returns true if `self` is infinity + fn is_infinity(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) == Self::EXPONENT_MASK + } + + /// Returns true if `self is NaN + fn is_nan(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) > Self::EXPONENT_MASK + } + + /// Returns true if `self` is negative + fn is_sign_negative(self) -> bool { + (self.repr() & Self::SIGN_MASK) != Self::Int::ZERO + } + + /// Returns true if `self` is zero (either sign) + fn is_zero(self) -> bool { + (self.repr() & (Self::SIGNIFICAND_MASK | Self::EXPONENT_MASK)) == Self::Int::ZERO + } +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + + const NAN: Self = <$ty>::NAN; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + + fn negate(self) -> Self { + -self + } + } + }; +} + +float_impl!(f32, u32, 32, 23); +float_impl!(f64, u64, 64, 52); + +mod add_sub; +mod cmp; +mod conv; +mod div; +mod functions; +mod mul; diff --git a/rp2040-hal/src/float/mul.rs b/rp2040-hal/src/float/mul.rs new file mode 100644 index 0000000..aab1a87 --- /dev/null +++ b/rp2040-hal/src/float/mul.rs @@ -0,0 +1,67 @@ +use super::Float; +use crate::rom_data; + +trait ROMMul { + fn rom_mul(self, b: Self) -> Self; +} + +impl ROMMul for f32 { + fn rom_mul(self, b: Self) -> Self { + rom_data::float_funcs::fmul(self, b) + } +} + +impl ROMMul for f64 { + fn rom_mul(self, b: Self) -> Self { + rom_data::double_funcs::dmul(self, b) + } +} + +fn mul(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_zero() { + // [-]inf/NaN * 0 = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf/NaN * (-X) = [-/+]inf/NaN + a.negate() + } else { + // [-]inf/NaN * X = [-]inf/NaN + a + }; + } + + if b.is_not_finite() { + if a.is_zero() { + // 0 * [-]inf/NaN = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // (-X) * [+/-]inf/NaN = [-/+]inf/NaN + b.negate() + } else { + // X * [-]inf/NaN = [-]inf/NaN + b + }; + } + + a.rom_mul(b) +} + +intrinsics! { + #[alias = __mulsf3vfp] + #[aeabi = __aeabi_fmul] + extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[bootrom_v2] + #[alias = __muldf3vfp] + #[aeabi = __aeabi_dmul] + extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } +} diff --git a/rp2040-hal/src/lib.rs b/rp2040-hal/src/lib.rs index 47850da..8d2db4e 100644 --- a/rp2040-hal/src/lib.rs +++ b/rp2040-hal/src/lib.rs @@ -23,6 +23,7 @@ pub(crate) mod atomic_register_access; pub mod clocks; mod critical_section_impl; pub mod dma; +mod float; pub mod gpio; pub mod i2c; pub mod multicore; diff --git a/rp2040-hal/src/sio.rs b/rp2040-hal/src/sio.rs index c449687..a2f769f 100644 --- a/rp2040-hal/src/sio.rs +++ b/rp2040-hal/src/sio.rs @@ -171,7 +171,7 @@ impl SioFifo { } } -fn save_divider(f: F) -> R +pub(crate) fn save_divider(f: F) -> R where F: FnOnce(&pac::sio::RegisterBlock) -> R, {