diff --git a/rp2040-hal/Cargo.toml b/rp2040-hal/Cargo.toml index 342b76c..ecf2ffc 100644 --- a/rp2040-hal/Cargo.toml +++ b/rp2040-hal/Cargo.toml @@ -54,3 +54,5 @@ rt = ["rp2040-pac/rt"] # embassy-traits = ["embassy_traits", "futures"] alloc = [] rom-func-cache = [] +disable-intrinsics = [] +rom-v2-intrinsics = [] diff --git a/rp2040-hal/src/float/add_sub.rs b/rp2040-hal/src/float/add_sub.rs new file mode 100644 index 0000000..06c987a --- /dev/null +++ b/rp2040-hal/src/float/add_sub.rs @@ -0,0 +1,89 @@ +use super::{Float, Int}; +use crate::rom_data; + +trait ROMAdd { + fn rom_add(self, b: Self) -> Self; +} + +impl ROMAdd for f32 { + fn rom_add(self, b: Self) -> Self { + rom_data::float_funcs::fadd(self, b) + } +} + +impl ROMAdd for f64 { + fn rom_add(self, b: Self) -> Self { + rom_data::double_funcs::dadd(self, b) + } +} + +fn add<F: Float + ROMAdd>(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + let class_a = a.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + let class_b = b.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + + if class_a == F::Int::ZERO && class_b == F::Int::ZERO { + // inf + inf = inf + return a; + } + if class_a == F::SIGN_MASK && class_b == F::SIGN_MASK { + // -inf + (-inf) = -inf + return a; + } + + // Sign mismatch, or either is NaN already + return F::NAN; + } + + // [-]inf/NaN + X = [-]inf/NaN + return a; + } + + if b.is_not_finite() { + // X + [-]inf/NaN = [-]inf/NaN + return b; + } + + a.rom_add(b) +} + +intrinsics! { + #[alias = __addsf3vfp] + #[aeabi = __aeabi_fadd] + extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[bootrom_v2] + #[alias = __adddf3vfp] + #[aeabi = __aeabi_dadd] + extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + // The ROM just implements subtraction the same way, so just do it here + // and save the work of implementing more complicated NaN/inf handling. 
+ + #[alias = __subsf3vfp] + #[aeabi = __aeabi_fsub] + extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + add(a, -b) + } + + #[bootrom_v2] + #[alias = __subdf3vfp] + #[aeabi = __aeabi_dsub] + extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + add(a, -b) + } + + extern "aapcs" fn __aeabi_frsub(a: f32, b: f32) -> f32 { + add(b, -a) + } + + #[bootrom_v2] + extern "aapcs" fn __aeabi_drsub(a: f64, b: f64) -> f64 { + add(b, -a) + } +} diff --git a/rp2040-hal/src/float/cmp.rs b/rp2040-hal/src/float/cmp.rs new file mode 100644 index 0000000..f3b16de --- /dev/null +++ b/rp2040-hal/src/float/cmp.rs @@ -0,0 +1,198 @@ +use super::Float; +use crate::rom_data; + +trait ROMCmp { + fn rom_cmp(self, b: Self) -> i32; +} + +impl ROMCmp for f32 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::float_funcs::fcmp(self, b) + } +} + +impl ROMCmp for f64 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::double_funcs::dcmp(self, b) + } +} + +fn le_abi<F: Float + ROMCmp>(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + 1 + } else { + a.rom_cmp(b) + } +} + +fn ge_abi<F: Float + ROMCmp>(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + -1 + } else { + a.rom_cmp(b) + } +} + +intrinsics! 
{ + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqsf2, __ltsf2, __nesf2] + extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqdf2, __ltdf2, __nedf2] + extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtsf2] + extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + ge_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtdf2] + extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + ge_abi(a, b) + } + + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gesf2vfp(a: 
f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } +} diff --git a/rp2040-hal/src/float/conv.rs b/rp2040-hal/src/float/conv.rs new file mode 100644 index 0000000..40678fc --- /dev/null +++ b/rp2040-hal/src/float/conv.rs @@ -0,0 +1,154 @@ +use super::Float; +use crate::rom_data; + +// Some of these are also not connected in the Pico SDK. This is probably +// because the ROM version actually does a fixed point conversion, just with +// the fractional width set to zero. + +intrinsics! 
{ + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2f] + extern "C" fn __floatsisf(i: i32) -> f32 { + rom_data::float_funcs::int_to_float(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2d] + extern "C" fn __floatsidf(i: i32) -> f64 { + rom_data::double_funcs::int_to_double(i) + } + + // Questionable gain. NOTE(review): the other 64-bit conversions here are gated with #[bootrom_v2]; confirm int64_to_float is available on V1 ROMs. + #[aeabi = __aeabi_l2f] + extern "C" fn __floatdisf(i: i64) -> f32 { + rom_data::float_funcs::int64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_l2d] + extern "C" fn __floatdidf(i: i64) -> f64 { + rom_data::double_funcs::int64_to_double(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_ui2f] + extern "C" fn __floatunsisf(i: u32) -> f32 { + rom_data::float_funcs::uint_to_float(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ui2d] + extern "C" fn __floatunsidf(i: u32) -> f64 { + rom_data::double_funcs::uint_to_double(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ul2f] + extern "C" fn __floatundisf(i: u64) -> f32 { + rom_data::float_funcs::uint64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_ul2d] + extern "C" fn __floatundidf(i: u64) -> f64 { + rom_data::double_funcs::uint64_to_double(i) + } + + + // The Pico SDK does some optimization here (e.g. fast paths for zero and + // one), but we can just directly connect it. NOTE(review): confirm the ROM conversion truncates toward zero, as `__aeabi_f2iz` requires. + #[aeabi = __aeabi_f2iz] + extern "C" fn __fixsfsi(f: f32) -> i32 { + rom_data::float_funcs::float_to_int(f) + } + + #[bootrom_v2] + #[aeabi = __aeabi_f2lz] + extern "C" fn __fixsfdi(f: f32) -> i64 { + rom_data::float_funcs::float_to_int64(f) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2iz] + extern "C" fn __fixdfsi(f: f64) -> i32 { + rom_data::double_funcs::double_to_int(f) + } + + // Like with the 32 bit version, there's optimization that we just + // skip. 
+ #[bootrom_v2] + #[aeabi = __aeabi_d2lz] + extern "C" fn __fixdfdi(f: f64) -> i64 { + rom_data::double_funcs::double_to_int64(f) + } + + #[slower_than_default] + #[aeabi = __aeabi_f2uiz] + extern "C" fn __fixunssfsi(f: f32) -> u32 { + rom_data::float_funcs::float_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_f2ulz] + extern "C" fn __fixunssfdi(f: f32) -> u64 { + rom_data::float_funcs::float_to_uint64(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2uiz] + extern "C" fn __fixunsdfsi(f: f64) -> u32 { + rom_data::double_funcs::double_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2ulz] + extern "C" fn __fixunsdfdi(f: f64) -> u64 { + rom_data::double_funcs::double_to_uint64(f) + } + + #[bootrom_v2] + #[alias = __extendsfdf2vfp] + #[aeabi = __aeabi_f2d] + extern "C" fn __extendsfdf2(f: f32) -> f64 { + if f.is_not_finite() { + return f64::from_repr( + // Not finite + f64::EXPONENT_MASK | + // Preserve NaN or inf + ((f.repr() & f32::SIGNIFICAND_MASK) as u64) | + // Preserve sign + ((f.repr() & f32::SIGN_MASK) as u64) << (f64::BITS-f32::BITS) + ); + } + rom_data::float_funcs::float_to_double(f) + } + + #[bootrom_v2] + #[alias = __truncdfsf2vfp] + #[aeabi = __aeabi_d2f] + extern "C" fn __truncdfsf2(f: f64) -> f32 { + if f.is_not_finite() { + let mut repr: u32 = + // Not finite + f32::EXPONENT_MASK | + // Preserve sign + ((f.repr() & f64::SIGN_MASK) >> (f64::BITS-f32::BITS)) as u32; + // Set NaN + if (f.repr() & f64::SIGNIFICAND_MASK) != 0 { + repr |= 1; + } + return f32::from_repr(repr); + } + rom_data::double_funcs::double_to_float(f) + } +} diff --git a/rp2040-hal/src/float/div.rs b/rp2040-hal/src/float/div.rs new file mode 100644 index 0000000..d0d1e2f --- /dev/null +++ b/rp2040-hal/src/float/div.rs @@ -0,0 +1,71 @@ +use super::Float; +use crate::rom_data; +use crate::sio::save_divider; + +trait ROMDiv { + fn rom_div(self, b: Self) -> Self; +} + +impl ROMDiv for f32 { + fn 
rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|_sio| rom_data::float_funcs::fdiv(self, b)) + } +} + +impl ROMDiv for f64 { + fn rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|_sio| rom_data::double_funcs::ddiv(self, b)) + } +} + +fn div<F: Float + ROMDiv>(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + // inf/NaN / inf/NaN = NaN + return F::NAN; + } + + if a.is_nan() { + // NaN / X = NaN (inf / 0 is NOT NaN: IEEE 754 gives a signed infinity, handled below) + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf / (-X) = [-/+]inf, including X = 0 + a.negate() + } else { + // [-]inf / X = [-]inf, including X = 0 + a + }; + } + + if b.is_nan() { + // X / NaN = NaN + return b; + } + + // ROM handles X / 0 = [-]inf and X / [-]inf = [-]0, so we only + // need to catch 0 / 0 + if b.is_zero() && a.is_zero() { + return F::NAN; + } + + a.rom_div(b) +} + +intrinsics! { + #[alias = __divsf3vfp] + #[aeabi = __aeabi_fdiv] + extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div(a, b) + } + + #[bootrom_v2] + #[alias = __divdf3vfp] + #[aeabi = __aeabi_ddiv] + extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div(a, b) + } +} diff --git a/rp2040-hal/src/float/functions.rs b/rp2040-hal/src/float/functions.rs new file mode 100644 index 0000000..4ed8333 --- /dev/null +++ b/rp2040-hal/src/float/functions.rs @@ -0,0 +1,236 @@ +use crate::float::{Float, Int}; +use crate::rom_data; + +trait ROMFunctions { + fn sqrt(self) -> Self; + fn ln(self) -> Self; + fn exp(self) -> Self; + fn sin(self) -> Self; + fn cos(self) -> Self; + fn tan(self) -> Self; + fn atan2(self, y: Self) -> Self; + + fn to_trig_range(self) -> Self; +} + +impl ROMFunctions for f32 { + fn sqrt(self) -> Self { + rom_data::float_funcs::fsqrt(self) + } + + fn ln(self) -> Self { + rom_data::float_funcs::fln(self) + } + + fn exp(self) -> Self { + rom_data::float_funcs::fexp(self) + } + + fn sin(self) -> Self { + 
rom_data::float_funcs::fsin(self) + } + + fn cos(self) -> Self { + rom_data::float_funcs::fcos(self) + } + + fn tan(self) -> Self { + rom_data::float_funcs::ftan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::float_funcs::fatan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -128 < X < 128, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 134 { + self + } else { + self % (core::f32::consts::PI * 2.0) + } + } +} + +impl ROMFunctions for f64 { + fn sqrt(self) -> Self { + rom_data::double_funcs::dsqrt(self) + } + + fn ln(self) -> Self { + rom_data::double_funcs::dln(self) + } + + fn exp(self) -> Self { + rom_data::double_funcs::dexp(self) + } + + fn sin(self) -> Self { + rom_data::double_funcs::dsin(self) + } + + fn cos(self) -> Self { + rom_data::double_funcs::dcos(self) + } + fn tan(self) -> Self { + rom_data::double_funcs::dtan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::double_funcs::datan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -1024 < X < 1024, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 1033 { + self + } else { + self % (core::f64::consts::PI * 2.0) + } + } +} + +fn is_negative_nonzero_or_nan<F: Float>(f: F) -> bool { + let repr = f.repr(); + if (repr & F::SIGN_MASK) != F::Int::ZERO { + // Negative, so anything other than exactly zero + return (repr & (!F::SIGN_MASK)) != F::Int::ZERO; + } + // NaN + (repr & (F::EXPONENT_MASK | F::SIGNIFICAND_MASK)) > F::EXPONENT_MASK +} + +fn sqrt<F: Float + ROMFunctions>(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.sqrt() + } +} + +fn ln<F: Float + ROMFunctions>(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.ln() + } +} + +fn exp<F: Float + ROMFunctions>(f: F) -> F { + if f.is_nan() { + F::NAN + } else { + f.exp() + } +} + +fn sin<F: Float + ROMFunctions>(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().sin() + } +} + +fn cos<F: Float + ROMFunctions>(f: F) -> F { + if 
f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().cos() + } +} + +fn tan<F: Float + ROMFunctions>(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().tan() + } +} + +fn atan2<F: Float + ROMFunctions>(x: F, y: F) -> F { + if x.is_nan() || y.is_nan() { + F::NAN + } else { + x.atan2(y) + } +} + +// Name collisions +mod intrinsics { + intrinsics! { + extern "C" fn sqrtf(f: f32) -> f32 { + super::sqrt(f) + } + + #[bootrom_v2] + extern "C" fn sqrt(f: f64) -> f64 { + super::sqrt(f) + } + + extern "C" fn logf(f: f32) -> f32 { + super::ln(f) + } + + #[bootrom_v2] + extern "C" fn log(f: f64) -> f64 { + super::ln(f) + } + + extern "C" fn expf(f: f32) -> f32 { + super::exp(f) + } + + #[bootrom_v2] + extern "C" fn exp(f: f64) -> f64 { + super::exp(f) + } + + #[slower_than_default] + extern "C" fn sinf(f: f32) -> f32 { + super::sin(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn sin(f: f64) -> f64 { + super::sin(f) + } + + #[slower_than_default] + extern "C" fn cosf(f: f32) -> f32 { + super::cos(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn cos(f: f64) -> f64 { + super::cos(f) + } + + #[slower_than_default] + extern "C" fn tanf(f: f32) -> f32 { + super::tan(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn tan(f: f64) -> f64 { + super::tan(f) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2f(a: f32, b: f32) -> f32 { + super::atan2(a, b) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2(a: f64, b: f64) -> f64 { + super::atan2(a, b) + } + } +} diff --git a/rp2040-hal/src/float/mod.rs b/rp2040-hal/src/float/mod.rs new file mode 100644 index 0000000..2fe07a0 --- /dev/null +++ b/rp2040-hal/src/float/mod.rs @@ -0,0 +1,146 @@ +use core::ops; + +// Borrowed and simplified from compiler-builtins so we can use bit ops +// on floating point without macro soup. 
+pub trait Int: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add<Output = Self> + + ops::Sub<Output = Self> + + ops::Div<Output = Self> + + ops::Shl<Output = Self> + + ops::Shr<Output = Self> + + ops::BitOr<Output = Self> + + ops::BitXor<Output = Self> + + ops::BitAnd<Output = Self> + + ops::Not<Output = Self> +{ + const ZERO: Self; +} + +macro_rules! int_impl { + ($ty:ty) => { + impl Int for $ty { + const ZERO: Self = 0; + } + }; +} + +int_impl!(u32); +int_impl!(u64); + +pub trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add<Output = Self> + + ops::Sub<Output = Self> + + ops::Div<Output = Self> + + ops::Rem<Output = Self> +{ + /// A uint of the same width as the float + type Int: Int; + + /// NaN representation for the float + const NAN: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Return a sign swapped `self` + fn negate(self) -> Self; + + /// Returns true if `self` is either NaN or infinity + fn is_not_finite(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::EXPONENT_MASK + } + + /// Returns true if `self` is infinity + fn is_infinity(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) == Self::EXPONENT_MASK + } + + /// Returns true if `self` is NaN + fn is_nan(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) > Self::EXPONENT_MASK + } + + /// Returns true if `self` is negative + fn is_sign_negative(self) -> bool { + (self.repr() & Self::SIGN_MASK) != Self::Int::ZERO + } + + /// Returns true if 
`self` is zero (either sign) + fn is_zero(self) -> bool { + (self.repr() & (Self::SIGNIFICAND_MASK | Self::EXPONENT_MASK)) == Self::Int::ZERO + } +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + + const NAN: Self = <$ty>::NAN; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + + fn negate(self) -> Self { + -self + } + } + }; +} + +float_impl!(f32, u32, 32, 23); +float_impl!(f64, u64, 64, 52); + +mod add_sub; +mod cmp; +mod conv; +mod div; +mod functions; +mod mul; diff --git a/rp2040-hal/src/float/mul.rs b/rp2040-hal/src/float/mul.rs new file mode 100644 index 0000000..aab1a87 --- /dev/null +++ b/rp2040-hal/src/float/mul.rs @@ -0,0 +1,67 @@ +use super::Float; +use crate::rom_data; + +trait ROMMul { + fn rom_mul(self, b: Self) -> Self; +} + +impl ROMMul for f32 { + fn rom_mul(self, b: Self) -> Self { + rom_data::float_funcs::fmul(self, b) + } +} + +impl ROMMul for f64 { + fn rom_mul(self, b: Self) -> Self { + rom_data::double_funcs::dmul(self, b) + } +} + +fn mul<F: Float + ROMMul>(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_zero() || b.is_nan() { + // [-]inf/NaN * 0 = NaN, and anything * NaN = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf/NaN * (-X) = [-/+]inf/NaN + a.negate() + } else { + // [-]inf/NaN * X = [-]inf/NaN + a + }; + } + + if b.is_not_finite() { + if a.is_zero() { + // 0 * [-]inf/NaN = NaN + return F::NAN; + } + + return if a.is_sign_negative() { + // (-X) * [+/-]inf/NaN = [-/+]inf/NaN + b.negate() + } else { + // X * [-]inf/NaN = [-]inf/NaN + b + }; + } + + a.rom_mul(b) +} + +intrinsics! 
{ + #[alias = __mulsf3vfp] + #[aeabi = __aeabi_fmul] + extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[bootrom_v2] + #[alias = __muldf3vfp] + #[aeabi = __aeabi_dmul] + extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } +} diff --git a/rp2040-hal/src/intrinsics.rs b/rp2040-hal/src/intrinsics.rs new file mode 100644 index 0000000..dbb67a9 --- /dev/null +++ b/rp2040-hal/src/intrinsics.rs @@ -0,0 +1,271 @@ +/// Generate a series of aliases for an intrinsic function. +macro_rules! intrinsics_aliases { + ( + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty, + ) => {}; + ( + unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty, + ) => {}; + + ( + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty, + $alias:ident + $($rest:ident)* + ) => { + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + intrinsics! { + extern $abi fn $alias( $($argname: $ty),* ) -> $ret { + $name($($argname),*) + } + } + + intrinsics_aliases! { + extern $abi fn $name( $($argname: $ty),* ) -> $ret, + $($rest)* + } + }; + + ( + unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty, + $alias:ident + $($rest:ident)* + ) => { + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + intrinsics! { + unsafe extern $abi fn $alias( $($argname: $ty),* ) -> $ret { + $name($($argname),*) + } + } + + intrinsics_aliases! { + unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret, + $($rest)* + } + }; +} + +/// The macro used to define overridden intrinsics. +/// +/// This is heavily inspired by the macro used by compiler-builtins. The idea +/// is to abstract anything special that needs to be done to override an +/// intrinsic function. Intrinsic generation is disabled for non-ARM targets +/// so things like CI and docs generation do not have problems. 
Additionally +/// they can be disabled with the crate feature `disable-intrinsics` for +/// testing or comparing performance. +/// +/// Like the compiler-builtins macro, it accepts a series of functions that +/// looks like normal Rust code: +/// +/// intrinsics! { +/// extern "C" fn foo(a: i32) -> u32 { +/// // ... +/// } +/// +/// #[nonstandard_attribute] +/// extern "C" fn bar(a: i32) -> u32 { +/// // ... +/// } +/// } +/// +/// Each function can also be decorated with nonstandard attributes to control +/// additional behaviour: +/// +/// * `slower_than_default` - indicates that the override is slower than the +/// default implementation. Currently this just disables the override +/// entirely. +/// * `bootrom_v2` - indicates that the override is only available +/// on a V2 bootrom or higher. Only enabled when the feature +/// `rom-v2-intrinsics` is set. +/// * `alias` - accepts a list of names to alias the intrinsic to. +/// * `aeabi` - accepts a list of ARM EABI names to alias to. +/// +macro_rules! intrinsics { + () => {}; + + ( + #[slower_than_default] + $(#[$($attr:tt)*])* + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + // Not exported, but defined so the actual implementation is + // considered used + #[allow(dead_code)] + fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + intrinsics!($($rest)*); + }; + + ( + #[bootrom_v2] + $(#[$($attr:tt)*])* + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + // Not exported, but defined so the actual implementation is + // considered used + #[cfg(not(feature = "rom-v2-intrinsics"))] + #[allow(dead_code)] + fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + #[cfg(feature = "rom-v2-intrinsics")] + intrinsics! 
{ + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + } + + intrinsics!($($rest)*); + }; + + ( + #[alias = $($alias:ident),*] + $(#[$($attr:tt)*])* + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + intrinsics! { + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + } + + intrinsics_aliases! { + extern $abi fn $name( $($argname: $ty),* ) -> $ret, + $($alias) * + } + + intrinsics!($($rest)*); + }; + + ( + #[alias = $($alias:ident),*] + $(#[$($attr:tt)*])* + unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + intrinsics! { + $(#[$($attr)*])* + unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + } + + intrinsics_aliases! { + unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret, + $($alias) * + } + + intrinsics!($($rest)*); + }; + + ( + #[aeabi = $($alias:ident),*] + $(#[$($attr:tt)*])* + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + intrinsics! { + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + } + + intrinsics_aliases! 
{ + extern "aapcs" fn $name( $($argname: $ty),* ) -> $ret, + $($alias) * + } + + intrinsics!($($rest)*); + }; + + ( + $(#[$($attr:tt)*])* + extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + $(#[$($attr)*])* + extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + mod $name { + #[no_mangle] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { + super::$name($($argname),*) + } + } + + // Not exported, but defined so the actual implementation is + // considered used + #[cfg(not(all(target_arch = "arm", not(feature = "disable-intrinsics"))))] + #[allow(dead_code)] + fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + intrinsics!($($rest)*); + }; + + ( + $(#[$($attr:tt)*])* + unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { + $($body:tt)* + } + + $($rest:tt)* + ) => { + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + $(#[$($attr)*])* + unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + #[cfg(all(target_arch = "arm", not(feature = "disable-intrinsics")))] + mod $name { + #[no_mangle] + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret { + super::$name($($argname),*) + } + } + + // Not exported, but defined so the actual implementation is + // considered used + #[cfg(not(all(target_arch = "arm", not(feature = "disable-intrinsics"))))] + #[allow(dead_code)] + unsafe fn $name( $($argname: $ty),* ) -> $ret { + $($body)* + } + + intrinsics!($($rest)*); + }; +} diff --git a/rp2040-hal/src/lib.rs b/rp2040-hal/src/lib.rs index 6f70dc3..8d2db4e 100644 --- a/rp2040-hal/src/lib.rs +++ b/rp2040-hal/src/lib.rs @@ -15,11 +15,15 @@ pub use paste; pub extern crate rp2040_pac as pac; +#[macro_use] +mod 
intrinsics; + pub mod adc; pub(crate) mod atomic_register_access; pub mod clocks; mod critical_section_impl; pub mod dma; +mod float; pub mod gpio; pub mod i2c; pub mod multicore; diff --git a/rp2040-hal/src/rom_data.rs b/rp2040-hal/src/rom_data.rs index d53d78f..4e28aec 100644 --- a/rp2040-hal/src/rom_data.rs +++ b/rp2040-hal/src/rom_data.rs @@ -24,6 +24,9 @@ const FUNC_TABLE: *const u16 = 0x0000_0014 as _; /// Pointer to the public data lookup table. const DATA_TABLE: *const u16 = 0x0000_0016 as _; +/// Address of the version number of the ROM. +const VERSION_NUMBER: *const u8 = 0x0000_0013 as _; + /// Retrive rom content from a table using a code. fn rom_table_lookup<T>(table: *const u16, tag: RomFnTableCode) -> T { unsafe { @@ -215,24 +218,26 @@ rom_functions! { b"UB" fn reset_to_usb_boot(gpio_activity_pin_mask: u32, disable_interface_mask: u32) -> (); /// Sets n bytes start at ptr to the value c and returns ptr - b"MS" unsafe fn memset(ptr: *mut u8, c: u8, n: u8) -> *mut u8; + b"MS" unsafe fn memset(ptr: *mut u8, c: u8, n: u32) -> *mut u8; /// Sets n bytes start at ptr to the value c and returns ptr. /// /// Note this is a slightly more efficient variant of _memset that may only /// be used if ptr is word aligned. - b"M4" unsafe fn memset4(ptr: *mut u32, c: u8, n: u32) -> *mut u32; + // Note the datasheet does not match the actual ROM for the code here, see + // https://github.com/raspberrypi/pico-feedback/issues/217 + b"S4" unsafe fn memset4(ptr: *mut u32, c: u8, n: u32) -> *mut u32; /// Copies n bytes starting at src to dest and returns dest. The results are undefined if the /// regions overlap. - b"MC" unsafe fn memcpy(dest: *mut u8, src: *mut u8, n: u32) -> u8; + b"MC" unsafe fn memcpy(dest: *mut u8, src: *const u8, n: u32) -> *mut u8; /// Copies n bytes starting at src to dest and returns dest. The results are undefined if the /// regions overlap. 
/// /// Note this is a slightly more efficient variant of _memcpy that may only be /// used if dest and src are word aligned. - b"C4" unsafe fn memcpy44(dest: *mut u32, src: *mut u32, n: u32) -> *mut u8; + b"C4" unsafe fn memcpy44(dest: *mut u32, src: *const u32, n: u32) -> *mut u8; /// Restore all QSPI pad controls to their default state, and connect the SSI to the QSPI pads. b"IF" unsafe fn connect_internal_flash() -> (); @@ -274,6 +279,60 @@ rom_functions! { b"WV" unsafe fn wait_for_vector() -> !; } +// Various C intrinsics in the ROM +intrinsics! { + #[alias = __popcountdi2] + extern "C" fn __popcountsi2(x: u32) -> u32 { + popcount32(x) + } + + #[alias = __clzdi2] + extern "C" fn __clzsi2(x: u32) -> u32 { + clz32(x) + } + + #[alias = __ctzdi2] + extern "C" fn __ctzsi2(x: u32) -> u32 { + ctz32(x) + } + + // __rbit is only unofficial, but it shows up in the ARM documentation, + // so may as well hook it up. + #[alias = __rbitl] + extern "C" fn __rbit(x: u32) -> u32 { + reverse32(x) + } + + unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) -> () { + // Different argument order + memset(dest, c as u8, n as u32); + } + + #[alias = __aeabi_memset8] + unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) -> () { + // Different argument order + memset4(dest as *mut u32, c as u8, n as u32); + } + + unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) -> () { + memset(dest, 0, n as u32); + } + + #[alias = __aeabi_memclr8] + unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) -> () { + memset4(dest as *mut u32, 0, n as u32); + } + + unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) -> () { + memcpy(dest, src, n as u32); + } + + #[alias = __aeabi_memcpy8] + unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) -> () { + memcpy44(dest as *mut u32, src as *const u32, n as u32); + } +} + unsafe fn convert_str(s: *const u8) -> &'static str { let mut 
end = s; while *end != 0 { @@ -283,6 +342,11 @@ unsafe fn convert_str(s: *const u8) -> &'static str { core::str::from_utf8_unchecked(s) } +/// The version number of the rom. +pub fn rom_version_number() -> u8 { + unsafe { *VERSION_NUMBER } +} + /// The Raspberry Pi Trading Ltd copyright string. pub fn copyright_string() -> &'static str { let s: *const u8 = rom_table_lookup(DATA_TABLE, *b"CR"); @@ -316,6 +380,12 @@ pub fn fplib_end() -> *const u8 { /// This entry is only present in the V2 bootrom. See Table 182 in the RP2040 datasheet for the contents of this table. pub fn soft_double_table() -> *const usize { + if rom_version_number() < 2 { + panic!( + "Double precision operations require V2 bootrom (found: V{})", + rom_version_number() + ); + } rom_table_lookup(DATA_TABLE, *b"SD") } @@ -419,9 +489,44 @@ pub mod float_funcs { 0x4c fexp(v: f32) -> f32; /// Calculates the natural logarithm of `v`. If `v <= 0` return -Infinity 0x50 fln(v: f32) -> f32; + } - // These are only on BootROM v2 or higher + macro_rules! make_functions_v2 { + ( + $( + $(#[$outer:meta])* + $offset:literal $name:ident ( + $( $aname:ident : $aty:ty ),* + ) -> $ret:ty; + )* + ) => { + $( + declare_rom_function! { + $(#[$outer])* + fn $name( $( $aname : $aty ),* ) -> $ret { + if $crate::rom_data::rom_version_number() < 2 { + panic!( + "Floating point function requires V2 bootrom (found: V{})", + $crate::rom_data::rom_version_number() + ); + } + let table: *const usize = $crate::rom_data::soft_float_table(); + unsafe { + // This is the entry in the table. Our offset is given as a + // byte offset, but we want the table index (each pointer in + // the table is 4 bytes long) + let entry: *const usize = table.offset($offset / 4); + // Read the pointer from the table + core::ptr::read(entry) as *const u32 + } + } + } + )* + } + } + // These are only on BootROM v2 or higher + make_functions_v2! 
{ /// Compares two floating point numbers, returning: /// • 0 if a == b /// • -1 if a < b diff --git a/rp2040-hal/src/sio.rs b/rp2040-hal/src/sio.rs index 90b18e3..a2f769f 100644 --- a/rp2040-hal/src/sio.rs +++ b/rp2040-hal/src/sio.rs @@ -171,7 +171,7 @@ impl SioFifo { } } -fn save_divider(f: F) -> R +pub(crate) fn save_divider(f: F) -> R where F: FnOnce(&pac::sio::RegisterBlock) -> R, { @@ -286,71 +286,17 @@ impl HwDivider { } } -macro_rules! divider_intrinsics { - () => (); - - ( - #[arm_aeabi_alias = $alias:ident] - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - extern $abi fn $name( $($argname: $ty),* ) -> $ret { - $($body)* - } - - mod $name { - #[no_mangle] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { - super::$name($($argname),*) - } - } - - mod $alias { - #[no_mangle] - pub extern $abi fn $alias( $($argname: $ty),* ) -> $ret { - super::$name($($argname),*) - } - } - - divider_intrinsics!($($rest)*); - ); - - ( - pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty { - $($body:tt)* - } - - $($rest:tt)* - ) => ( - extern $abi fn $name( $($argname: $ty),* ) -> $ret { - $($body)* - } - - mod $name { - #[no_mangle] - pub extern $abi fn $name( $($argname: $ty),* ) -> $ret { - super::$name($($argname),*) - } - } - - divider_intrinsics!($($rest)*); - ); -} - -divider_intrinsics! { - #[arm_aeabi_alias = __aeabi_uidiv] - pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { +intrinsics! 
{ + #[aeabi = __aeabi_uidiv] + extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { divider_unsigned(n, d).quotient } - pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { + extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { divider_unsigned(n, d).remainder } - pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { + extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { let quo_rem = divider_unsigned(n, d); if let Some(rem) = rem { *rem = quo_rem.remainder; @@ -358,16 +304,16 @@ divider_intrinsics! { quo_rem.quotient } - #[arm_aeabi_alias = __aeabi_idiv] - pub extern "C" fn __divsi3(n: i32, d: i32) -> i32 { + #[aeabi = __aeabi_idiv] + extern "C" fn __divsi3(n: i32, d: i32) -> i32 { divider_signed(n, d).quotient } - pub extern "C" fn __modsi3(n: i32, d: i32) -> i32 { + extern "C" fn __modsi3(n: i32, d: i32) -> i32 { divider_signed(n, d).remainder } - pub extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 { + extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 { let quo_rem = divider_signed(n, d); *rem = quo_rem.remainder; quo_rem.quotient