From 0355ed3168be3235e51da122930dde107781cad3 Mon Sep 17 00:00:00 2001 From: Robbert van der Helm Date: Tue, 10 May 2022 23:12:52 +0200 Subject: [PATCH] Add flush-to-zero support for AArch64 --- src/wrapper/util.rs | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/wrapper/util.rs b/src/wrapper/util.rs index 68018d79..998c6d83 100644 --- a/src/wrapper/util.rs +++ b/src/wrapper/util.rs @@ -7,6 +7,13 @@ use std::sync::atomic::{AtomicBool, Ordering}; /// The environment variable for controlling the logging behavior. const NIH_LOG_ENV: &str = "NIH_LOG"; +/// The bit that controls flush-to-zero behavior for denormals in 32 and 64-bit floating point +/// numbers on AArch64. +/// +/// +#[cfg(target_arch = "aarch64")] +const AARCH64_FTZ_BIT: u64 = 1 << 24; + #[cfg(all(debug_assertions, feature = "assert_process_allocs"))] #[global_allocator] static A: assert_no_alloc::AllocDisabler = assert_no_alloc::AllocDisabler; @@ -148,22 +155,33 @@ struct ScopedFtz { impl ScopedFtz { fn enable() -> Self { - // TODO: Implement this for AArch64/neon cfg_if::cfg_if! 
{ if #[cfg(target_feature = "sse")] { let mode = unsafe { std::arch::x86_64::_MM_GET_FLUSH_ZERO_MODE() }; - if mode != std::arch::x86_64::_MM_FLUSH_ZERO_ON { + let should_disable_again = mode != std::arch::x86_64::_MM_FLUSH_ZERO_ON; + if should_disable_again { unsafe { std::arch::x86_64::_MM_SET_FLUSH_ZERO_MODE(std::arch::x86_64::_MM_FLUSH_ZERO_ON) }; + } - Self { - should_disable_again: true, - _send_sync_marker: PhantomData, - } - } else { - Self { - should_disable_again: false, - _send_sync_marker: PhantomData, - } + Self { + should_disable_again, + _send_sync_marker: PhantomData, + } + } else if #[cfg(target_arch = "aarch64")] { + // There are no convenient intrinsics to change the FTZ settings on AArch64, so this + // requires inline assembly: + // https://developer.arm.com/documentation/ddi0595/2021-06/AArch64-Registers/FPCR--Floating-point-Control-Register + let mut fpcr: u64; + unsafe { std::arch::asm!("mrs {}, fpcr", out(reg) fpcr) }; + + let should_disable_again = fpcr & AARCH64_FTZ_BIT == 0; + if should_disable_again { + unsafe { std::arch::asm!("msr fpcr, {}", in(reg) fpcr | AARCH64_FTZ_BIT) }; + } + + Self { + should_disable_again, + _send_sync_marker: PhantomData, } } else { Self { @@ -181,6 +199,10 @@ impl Drop for ScopedFtz { cfg_if::cfg_if! { if #[cfg(target_feature = "sse")] { unsafe { std::arch::x86_64::_MM_SET_FLUSH_ZERO_MODE(std::arch::x86_64::_MM_FLUSH_ZERO_OFF) }; + } else if #[cfg(target_arch = "aarch64")] { + let mut fpcr: u64; + unsafe { std::arch::asm!("mrs {}, fpcr", out(reg) fpcr) }; + unsafe { std::arch::asm!("msr fpcr, {}", in(reg) fpcr & !AARCH64_FTZ_BIT) }; } }; }