diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs index 769a7b6c3919e..476e348ccdbee 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_ios.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs index ec92a40e255fb..bd20e3b1a9a67 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_tvos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a7".into(), + features: "+neon,+fp-armv8,+apple-a7,+outline-atomics".into(), max_atomic_width: Some(128), ..opts }, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs index dc595fbe7b6c1..031421b5e3c98 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_visionos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+neon,+fp-armv8,+apple-a16".into(), + features: "+neon,+fp-armv8,+apple-a16,+outline-atomics".into(), max_atomic_width: Some(128), supported_sanitizers: SanitizerSet::ADDRESS | SanitizerSet::THREAD, ..opts diff --git a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs index 2359627110729..2dcca4b7be8b4 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_apple_watchos.rs @@ -16,7 +16,7 @@ pub(crate) fn target() -> Target { .into(), arch, options: TargetOptions { - features: "+v8a,+neon,+fp-armv8,+apple-a7".into(), + features: "+v8a,+neon,+fp-armv8,+apple-a7,+outline-atomics".into(), max_atomic_width: Some(128), dynamic_linking: false, position_independent_executables: true, diff --git a/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs b/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs index d8651f305fe94..1cfa43b53a716 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_linux_android.rs @@ -21,7 +21,7 @@ pub(crate) fn target() -> Target { max_atomic_width: Some(128), // As documented in https://developer.android.com/ndk/guides/cpu-features.html // the neon (ASIMD) and FP must exist on all android aarch64 targets. - features: "+v8a,+neon,+fp-armv8".into(), + features: "+v8a,+neon,+fp-armv8,+outline-atomics".into(), // the AAPCS64 expects use of non-leaf frame pointers per // https://github.com/ARM-software/abi-aa/blob/4492d1570eb70c8fd146623e0db65b2d241f12e7/aapcs64/aapcs64.rst#the-frame-pointer // and we tend to encounter interesting bugs in AArch64 unwinding code if we do not diff --git a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs index eee668cc67edb..468a3eaf6cc95 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_gnullvm.rs @@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_gnullvm::opts(); base.max_atomic_width = Some(128); - base.features = "+v8a,+neon,+fp-armv8".into(); + base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into(); base.linker = Some("aarch64-w64-mingw32-clang".into()); // Microsoft recommends enabling frame pointers on Arm64 Windows. diff --git a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs index f1b6fa123deb4..b330ce2cbcea0 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_pc_windows_msvc.rs @@ -3,7 +3,7 @@ use crate::spec::{FramePointer, Target, TargetMetadata, base}; pub(crate) fn target() -> Target { let mut base = base::windows_msvc::opts(); base.max_atomic_width = Some(128); - base.features = "+v8a,+neon,+fp-armv8".into(); + base.features = "+v8a,+neon,+fp-armv8,+outline-atomics".into(); // Microsoft recommends enabling frame pointers on Arm64 Windows. // From https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170#integer-registers diff --git a/compiler/rustc_target/src/spec/targets/aarch64_unknown_freebsd.rs b/compiler/rustc_target/src/spec/targets/aarch64_unknown_freebsd.rs index 7306a75aa2274..8c8d8ef1a560e 100644 --- a/compiler/rustc_target/src/spec/targets/aarch64_unknown_freebsd.rs +++ b/compiler/rustc_target/src/spec/targets/aarch64_unknown_freebsd.rs @@ -13,7 +13,7 @@ pub(crate) fn target() -> Target { data_layout: "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32".into(), arch: "aarch64".into(), options: TargetOptions { - features: "+v8a".into(), + features: "+v8a,+outline-atomics".into(), max_atomic_width: Some(128), stack_probes: StackProbeType::Inline, supported_sanitizers: SanitizerSet::ADDRESS diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index b2af99228fe6d..7551d5a66fc92 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -248,6 +248,10 @@ static AARCH64_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[ ("mte", Stable, &[]), // FEAT_AdvSimd & FEAT_FP ("neon", Stable, &[]), + // Backend option to turn atomic operations into an intrinsic call when `lse` is not known to be + // available, so the intrinsic can do runtime LSE feature detection rather than unconditionally + // using slower non-LSE operations. Unstable since it doesn't need to user-togglable. + ("outline-atomics", Unstable(sym::aarch64_unstable_target_feature), &[]), // FEAT_PAUTH (address authentication) ("paca", Stable, &[]), // FEAT_PAUTH (generic authentication) diff --git a/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs b/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs similarity index 79% rename from library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs rename to library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs index 38fcab152aed2..f5550dcdd5289 100644 --- a/library/compiler-builtins/compiler-builtins/src/aarch64_linux.rs +++ b/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs @@ -6,9 +6,6 @@ //! which is supported on the current CPU. //! See for more discussion. //! -//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. -//! Use the `compiler-rt` intrinsics if you want LSE support. -//! //! Ported from `aarch64/lse.S` in LLVM's compiler-rt. //! //! Generate functions for each of the following symbols: @@ -24,7 +21,18 @@ //! We do something similar, but with macro arguments. #![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule -// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. +use core::sync::atomic::{AtomicU8, Ordering}; + +/// non-zero if the host supports LSE atomics. +static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0); + +intrinsics! { + /// Call to enable LSE in outline atomic operations. The caller must verify + /// LSE operations are supported. + pub extern "C" fn __rust_enable_lse() { + HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed); + } +} /// Translate a byte size to a Rust type. #[rustfmt::skip] @@ -126,6 +134,39 @@ macro_rules! stxp { }; } +// Check if LSE intrinsic can be used, and jump to label if not. +macro_rules! jmp_if_no_lse { + ($label:literal) => { + concat!( + ".arch_extension lse; ", + "adrp x16, {have_lse}; ", + "ldrb w16, [x16, :lo12:{have_lse}]; ", + "cbz w16, ", + $label, + ";" + ) + }; +} + +// Translate memory ordering to the LSE suffix +#[rustfmt::skip] +macro_rules! lse_mem_sfx { + (Relaxed) => { "" }; + (Acquire) => { "a" }; + (Release) => { "l" }; + (AcqRel) => { "al" }; +} + +// Generate the aarch64 LSE operation for memory ordering and width +macro_rules! lse { + ($op:literal, $order:ident, 16) => { + concat!($op, "p", lse_mem_sfx!($order)) + }; + ($op:literal, $order:ident, $bytes:tt) => { + concat!($op, lse_mem_sfx!($order), size!($bytes)) + }; +} + /// See . macro_rules! compare_and_swap { ($ordering:ident, $bytes:tt, $name:ident) => { @@ -137,6 +178,11 @@ macro_rules! compare_and_swap { ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. core::arch::naked_asm! { + jmp_if_no_lse!("8f"), + // CAS s(0), s(1), [x2] + concat!(lse!("cas", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 1), ", [x2]"), + "ret", + "8:", // UXT s(tmp0), s(0) concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -150,6 +196,7 @@ macro_rules! compare_and_swap { "cbnz w17, 0b", "1:", "ret", + have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS, } } } @@ -166,6 +213,11 @@ macro_rules! compare_and_swap_i128 { expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { core::arch::naked_asm! { + jmp_if_no_lse!("8f"), + // CASP x0, x1, x2, x3, [x4] + concat!(lse!("cas", $ordering, 16), " x0, x1, x2, x3, [x4]"), + "ret", + "8:", "mov x16, x0", "mov x17, x1", "0:", @@ -179,6 +231,7 @@ macro_rules! compare_and_swap_i128 { "cbnz w15, 0b", "1:", "ret", + have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS, } } } @@ -195,6 +248,11 @@ macro_rules! swap { left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { core::arch::naked_asm! { + jmp_if_no_lse!("8f"), + // SWP s(0), s(0), [x1] + concat!(lse!("swp", $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"), + "ret", + "8:", // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -204,6 +262,7 @@ macro_rules! swap { concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), "cbnz w17, 0b", "ret", + have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS, } } } @@ -212,7 +271,7 @@ macro_rules! swap { /// See (e.g.) . macro_rules! fetch_op { - ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { + ($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => { intrinsics! { #[maybe_use_optimized_c_shim] #[unsafe(naked)] @@ -220,6 +279,11 @@ macro_rules! fetch_op { val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { core::arch::naked_asm! { + jmp_if_no_lse!("8f"), + // LSEOP s(0), s(0), [x1] + concat!(lse!($lse_op, $ordering, $bytes), " ", reg!($bytes, 0), ", ", reg!($bytes, 0), ", [x1]"), + "ret", + "8:", // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", @@ -231,6 +295,7 @@ macro_rules! fetch_op { concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), "cbnz w15, 0b", "ret", + have_lse = sym crate::aarch64_outline_atomics::HAVE_LSE_ATOMICS, } } } @@ -240,25 +305,25 @@ macro_rules! fetch_op { // We need a single macro to pass to `foreach_ldadd`. macro_rules! add { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "add" } + fetch_op! { $ordering, $bytes, $name, "add", "ldadd" } }; } macro_rules! and { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "bic" } + fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" } }; } macro_rules! xor { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "eor" } + fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" } }; } macro_rules! or { ($ordering:ident, $bytes:tt, $name:ident) => { - fetch_op! { $ordering, $bytes, $name, "orr" } + fetch_op! { $ordering, $bytes, $name, "orr", "ldset" } }; } diff --git a/library/compiler-builtins/compiler-builtins/src/lib.rs b/library/compiler-builtins/compiler-builtins/src/lib.rs index fe0ad81dd3a3d..ad4bf5992132c 100644 --- a/library/compiler-builtins/compiler-builtins/src/lib.rs +++ b/library/compiler-builtins/compiler-builtins/src/lib.rs @@ -60,8 +60,8 @@ pub mod arm; #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] pub mod aarch64; -#[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] -pub mod aarch64_linux; +#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))] +pub mod aarch64_outline_atomics; #[cfg(all( kernel_user_helpers, diff --git a/library/std/src/sys/configure_builtins.rs b/library/std/src/sys/configure_builtins.rs new file mode 100644 index 0000000000000..3677b0bb4ee02 --- /dev/null +++ b/library/std/src/sys/configure_builtins.rs @@ -0,0 +1,27 @@ +/// Hook into .init_array to enable LSE atomic operations at startup, if +/// supported. +#[cfg(all( + target_arch = "aarch64", + target_os = "linux", + any(target_env = "gnu", target_env = "musl"), + not(feature = "compiler-builtins-c") +))] +#[used] +#[unsafe(link_section = ".init_array.90")] +static RUST_LSE_INIT: extern "C" fn() = { + extern "C" fn init_lse() { + use crate::arch; + + // This is provided by compiler-builtins::aarch64_linux. + unsafe extern "C" { + fn __rust_enable_lse(); + } + + if arch::is_aarch64_feature_detected!("lse") { + unsafe { + __rust_enable_lse(); + } + } + } + init_lse +}; diff --git a/library/std/src/sys/mod.rs b/library/std/src/sys/mod.rs index f9a02b522e5e1..8ec0a0e33023f 100644 --- a/library/std/src/sys/mod.rs +++ b/library/std/src/sys/mod.rs @@ -1,5 +1,10 @@ #![allow(unsafe_op_in_unsafe_fn)] +/// The configure builtins provides runtime support compiler-builtin features +/// which require dynamic intialization to work as expected, e.g. aarch64 +/// outline-atomics. +mod configure_builtins; + /// The PAL (platform abstraction layer) contains platform-specific abstractions /// for implementing the features in the other submodules, e.g. UNIX file /// descriptors. diff --git a/tests/assembly-llvm/asm/aarch64-outline-atomics.rs b/tests/assembly-llvm/asm/aarch64-outline-atomics.rs index 22599c18dcf37..1177c1e68ed56 100644 --- a/tests/assembly-llvm/asm/aarch64-outline-atomics.rs +++ b/tests/assembly-llvm/asm/aarch64-outline-atomics.rs @@ -8,6 +8,10 @@ use std::sync::atomic::AtomicI32; use std::sync::atomic::Ordering::*; +// Verify config on outline-atomics works (it is always enabled on aarch64-linux). +#[cfg(not(target_feature = "outline-atomics"))] +compile_error!("outline-atomics is not enabled"); + pub fn compare_exchange(a: &AtomicI32) { // On AArch64 LLVM should outline atomic operations. // CHECK: __aarch64_cas4_relax diff --git a/tests/ui/check-cfg/target_feature.stderr b/tests/ui/check-cfg/target_feature.stderr index f422919983b75..e730492fd9b09 100644 --- a/tests/ui/check-cfg/target_feature.stderr +++ b/tests/ui/check-cfg/target_feature.stderr @@ -183,6 +183,7 @@ LL | cfg!(target_feature = "_UNEXPECTED_VALUE"); `nnp-assist` `nontrapping-fptoint` `nvic` +`outline-atomics` `paca` `pacg` `pan`