From 48f6b2fbd6c6d8396f220a0624100cdaa27e798a Mon Sep 17 00:00:00 2001
From: Jarrod Petz
Date: Fri, 29 May 2026 17:55:40 +1000
Subject: [PATCH] Add `_mm_clflushopt` intrinsic

Adds the `_mm_clflushopt` intrinsic for the CLFLUSHOPT instruction
(weakly-ordered cache-line flush). Until now stdarch exposed only the
SSE2 `_mm_clflush`, so there was no way to emit `clflushopt` from an
intrinsic. Gated on the new `clflushopt` target feature. See the PR
description for the related compiler PR and tracking issue.

Modeled on the existing `_mm_clflush` / `adx` intrinsics: links
`llvm.x86.clflushopt`, assert_instr + simd_test coverage.
---
 crates/core_arch/missing-x86.md          |  2 +-
 crates/core_arch/src/x86/clflushopt.rs   | 45 ++++++++++++++++++++++++
 crates/core_arch/src/x86/mod.rs          |  4 +++
 crates/stdarch-verify/tests/x86-intel.rs |  2 +-
 4 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 crates/core_arch/src/x86/clflushopt.rs

diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index e9f68eb9e6..4d1068adb5 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -72,7 +72,7 @@
["CLFLUSHOPT"]

-  * [ ] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)
+  * [x] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)

diff --git a/crates/core_arch/src/x86/clflushopt.rs b/crates/core_arch/src/x86/clflushopt.rs
new file mode 100644
index 0000000000..6b7e213bd5
--- /dev/null
+++ b/crates/core_arch/src/x86/clflushopt.rs
@@ -0,0 +1,45 @@
+//! `CLFLUSHOPT` cache-line flush.
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    #[link_name = "llvm.x86.clflushopt"]
+    fn clflushopt(p: *mut i8);
+}
+
+/// Invalidates from every level of the cache hierarchy the cache line that
+/// contains `p`.
+///
+/// Unlike [`_mm_clflush`], `CLFLUSHOPT` is only ordered with respect to older
+/// writes to the flushed cache line and with respect to fence/locked
+/// operations; it is *not* serialized against other `CLFLUSHOPT`/`CLFLUSH`
+/// instructions or unrelated stores. This makes flushing a range of lines
+/// substantially faster, but a fence (e.g. [`_mm_sfence`] or [`_mm_mfence`]) is
+/// required afterward to order the flushes against subsequent operations.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflushopt)
+///
+/// [`_mm_clflush`]: crate::arch::x86_64::_mm_clflush
+/// [`_mm_sfence`]: crate::arch::x86_64::_mm_sfence
+/// [`_mm_mfence`]: crate::arch::x86_64::_mm_mfence
+#[inline]
+#[target_feature(enable = "clflushopt")]
+#[cfg_attr(test, assert_instr(clflushopt))]
+#[unstable(feature = "simd_x86_clflushopt", issue = "157096")]
+pub unsafe fn _mm_clflushopt(p: *const u8) {
+    clflushopt(p as *mut i8);
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "clflushopt")]
+    unsafe fn test_mm_clflushopt() {
+        let x = 0_u8;
+        _mm_clflushopt(core::ptr::addr_of!(x));
+    }
+}
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index f5a8acbd8f..fbf1002eab 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -692,6 +692,10 @@ mod adx;
 #[stable(feature = "simd_x86_adx", since = "1.33.0")]
 pub use self::adx::*;
 
+mod clflushopt;
+#[unstable(feature = "simd_x86_clflushopt", issue = "157096")]
+pub use self::clflushopt::*;
+
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index 024a873de1..485ba098fe 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -668,7 +668,7 @@ fn pointed_type(intrinsic: &Intrinsic) -> Result {
         {
             // AMX, KEYLOCKER and XSAVE intrinsics should take `*u8`
             U8
-        } else if intrinsic.name == "_mm_clflush" {
+        } else if intrinsic.name == "_mm_clflush" || intrinsic.name == "_mm_clflushopt" {
             // Just a false match in the following logic
             U8
         } else if ["_mm_storeu_si", "_mm_loadu_si"]